// tokenize.js (10 KB)
  // Character codes and terminator patterns used by the tokenizer below.
  // Each code is looked up once at module load.
  const codeOf = (character) => character.charCodeAt(0);

  // Brackets, parentheses and quotes.
  const openBracket = codeOf('{');
  const closeBracket = codeOf('}');
  const openParen = codeOf('(');
  const closeParen = codeOf(')');
  const singleQuote = codeOf("'");
  const doubleQuote = codeOf('"');

  // Punctuation and operators.
  const backslash = codeOf('\\');
  const slash = codeOf('/');
  const period = codeOf('.');
  const comma = codeOf(',');
  const colon = codeOf(':');
  const asterisk = codeOf('*');
  const minus = codeOf('-');
  const plus = codeOf('+');
  const pound = codeOf('#');

  // Whitespace.
  const newline = codeOf('\n');
  const space = codeOf(' ');
  const feed = codeOf('\f');
  const tab = codeOf('\t');
  const cr = codeOf('\r');

  // At-words, exponent markers, digit bounds and unicode-range prefixes.
  const at = codeOf('@');
  const lowerE = codeOf('e');
  const upperE = codeOf('E');
  const digit0 = codeOf('0');
  const digit9 = codeOf('9');
  const lowerU = codeOf('u');
  const upperU = codeOf('U');

  // Global (stateful) patterns: the tokenizer sets `lastIndex` before each use.
  // Characters that end an at-word.
  const atEnd = /[ \n\t\r\{\(\)'"\\;,/]/g;
  // Characters that end a word; a `/` only ends it when it opens a `/*` comment.
  const wordEnd = /[ \n\t\r\(\)\{\}\*:;@!&'"\+\|~>,\[\]\\]|\/(?=\*)/g;
  // Same, for words starting with a digit: `-` and any `/` also end the word.
  const wordEndNum = /[ \n\t\r\(\)\{\}\*:;@!&'"\-\+\|~>,\[\]\\]|\//g;

  // Single-character tests.
  // Letter or digit (tested against the character following `#`).
  const alphaNum = /^[a-z0-9]/i;
  // Character allowed inside the body of a unicode-range token.
  const unicodeRange = /^[a-f0-9?\-]/i;
  33. const util = require('util');
  34. const TokenizeError = require('./errors/TokenizeError');
  35. module.exports = function tokenize(input, options) {
  36. options = options || {};
  37. const tokens = [];
  38. const css = input.valueOf();
  39. const length = css.length;
  40. let offset = -1;
  41. let line = 1;
  42. let pos = 0;
  43. let parentCount = 0;
  44. let isURLArg = null;
  45. let code;
  46. let next;
  47. let quote;
  48. let lines;
  49. let last;
  50. let content;
  51. let escape;
  52. let nextLine;
  53. let nextOffset;
  54. let escaped;
  55. let escapePos;
  56. let nextChar;
  57. function unclosed(what) {
  58. const message = util.format(
  59. 'Unclosed %s at line: %d, column: %d, token: %d',
  60. what,
  61. line,
  62. pos - offset,
  63. pos
  64. );
  65. throw new TokenizeError(message);
  66. }
  67. function tokenizeError() {
  68. const message = util.format(
  69. 'Syntax error at line: %d, column: %d, token: %d',
  70. line,
  71. pos - offset,
  72. pos
  73. );
  74. throw new TokenizeError(message);
  75. }
  76. while (pos < length) {
  77. code = css.charCodeAt(pos);
  78. if (code === newline) {
  79. offset = pos;
  80. line += 1;
  81. }
  82. switch (code) {
  83. case newline:
  84. case space:
  85. case tab:
  86. case cr:
  87. case feed:
  88. next = pos;
  89. do {
  90. next += 1;
  91. code = css.charCodeAt(next);
  92. if (code === newline) {
  93. offset = next;
  94. line += 1;
  95. }
  96. } while (
  97. code === space ||
  98. code === newline ||
  99. code === tab ||
  100. code === cr ||
  101. code === feed
  102. );
  103. tokens.push(['space', css.slice(pos, next), line, pos - offset, line, next - offset, pos]);
  104. pos = next - 1;
  105. break;
  106. case colon:
  107. next = pos + 1;
  108. tokens.push(['colon', css.slice(pos, next), line, pos - offset, line, next - offset, pos]);
  109. pos = next - 1;
  110. break;
  111. case comma:
  112. next = pos + 1;
  113. tokens.push(['comma', css.slice(pos, next), line, pos - offset, line, next - offset, pos]);
  114. pos = next - 1;
  115. break;
  116. case openBracket:
  117. tokens.push(['{', '{', line, pos - offset, line, next - offset, pos]);
  118. break;
  119. case closeBracket:
  120. tokens.push(['}', '}', line, pos - offset, line, next - offset, pos]);
  121. break;
  122. case openParen:
  123. parentCount++;
  124. isURLArg =
  125. !isURLArg &&
  126. parentCount === 1 &&
  127. tokens.length > 0 &&
  128. tokens[tokens.length - 1][0] === 'word' &&
  129. tokens[tokens.length - 1][1] === 'url';
  130. tokens.push(['(', '(', line, pos - offset, line, next - offset, pos]);
  131. break;
  132. case closeParen:
  133. parentCount--;
  134. isURLArg = !isURLArg && parentCount === 1;
  135. tokens.push([')', ')', line, pos - offset, line, next - offset, pos]);
  136. break;
  137. case singleQuote:
  138. case doubleQuote:
  139. quote = code === singleQuote ? "'" : '"';
  140. next = pos;
  141. do {
  142. escaped = false;
  143. next = css.indexOf(quote, next + 1);
  144. if (next === -1) {
  145. unclosed('quote', quote);
  146. }
  147. escapePos = next;
  148. while (css.charCodeAt(escapePos - 1) === backslash) {
  149. escapePos -= 1;
  150. escaped = !escaped;
  151. }
  152. } while (escaped);
  153. tokens.push([
  154. 'string',
  155. css.slice(pos, next + 1),
  156. line,
  157. pos - offset,
  158. line,
  159. next - offset,
  160. pos
  161. ]);
  162. pos = next;
  163. break;
  164. case at:
  165. atEnd.lastIndex = pos + 1;
  166. atEnd.test(css);
  167. if (atEnd.lastIndex === 0) {
  168. next = css.length - 1;
  169. } else {
  170. next = atEnd.lastIndex - 2;
  171. }
  172. tokens.push([
  173. 'atword',
  174. css.slice(pos, next + 1),
  175. line,
  176. pos - offset,
  177. line,
  178. next - offset,
  179. pos
  180. ]);
  181. pos = next;
  182. break;
  183. case backslash:
  184. next = pos;
  185. code = css.charCodeAt(next + 1);
  186. if (
  187. escape &&
  188. (code !== slash &&
  189. code !== space &&
  190. code !== newline &&
  191. code !== tab &&
  192. code !== cr &&
  193. code !== feed)
  194. ) {
  195. next += 1;
  196. }
  197. tokens.push([
  198. 'word',
  199. css.slice(pos, next + 1),
  200. line,
  201. pos - offset,
  202. line,
  203. next - offset,
  204. pos
  205. ]);
  206. pos = next;
  207. break;
  208. case plus:
  209. case minus:
  210. case asterisk:
  211. next = pos + 1;
  212. nextChar = css.slice(pos + 1, next + 1);
  213. const prevChar = css.slice(pos - 1, pos);
  214. // if the operator is immediately followed by a word character, then we
  215. // have a prefix of some kind, and should fall-through. eg. -webkit
  216. // look for --* for custom variables
  217. if (code === minus && nextChar.charCodeAt(0) === minus) {
  218. next++;
  219. tokens.push(['word', css.slice(pos, next), line, pos - offset, line, next - offset, pos]);
  220. pos = next - 1;
  221. break;
  222. }
  223. tokens.push([
  224. 'operator',
  225. css.slice(pos, next),
  226. line,
  227. pos - offset,
  228. line,
  229. next - offset,
  230. pos
  231. ]);
  232. pos = next - 1;
  233. break;
  234. default:
  235. if (
  236. code === slash &&
  237. (css.charCodeAt(pos + 1) === asterisk ||
  238. (options.loose && !isURLArg && css.charCodeAt(pos + 1) === slash))
  239. ) {
  240. const isStandardComment = css.charCodeAt(pos + 1) === asterisk;
  241. if (isStandardComment) {
  242. next = css.indexOf('*/', pos + 2) + 1;
  243. if (next === 0) {
  244. unclosed('comment', '*/');
  245. }
  246. } else {
  247. const newlinePos = css.indexOf('\n', pos + 2);
  248. next = newlinePos !== -1 ? newlinePos - 1 : length;
  249. }
  250. content = css.slice(pos, next + 1);
  251. lines = content.split('\n');
  252. last = lines.length - 1;
  253. if (last > 0) {
  254. nextLine = line + last;
  255. nextOffset = next - lines[last].length;
  256. } else {
  257. nextLine = line;
  258. nextOffset = offset;
  259. }
  260. tokens.push(['comment', content, line, pos - offset, nextLine, next - nextOffset, pos]);
  261. offset = nextOffset;
  262. line = nextLine;
  263. pos = next;
  264. } else if (code === pound && !alphaNum.test(css.slice(pos + 1, pos + 2))) {
  265. next = pos + 1;
  266. tokens.push(['#', css.slice(pos, next), line, pos - offset, line, next - offset, pos]);
  267. pos = next - 1;
  268. } else if ((code === lowerU || code === upperU) && css.charCodeAt(pos + 1) === plus) {
  269. next = pos + 2;
  270. do {
  271. next += 1;
  272. code = css.charCodeAt(next);
  273. } while (next < length && unicodeRange.test(css.slice(next, next + 1)));
  274. tokens.push([
  275. 'unicoderange',
  276. css.slice(pos, next),
  277. line,
  278. pos - offset,
  279. line,
  280. next - offset,
  281. pos
  282. ]);
  283. pos = next - 1;
  284. }
  285. // catch a regular slash, that isn't a comment
  286. else if (code === slash) {
  287. next = pos + 1;
  288. tokens.push([
  289. 'operator',
  290. css.slice(pos, next),
  291. line,
  292. pos - offset,
  293. line,
  294. next - offset,
  295. pos
  296. ]);
  297. pos = next - 1;
  298. } else {
  299. let regex = wordEnd;
  300. // we're dealing with a word that starts with a number
  301. // those get treated differently
  302. if (code >= digit0 && code <= digit9) {
  303. regex = wordEndNum;
  304. }
  305. regex.lastIndex = pos + 1;
  306. regex.test(css);
  307. if (regex.lastIndex === 0) {
  308. next = css.length - 1;
  309. } else {
  310. next = regex.lastIndex - 2;
  311. }
  312. // Exponential number notation with minus or plus: 1e-10, 1e+10
  313. if (regex === wordEndNum || code === period) {
  314. const ncode = css.charCodeAt(next);
  315. const ncode1 = css.charCodeAt(next + 1);
  316. const ncode2 = css.charCodeAt(next + 2);
  317. if (
  318. (ncode === lowerE || ncode === upperE) &&
  319. (ncode1 === minus || ncode1 === plus) &&
  320. (ncode2 >= digit0 && ncode2 <= digit9)
  321. ) {
  322. wordEndNum.lastIndex = next + 2;
  323. wordEndNum.test(css);
  324. if (wordEndNum.lastIndex === 0) {
  325. next = css.length - 1;
  326. } else {
  327. next = wordEndNum.lastIndex - 2;
  328. }
  329. }
  330. }
  331. tokens.push([
  332. 'word',
  333. css.slice(pos, next + 1),
  334. line,
  335. pos - offset,
  336. line,
  337. next - offset,
  338. pos
  339. ]);
  340. pos = next;
  341. }
  342. break;
  343. }
  344. pos++;
  345. }
  346. return tokens;
  347. };