tokenize.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. 'use strict';
  2. const openBracket = '{'.charCodeAt(0);
  3. const closeBracket = '}'.charCodeAt(0);
  4. const openParen = '('.charCodeAt(0);
  5. const closeParen = ')'.charCodeAt(0);
  6. const singleQuote = '\''.charCodeAt(0);
  7. const doubleQuote = '"'.charCodeAt(0);
  8. const backslash = '\\'.charCodeAt(0);
  9. const slash = '/'.charCodeAt(0);
  10. const period = '.'.charCodeAt(0);
  11. const comma = ','.charCodeAt(0);
  12. const colon = ':'.charCodeAt(0);
  13. const asterisk = '*'.charCodeAt(0);
  14. const minus = '-'.charCodeAt(0);
  15. const plus = '+'.charCodeAt(0);
  16. const pound = '#'.charCodeAt(0);
  17. const newline = '\n'.charCodeAt(0);
  18. const space = ' '.charCodeAt(0);
  19. const feed = '\f'.charCodeAt(0);
  20. const tab = '\t'.charCodeAt(0);
  21. const cr = '\r'.charCodeAt(0);
  22. const at = '@'.charCodeAt(0);
  23. const lowerE = 'e'.charCodeAt(0);
  24. const upperE = 'E'.charCodeAt(0);
  25. const digit0 = '0'.charCodeAt(0);
  26. const digit9 = '9'.charCodeAt(0);
  27. const lowerU = 'u'.charCodeAt(0);
  28. const upperU = 'U'.charCodeAt(0);
  29. const atEnd = /[ \n\t\r\{\(\)'"\\;,/]/g;
  30. const wordEnd = /[ \n\t\r\(\)\{\}\*:;@!&'"\+\|~>,\[\]\\]|\/(?=\*)/g;
  31. const wordEndNum = /[ \n\t\r\(\)\{\}\*:;@!&'"\-\+\|~>,\[\]\\]|\//g;
  32. const alphaNum = /^[a-z0-9]/i;
  33. const unicodeRange = /^[a-f0-9?\-]/i;
  34. const util = require('util');
  35. const TokenizeError = require('./errors/TokenizeError');
  36. module.exports = function tokenize (input, options) {
  37. options = options || {};
  38. let tokens = [],
  39. css = input.valueOf(),
  40. length = css.length,
  41. offset = -1,
  42. line = 1,
  43. pos = 0,
  44. parentCount = 0,
  45. isURLArg = null,
  46. code, next, quote, lines, last, content, escape, nextLine, nextOffset,
  47. escaped, escapePos, nextChar;
  48. function unclosed (what) {
  49. let message = util.format('Unclosed %s at line: %d, column: %d, token: %d', what, line, pos - offset, pos);
  50. throw new TokenizeError(message);
  51. }
  52. function tokenizeError () {
  53. let message = util.format('Syntax error at line: %d, column: %d, token: %d', line, pos - offset, pos);
  54. throw new TokenizeError(message);
  55. }
  56. while (pos < length) {
  57. code = css.charCodeAt(pos);
  58. if (code === newline) {
  59. offset = pos;
  60. line += 1;
  61. }
  62. switch (code) {
  63. case newline:
  64. case space:
  65. case tab:
  66. case cr:
  67. case feed:
  68. next = pos;
  69. do {
  70. next += 1;
  71. code = css.charCodeAt(next);
  72. if (code === newline) {
  73. offset = next;
  74. line += 1;
  75. }
  76. } while (code === space ||
  77. code === newline ||
  78. code === tab ||
  79. code === cr ||
  80. code === feed);
  81. tokens.push(['space', css.slice(pos, next),
  82. line, pos - offset,
  83. line, next - offset,
  84. pos
  85. ]);
  86. pos = next - 1;
  87. break;
  88. case colon:
  89. next = pos + 1;
  90. tokens.push(['colon', css.slice(pos, next),
  91. line, pos - offset,
  92. line, next - offset,
  93. pos
  94. ]);
  95. pos = next - 1;
  96. break;
  97. case comma:
  98. next = pos + 1;
  99. tokens.push(['comma', css.slice(pos, next),
  100. line, pos - offset,
  101. line, next - offset,
  102. pos
  103. ]);
  104. pos = next - 1;
  105. break;
  106. case openBracket:
  107. tokens.push(['{', '{',
  108. line, pos - offset,
  109. line, next - offset,
  110. pos
  111. ]);
  112. break;
  113. case closeBracket:
  114. tokens.push(['}', '}',
  115. line, pos - offset,
  116. line, next - offset,
  117. pos
  118. ]);
  119. break;
  120. case openParen:
  121. parentCount++;
  122. isURLArg = !isURLArg && parentCount === 1 &&
  123. tokens.length > 0 &&
  124. tokens[tokens.length - 1][0] === "word" &&
  125. tokens[tokens.length - 1][1] === "url";
  126. tokens.push(['(', '(',
  127. line, pos - offset,
  128. line, next - offset,
  129. pos
  130. ]);
  131. break;
  132. case closeParen:
  133. parentCount--;
  134. isURLArg = isURLArg && parentCount > 0;
  135. tokens.push([')', ')',
  136. line, pos - offset,
  137. line, next - offset,
  138. pos
  139. ]);
  140. break;
  141. case singleQuote:
  142. case doubleQuote:
  143. quote = code === singleQuote ? '\'' : '"';
  144. next = pos;
  145. do {
  146. escaped = false;
  147. next = css.indexOf(quote, next + 1);
  148. if (next === -1) {
  149. unclosed('quote', quote);
  150. }
  151. escapePos = next;
  152. while (css.charCodeAt(escapePos - 1) === backslash) {
  153. escapePos -= 1;
  154. escaped = !escaped;
  155. }
  156. } while (escaped);
  157. tokens.push(['string', css.slice(pos, next + 1),
  158. line, pos - offset,
  159. line, next - offset,
  160. pos
  161. ]);
  162. pos = next;
  163. break;
  164. case at:
  165. atEnd.lastIndex = pos + 1;
  166. atEnd.test(css);
  167. if (atEnd.lastIndex === 0) {
  168. next = css.length - 1;
  169. }
  170. else {
  171. next = atEnd.lastIndex - 2;
  172. }
  173. tokens.push(['atword', css.slice(pos, next + 1),
  174. line, pos - offset,
  175. line, next - offset,
  176. pos
  177. ]);
  178. pos = next;
  179. break;
  180. case backslash:
  181. next = pos;
  182. code = css.charCodeAt(next + 1);
  183. if (escape && (code !== slash && code !== space &&
  184. code !== newline && code !== tab &&
  185. code !== cr && code !== feed)) {
  186. next += 1;
  187. }
  188. tokens.push(['word', css.slice(pos, next + 1),
  189. line, pos - offset,
  190. line, next - offset,
  191. pos
  192. ]);
  193. pos = next;
  194. break;
  195. case plus:
  196. case minus:
  197. case asterisk:
  198. next = pos + 1;
  199. nextChar = css.slice(pos + 1, next + 1);
  200. let prevChar = css.slice(pos - 1, pos);
  201. // if the operator is immediately followed by a word character, then we
  202. // have a prefix of some kind, and should fall-through. eg. -webkit
  203. // look for --* for custom variables
  204. if (code === minus && nextChar.charCodeAt(0) === minus) {
  205. next++;
  206. tokens.push(['word', css.slice(pos, next),
  207. line, pos - offset,
  208. line, next - offset,
  209. pos
  210. ]);
  211. pos = next - 1;
  212. break;
  213. }
  214. tokens.push(['operator', css.slice(pos, next),
  215. line, pos - offset,
  216. line, next - offset,
  217. pos
  218. ]);
  219. pos = next - 1;
  220. break;
  221. default:
  222. if (code === slash && (css.charCodeAt(pos + 1) === asterisk || (options.loose && !isURLArg && css.charCodeAt(pos + 1) === slash))) {
  223. const isStandardComment = css.charCodeAt(pos + 1) === asterisk;
  224. if (isStandardComment) {
  225. next = css.indexOf('*/', pos + 2) + 1;
  226. if (next === 0) {
  227. unclosed('comment', '*/');
  228. }
  229. }
  230. else {
  231. const newlinePos = css.indexOf('\n', pos + 2);
  232. next = newlinePos !== -1 ? newlinePos - 1 : length;
  233. }
  234. content = css.slice(pos, next + 1);
  235. lines = content.split('\n');
  236. last = lines.length - 1;
  237. if (last > 0) {
  238. nextLine = line + last;
  239. nextOffset = next - lines[last].length;
  240. }
  241. else {
  242. nextLine = line;
  243. nextOffset = offset;
  244. }
  245. tokens.push(['comment', content,
  246. line, pos - offset,
  247. nextLine, next - nextOffset,
  248. pos
  249. ]);
  250. offset = nextOffset;
  251. line = nextLine;
  252. pos = next;
  253. }
  254. else if (code === pound && !alphaNum.test(css.slice(pos + 1, pos + 2))) {
  255. next = pos + 1;
  256. tokens.push(['#', css.slice(pos, next),
  257. line, pos - offset,
  258. line, next - offset,
  259. pos
  260. ]);
  261. pos = next - 1;
  262. }
  263. else if ((code === lowerU || code === upperU) && css.charCodeAt(pos + 1) === plus) {
  264. next = pos + 2;
  265. do {
  266. next += 1;
  267. code = css.charCodeAt(next);
  268. } while (next < length && unicodeRange.test(css.slice(next, next + 1)));
  269. tokens.push(['unicoderange', css.slice(pos, next),
  270. line, pos - offset,
  271. line, next - offset,
  272. pos
  273. ]);
  274. pos = next - 1;
  275. }
  276. // catch a regular slash, that isn't a comment
  277. else if (code === slash) {
  278. next = pos + 1;
  279. tokens.push(['operator', css.slice(pos, next),
  280. line, pos - offset,
  281. line, next - offset,
  282. pos
  283. ]);
  284. pos = next - 1;
  285. }
  286. else {
  287. let regex = wordEnd;
  288. // we're dealing with a word that starts with a number
  289. // those get treated differently
  290. if (code >= digit0 && code <= digit9) {
  291. regex = wordEndNum;
  292. }
  293. regex.lastIndex = pos + 1;
  294. regex.test(css);
  295. if (regex.lastIndex === 0) {
  296. next = css.length - 1;
  297. }
  298. else {
  299. next = regex.lastIndex - 2;
  300. }
  301. // Exponential number notation with minus or plus: 1e-10, 1e+10
  302. if (regex === wordEndNum || code === period) {
  303. let ncode = css.charCodeAt(next),
  304. ncode1 = css.charCodeAt(next + 1),
  305. ncode2 = css.charCodeAt(next + 2);
  306. if (
  307. (ncode === lowerE || ncode === upperE) &&
  308. (ncode1 === minus || ncode1 === plus) &&
  309. (ncode2 >= digit0 && ncode2 <= digit9)
  310. ) {
  311. wordEndNum.lastIndex = next + 2;
  312. wordEndNum.test(css);
  313. if (wordEndNum.lastIndex === 0) {
  314. next = css.length - 1;
  315. }
  316. else {
  317. next = wordEndNum.lastIndex - 2;
  318. }
  319. }
  320. }
  321. tokens.push(['word', css.slice(pos, next + 1),
  322. line, pos - offset,
  323. line, next - offset,
  324. pos
  325. ]);
  326. pos = next;
  327. }
  328. break;
  329. }
  330. pos ++;
  331. }
  332. return tokens;
  333. };