LexerTest.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. <?php declare(strict_types=1);
  2. namespace PhpParser;
  3. use PhpParser\Parser\Tokens;
  4. class LexerTest extends \PHPUnit\Framework\TestCase
  5. {
  /**
   * Builds the lexer that the tests in this class run against.
   *
   * This is a separate protected factory so that it can be overridden (the
   * original note reads "to allow overwriting"), letting the same test
   * methods exercise a different lexer.
   *
   * @param array $options Options handed unchanged to the Lexer constructor
   *
   * @return Lexer
   */
  protected function getLexer(array $options = []) {
      $lexer = new Lexer($options);

      return $lexer;
  }
  /**
   * Checks that lexing faulty code reports exactly the expected error messages.
   *
   * @dataProvider provideTestError
   *
   * @param string   $code     Source code handed to the lexer
   * @param string[] $messages Expected messages including column info, in reporting order
   */
  public function testError($code, $messages) {
      // HHVM raises no warnings from token_get_all(), so there is nothing to collect there.
      if (defined('HHVM_VERSION')) {
          $this->markTestSkipped('HHVM does not throw warnings from token_get_all()');
      }

      $collector = new ErrorHandler\Collecting();
      $attributes = ['comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos'];
      $lexer = $this->getLexer(['usedAttributes' => $attributes]);
      $lexer->startLexing($code, $collector);

      $reported = $collector->getErrors();
      $expectedCount = count($messages);
      $this->assertCount($expectedCount, $reported);

      // Compare message by message so that a failure points at the offending error.
      for ($index = 0; $index < $expectedCount; ++$index) {
          $actual = $reported[$index]->getMessageWithColumnInfo($code);
          $this->assertSame($messages[$index], $actual);
      }
  }
  /**
   * Data sets for testError().
   *
   * Each entry is [code, list of expected error messages with column info].
   *
   * @return array
   */
  public function provideTestError() {
      $cases = [];

      // Block comment that is never closed
      $cases[] = ["<?php /*", ["Unterminated comment from 1:7 to 1:9"]];

      // Single unexpected control character (byte 0x01)
      $cases[] = ["<?php \1", ["Unexpected character \"\1\" (ASCII 1) from 1:7 to 1:7"]];

      // Single null byte
      $cases[] = ["<?php \0", ["Unexpected null byte from 1:7 to 1:7"]];

      // Error with potentially emulated token
      $cases[] = ["<?php ?? \0", ["Unexpected null byte from 1:10 to 1:10"]];

      // Several errors in one input, reported in source order
      $cases[] = [
          "<?php\n\0\1 foo /* bar",
          [
              "Unexpected null byte from 2:1 to 2:1",
              "Unexpected character \"\1\" (ASCII 1) from 2:2 to 2:2",
              "Unterminated comment from 2:8 to 2:14",
          ],
      ];

      return $cases;
  }
  /**
   * Lexes $code and compares the emitted token stream with the expected one.
   *
   * Besides comparing each token, the test verifies that the lexer emits
   * neither more nor fewer tokens than expected; without that check a lexer
   * that stops early (or emits nothing at all) would pass unnoticed.
   *
   * @dataProvider provideTestLex
   *
   * @param string $code    Source code handed to the lexer
   * @param array  $options Options passed to the lexer under test
   * @param array  $tokens  Expected tokens, each as [id, value, startAttributes, endAttributes]
   */
  public function testLex($code, $options, $tokens) {
      $lexer = $this->getLexer($options);
      $lexer->startLexing($code);

      // getNextToken() sets $value and both attribute arrays through its arguments
      // and returns a falsy id once the input is exhausted.
      while ($id = $lexer->getNextToken($value, $startAttributes, $endAttributes)) {
          // Fail with a readable message instead of indexing into null
          // when the lexer emits a token that was not expected.
          $this->assertNotEmpty($tokens, 'Lexer produced more tokens than expected');
          $token = array_shift($tokens);

          $this->assertSame($token[0], $id);
          $this->assertSame($token[1], $value);
          $this->assertEquals($token[2], $startAttributes);
          $this->assertEquals($token[3], $endAttributes);
      }

      // Every expected token must have been consumed. This also gives the
      // empty-input data set an assertion of its own.
      $this->assertCount(0, $tokens, 'Lexer produced fewer tokens than expected');
  }
  /**
   * Data sets for testLex().
   *
   * Each entry is [code, lexer options, expected tokens]; an expected token is
   * [token id, token value, start attributes, end attributes].
   *
   * @return array
   */
  public function provideTestLex() {
      // Input shared by the file-offset and the token-offset data sets
      $offsetCode = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';

      // Single-character tokens are identified by their character code
      $semicolon = ord(';');
      $dollar = ord('$');

      $cases = [];

      // tests conversion of closing PHP tag and drop of whitespace and opening tags
      $cases[] = [
          '<?php tokens ?>plaintext',
          [],
          [
              [Tokens::T_STRING, 'tokens', ['startLine' => 1], ['endLine' => 1]],
              [$semicolon, '?>', ['startLine' => 1], ['endLine' => 1]],
              [
                  Tokens::T_INLINE_HTML, 'plaintext',
                  ['startLine' => 1, 'hasLeadingNewline' => false],
                  ['endLine' => 1],
              ],
          ],
      ];

      // tests line numbers
      $cases[] = [
          '<?php' . "\n" . '$ token /** doc' . "\n" . 'comment */ $',
          [],
          [
              [$dollar, '$', ['startLine' => 2], ['endLine' => 2]],
              [Tokens::T_STRING, 'token', ['startLine' => 2], ['endLine' => 2]],
              [
                  $dollar, '$',
                  [
                      'startLine' => 3,
                      'comments' => [
                          new Comment\Doc('/** doc' . "\n" . 'comment */', 2, 14, 5),
                      ],
                  ],
                  ['endLine' => 3],
              ],
          ],
      ];

      // tests comment extraction
      $cases[] = [
          '<?php /* comment */ // comment' . "\n" . '/** docComment 1 *//** docComment 2 */ token',
          [],
          [
              [
                  Tokens::T_STRING, 'token',
                  [
                      'startLine' => 2,
                      'comments' => [
                          new Comment('/* comment */', 1, 6, 1),
                          new Comment('// comment' . "\n", 1, 20, 3),
                          new Comment\Doc('/** docComment 1 */', 2, 31, 4),
                          new Comment\Doc('/** docComment 2 */', 2, 50, 5),
                      ],
                  ],
                  ['endLine' => 2],
              ],
          ],
      ];

      // tests differing start and end line
      $cases[] = [
          '<?php "foo' . "\n" . 'bar"',
          [],
          [
              [
                  Tokens::T_CONSTANT_ENCAPSED_STRING, '"foo' . "\n" . 'bar"',
                  ['startLine' => 1], ['endLine' => 2],
              ],
          ],
      ];

      // tests exact file offsets
      $cases[] = [
          $offsetCode,
          ['usedAttributes' => ['startFilePos', 'endFilePos']],
          [
              [Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"', ['startFilePos' => 6], ['endFilePos' => 8]],
              [$semicolon, ';', ['startFilePos' => 9], ['endFilePos' => 9]],
              [Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"', ['startFilePos' => 18], ['endFilePos' => 20]],
              [$semicolon, ';', ['startFilePos' => 21], ['endFilePos' => 21]],
          ],
      ];

      // tests token offsets
      $cases[] = [
          $offsetCode,
          ['usedAttributes' => ['startTokenPos', 'endTokenPos']],
          [
              [Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"', ['startTokenPos' => 1], ['endTokenPos' => 1]],
              [$semicolon, ';', ['startTokenPos' => 2], ['endTokenPos' => 2]],
              [Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"', ['startTokenPos' => 5], ['endTokenPos' => 5]],
              [$semicolon, ';', ['startTokenPos' => 6], ['endTokenPos' => 6]],
          ],
      ];

      // tests all attributes being disabled
      $cases[] = [
          '<?php /* foo */ $bar;',
          ['usedAttributes' => []],
          [
              [Tokens::T_VARIABLE, '$bar', [], []],
              [$semicolon, ';', [], []],
          ],
      ];

      // tests no tokens
      $cases[] = ['', [], []];

      return $cases;
  }
  /**
   * Checks that handleHaltCompiler() returns the text following the
   * __halt_compiler construct and that no further tokens are emitted afterwards.
   *
   * @dataProvider provideTestHaltCompiler
   *
   * @param string $code      Source code containing a __halt_compiler construct
   * @param string $remaining Text expected back from handleHaltCompiler()
   */
  public function testHandleHaltCompiler($code, $remaining) {
      $lexer = $this->getLexer();
      $lexer->startLexing($code);

      // Skip ahead to the __halt_compiler token. A token id of 0 marks the end
      // of the input, so seeing it here means the keyword was never lexed:
      // fail right away rather than risk polling the lexer forever.
      do {
          $id = $lexer->getNextToken();
          $this->assertNotSame(0, $id, 'Reached end of input before T_HALT_COMPILER');
      } while (Tokens::T_HALT_COMPILER !== $id);

      $this->assertSame($remaining, $lexer->handleHaltCompiler());
      // Once the remaining text has been taken, the lexer must report end of input.
      $this->assertSame(0, $lexer->getNextToken());
  }
  /**
   * Data sets for testHandleHaltCompiler().
   *
   * Each entry is [code, text expected after the __halt_compiler construct].
   *
   * @return array
   */
  public function provideTestHaltCompiler() {
      $remaining = 'Remaining Text';

      $cases = [];
      // Plain form
      $cases[] = ['<?php ... __halt_compiler();' . $remaining, $remaining];
      // Whitespace between the individual tokens
      $cases[] = ['<?php ... __halt_compiler ( ) ;' . $remaining, $remaining];
      // Terminated by a closing tag instead of a semicolon
      $cases[] = ['<?php ... __halt_compiler() ?>' . $remaining, $remaining];

      // Disabled data sets, kept from the original for reference:
      //array('<?php ... __halt_compiler();' . "\0", "\0"),
      //array('<?php ... __halt_compiler /* */ ( ) ;Remaining Text', 'Remaining Text'),

      return $cases;
  }
  /**
   * Checks that handleHaltCompiler() throws an Error when __halt_compiler
   * is not directly followed by "();".
   */
  public function testHandleHaltCompilerError() {
      $this->expectException(Error::class);
      $this->expectExceptionMessage('__HALT_COMPILER must be followed by "();"');

      $lexer = $this->getLexer();
      $lexer->startLexing('<?php ... __halt_compiler invalid ();');

      // Skip ahead to the __halt_compiler token. A token id of 0 marks the end
      // of the input; failing on it keeps a lexer regression from turning this
      // loop into an endless one. The assertion failure is not the expected
      // Error, so it still fails the test.
      do {
          $id = $lexer->getNextToken();
          $this->assertNotSame(0, $id, 'Reached end of input before T_HALT_COMPILER');
      } while (Tokens::T_HALT_COMPILER !== $id);

      $lexer->handleHaltCompiler();
  }
  /**
   * Checks that getTokens() returns the complete token list for the lexed
   * code, including the open tag, whitespace and comment tokens.
   */
  public function testGetTokens() {
      $source = implode("\n", ['<?php "a";', '// foo', '"b";']);

      $lexer = $this->getLexer();
      $lexer->startLexing($source);

      // Array entries are [token id, text, line]; single characters are plain strings.
      $expected = [
          [T_OPEN_TAG, '<?php ', 1],
          [T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
          ';',
          [T_WHITESPACE, "\n", 1],
          [T_COMMENT, '// foo' . "\n", 2],
          [T_CONSTANT_ENCAPSED_STRING, '"b"', 3],
          ';',
      ];

      $this->assertSame($expected, $lexer->getTokens());
  }
  245. }