EmailLexer.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. <?php
  2. namespace Egulias\EmailValidator;
  3. use Doctrine\Common\Lexer\AbstractLexer;
  4. class EmailLexer extends AbstractLexer
  5. {
  6. //ASCII values
  7. const C_DEL = 127;
  8. const C_NUL = 0;
  9. const S_AT = 64;
  10. const S_BACKSLASH = 92;
  11. const S_DOT = 46;
  12. const S_DQUOTE = 34;
  13. const S_OPENPARENTHESIS = 49;
  14. const S_CLOSEPARENTHESIS = 261;
  15. const S_OPENBRACKET = 262;
  16. const S_CLOSEBRACKET = 263;
  17. const S_HYPHEN = 264;
  18. const S_COLON = 265;
  19. const S_DOUBLECOLON = 266;
  20. const S_SP = 267;
  21. const S_HTAB = 268;
  22. const S_CR = 269;
  23. const S_LF = 270;
  24. const S_IPV6TAG = 271;
  25. const S_LOWERTHAN = 272;
  26. const S_GREATERTHAN = 273;
  27. const S_COMMA = 274;
  28. const S_SEMICOLON = 275;
  29. const S_OPENQBRACKET = 276;
  30. const S_CLOSEQBRACKET = 277;
  31. const S_SLASH = 278;
  32. const S_EMPTY = null;
  33. const GENERIC = 300;
  34. const CRLF = 301;
  35. const INVALID = 302;
  36. const ASCII_INVALID_FROM = 127;
  37. const ASCII_INVALID_TO = 199;
  38. /**
  39. * US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
  40. *
  41. * @var array
  42. */
  43. protected $charValue = array(
  44. '(' => self::S_OPENPARENTHESIS,
  45. ')' => self::S_CLOSEPARENTHESIS,
  46. '<' => self::S_LOWERTHAN,
  47. '>' => self::S_GREATERTHAN,
  48. '[' => self::S_OPENBRACKET,
  49. ']' => self::S_CLOSEBRACKET,
  50. ':' => self::S_COLON,
  51. ';' => self::S_SEMICOLON,
  52. '@' => self::S_AT,
  53. '\\' => self::S_BACKSLASH,
  54. '/' => self::S_SLASH,
  55. ',' => self::S_COMMA,
  56. '.' => self::S_DOT,
  57. '"' => self::S_DQUOTE,
  58. '-' => self::S_HYPHEN,
  59. '::' => self::S_DOUBLECOLON,
  60. ' ' => self::S_SP,
  61. "\t" => self::S_HTAB,
  62. "\r" => self::S_CR,
  63. "\n" => self::S_LF,
  64. "\r\n" => self::CRLF,
  65. 'IPv6' => self::S_IPV6TAG,
  66. '{' => self::S_OPENQBRACKET,
  67. '}' => self::S_CLOSEQBRACKET,
  68. '' => self::S_EMPTY,
  69. '\0' => self::C_NUL,
  70. );
  71. protected $hasInvalidTokens = false;
  72. protected $previous;
  73. public function reset()
  74. {
  75. $this->hasInvalidTokens = false;
  76. parent::reset();
  77. }
  78. public function hasInvalidTokens()
  79. {
  80. return $this->hasInvalidTokens;
  81. }
  82. /**
  83. * @param $type
  84. * @throws \UnexpectedValueException
  85. * @return boolean
  86. */
  87. public function find($type)
  88. {
  89. $search = clone $this;
  90. $search->skipUntil($type);
  91. if (!$search->lookahead) {
  92. throw new \UnexpectedValueException($type . ' not found');
  93. }
  94. return true;
  95. }
  96. /**
  97. * getPrevious
  98. *
  99. * @return array token
  100. */
  101. public function getPrevious()
  102. {
  103. return $this->previous;
  104. }
  105. /**
  106. * moveNext
  107. *
  108. * @return boolean
  109. */
  110. public function moveNext()
  111. {
  112. $this->previous = $this->token;
  113. return parent::moveNext();
  114. }
  115. /**
  116. * Lexical catchable patterns.
  117. *
  118. * @return string[]
  119. */
  120. protected function getCatchablePatterns()
  121. {
  122. return array(
  123. '[a-zA-Z_]+[46]?', //ASCII and domain literal
  124. '[^\x00-\x7F]', //UTF-8
  125. '[0-9]+',
  126. '\r\n',
  127. '::',
  128. '\s+?',
  129. '.',
  130. );
  131. }
  132. /**
  133. * Lexical non-catchable patterns.
  134. *
  135. * @return string[]
  136. */
  137. protected function getNonCatchablePatterns()
  138. {
  139. return array('[\xA0-\xff]+');
  140. }
  141. /**
  142. * Retrieve token type. Also processes the token value if necessary.
  143. *
  144. * @param string $value
  145. * @throws \InvalidArgumentException
  146. * @return integer
  147. */
  148. protected function getType(&$value)
  149. {
  150. if ($this->isNullType($value)) {
  151. return self::C_NUL;
  152. }
  153. if ($this->isValid($value)) {
  154. return $this->charValue[$value];
  155. }
  156. if ($this->isUTF8Invalid($value)) {
  157. $this->hasInvalidTokens = true;
  158. return self::INVALID;
  159. }
  160. return self::GENERIC;
  161. }
  162. protected function isValid($value)
  163. {
  164. if (isset($this->charValue[$value])) {
  165. return true;
  166. }
  167. return false;
  168. }
  169. /**
  170. * @param $value
  171. * @return bool
  172. */
  173. protected function isNullType($value)
  174. {
  175. if ($value === "\0") {
  176. return true;
  177. }
  178. return false;
  179. }
  180. /**
  181. * @param $value
  182. * @return bool
  183. */
  184. protected function isUTF8Invalid($value)
  185. {
  186. if (preg_match('/\p{Cc}+/u', $value)) {
  187. return true;
  188. }
  189. return false;
  190. }
  191. protected function getModifiers()
  192. {
  193. return 'iu';
  194. }
  195. }