DomainPart.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. <?php
  2. namespace Egulias\EmailValidator\Parser;
  3. use Egulias\EmailValidator\EmailLexer;
  4. use Egulias\EmailValidator\Exception\CharNotAllowed;
  5. use Egulias\EmailValidator\Exception\CommaInDomain;
  6. use Egulias\EmailValidator\Exception\ConsecutiveAt;
  7. use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
  8. use Egulias\EmailValidator\Exception\CRNoLF;
  9. use Egulias\EmailValidator\Exception\DomainHyphened;
  10. use Egulias\EmailValidator\Exception\DotAtEnd;
  11. use Egulias\EmailValidator\Exception\DotAtStart;
  12. use Egulias\EmailValidator\Exception\ExpectingATEXT;
  13. use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
  14. use Egulias\EmailValidator\Exception\ExpectingDTEXT;
  15. use Egulias\EmailValidator\Exception\NoDomainPart;
  16. use Egulias\EmailValidator\Exception\UnopenedComment;
  17. use Egulias\EmailValidator\Warning\AddressLiteral;
  18. use Egulias\EmailValidator\Warning\CFWSWithFWS;
  19. use Egulias\EmailValidator\Warning\DeprecatedComment;
  20. use Egulias\EmailValidator\Warning\DomainLiteral;
  21. use Egulias\EmailValidator\Warning\DomainTooLong;
  22. use Egulias\EmailValidator\Warning\IPV6BadChar;
  23. use Egulias\EmailValidator\Warning\IPV6ColonEnd;
  24. use Egulias\EmailValidator\Warning\IPV6ColonStart;
  25. use Egulias\EmailValidator\Warning\IPV6Deprecated;
  26. use Egulias\EmailValidator\Warning\IPV6DoubleColon;
  27. use Egulias\EmailValidator\Warning\IPV6GroupCount;
  28. use Egulias\EmailValidator\Warning\IPV6MaxGroups;
  29. use Egulias\EmailValidator\Warning\LabelTooLong;
  30. use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
  31. use Egulias\EmailValidator\Warning\TLD;
  32. class DomainPart extends Parser
  33. {
  34. const DOMAIN_MAX_LENGTH = 254;
  35. protected $domainPart = '';
  36. public function parse($domainPart)
  37. {
  38. $this->lexer->moveNext();
  39. if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
  40. throw new DotAtStart();
  41. }
  42. if ($this->lexer->token['type'] === EmailLexer::S_EMPTY) {
  43. throw new NoDomainPart();
  44. }
  45. if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
  46. throw new DomainHyphened();
  47. }
  48. if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
  49. $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
  50. $this->parseDomainComments();
  51. }
  52. $domain = $this->doParseDomainPart();
  53. $prev = $this->lexer->getPrevious();
  54. $length = strlen($domain);
  55. if ($prev['type'] === EmailLexer::S_DOT) {
  56. throw new DotAtEnd();
  57. }
  58. if ($prev['type'] === EmailLexer::S_HYPHEN) {
  59. throw new DomainHyphened();
  60. }
  61. if ($length > self::DOMAIN_MAX_LENGTH) {
  62. $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
  63. }
  64. if ($prev['type'] === EmailLexer::S_CR) {
  65. throw new CRLFAtTheEnd();
  66. }
  67. $this->domainPart = $domain;
  68. }
  69. public function getDomainPart()
  70. {
  71. return $this->domainPart;
  72. }
  73. public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
  74. {
  75. $prev = $this->lexer->getPrevious();
  76. if ($prev['type'] === EmailLexer::S_COLON) {
  77. $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
  78. }
  79. $IPv6 = substr($addressLiteral, 5);
  80. //Daniel Marschall's new IPv6 testing strategy
  81. $matchesIP = explode(':', $IPv6);
  82. $groupCount = count($matchesIP);
  83. $colons = strpos($IPv6, '::');
  84. if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
  85. $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
  86. }
  87. if ($colons === false) {
  88. // We need exactly the right number of groups
  89. if ($groupCount !== $maxGroups) {
  90. $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
  91. }
  92. return;
  93. }
  94. if ($colons !== strrpos($IPv6, '::')) {
  95. $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
  96. return;
  97. }
  98. if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
  99. // RFC 4291 allows :: at the start or end of an address
  100. //with 7 other groups in addition
  101. ++$maxGroups;
  102. }
  103. if ($groupCount > $maxGroups) {
  104. $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
  105. } elseif ($groupCount === $maxGroups) {
  106. $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
  107. }
  108. }
  109. protected function doParseDomainPart()
  110. {
  111. $domain = '';
  112. $openedParenthesis = 0;
  113. do {
  114. $prev = $this->lexer->getPrevious();
  115. $this->checkNotAllowedChars($this->lexer->token);
  116. if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
  117. $this->parseComments();
  118. $openedParenthesis += $this->getOpenedParenthesis();
  119. $this->lexer->moveNext();
  120. $tmpPrev = $this->lexer->getPrevious();
  121. if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
  122. $openedParenthesis--;
  123. }
  124. }
  125. if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
  126. if ($openedParenthesis === 0) {
  127. throw new UnopenedComment();
  128. } else {
  129. $openedParenthesis--;
  130. }
  131. }
  132. $this->checkConsecutiveDots();
  133. $this->checkDomainPartExceptions($prev);
  134. if ($this->hasBrackets()) {
  135. $this->parseDomainLiteral();
  136. }
  137. $this->checkLabelLength($prev);
  138. if ($this->isFWS()) {
  139. $this->parseFWS();
  140. }
  141. $domain .= $this->lexer->token['value'];
  142. $this->lexer->moveNext();
  143. } while ($this->lexer->token);
  144. return $domain;
  145. }
  146. private function checkNotAllowedChars($token)
  147. {
  148. $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH=> true];
  149. if (isset($notAllowed[$token['type']])) {
  150. throw new CharNotAllowed();
  151. }
  152. }
  153. protected function parseDomainLiteral()
  154. {
  155. if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
  156. $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
  157. }
  158. if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
  159. $lexer = clone $this->lexer;
  160. $lexer->moveNext();
  161. if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
  162. $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
  163. }
  164. }
  165. return $this->doParseDomainLiteral();
  166. }
  167. protected function doParseDomainLiteral()
  168. {
  169. $IPv6TAG = false;
  170. $addressLiteral = '';
  171. do {
  172. if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
  173. throw new ExpectingDTEXT();
  174. }
  175. if ($this->lexer->token['type'] === EmailLexer::INVALID ||
  176. $this->lexer->token['type'] === EmailLexer::C_DEL ||
  177. $this->lexer->token['type'] === EmailLexer::S_LF
  178. ) {
  179. $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
  180. }
  181. if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
  182. throw new ExpectingDTEXT();
  183. }
  184. if ($this->lexer->isNextTokenAny(
  185. array(EmailLexer::S_HTAB, EmailLexer::S_SP, $this->lexer->token['type'] === EmailLexer::CRLF)
  186. )) {
  187. $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
  188. $this->parseFWS();
  189. }
  190. if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
  191. throw new CRNoLF();
  192. }
  193. if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
  194. $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
  195. $addressLiteral .= $this->lexer->token['value'];
  196. $this->lexer->moveNext();
  197. $this->validateQuotedPair();
  198. }
  199. if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
  200. $IPv6TAG = true;
  201. }
  202. if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
  203. break;
  204. }
  205. $addressLiteral .= $this->lexer->token['value'];
  206. } while ($this->lexer->moveNext());
  207. $addressLiteral = str_replace('[', '', $addressLiteral);
  208. $addressLiteral = $this->checkIPV4Tag($addressLiteral);
  209. if (false === $addressLiteral) {
  210. return $addressLiteral;
  211. }
  212. if (!$IPv6TAG) {
  213. $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
  214. return $addressLiteral;
  215. }
  216. $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
  217. $this->checkIPV6Tag($addressLiteral);
  218. return $addressLiteral;
  219. }
  220. protected function checkIPV4Tag($addressLiteral)
  221. {
  222. $matchesIP = array();
  223. // Extract IPv4 part from the end of the address-literal (if there is one)
  224. if (preg_match(
  225. '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
  226. $addressLiteral,
  227. $matchesIP
  228. ) > 0
  229. ) {
  230. $index = strrpos($addressLiteral, $matchesIP[0]);
  231. if ($index === 0) {
  232. $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
  233. return false;
  234. }
  235. // Convert IPv4 part to IPv6 format for further testing
  236. $addressLiteral = substr($addressLiteral, 0, $index) . '0:0';
  237. }
  238. return $addressLiteral;
  239. }
  240. protected function checkDomainPartExceptions($prev)
  241. {
  242. $invalidDomainTokens = array(
  243. EmailLexer::S_DQUOTE => true,
  244. EmailLexer::S_SEMICOLON => true,
  245. EmailLexer::S_GREATERTHAN => true,
  246. EmailLexer::S_LOWERTHAN => true,
  247. );
  248. if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
  249. throw new ExpectingATEXT();
  250. }
  251. if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
  252. throw new CommaInDomain();
  253. }
  254. if ($this->lexer->token['type'] === EmailLexer::S_AT) {
  255. throw new ConsecutiveAt();
  256. }
  257. if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
  258. throw new ExpectingATEXT();
  259. }
  260. if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
  261. throw new DomainHyphened();
  262. }
  263. if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
  264. && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
  265. throw new ExpectingATEXT();
  266. }
  267. }
  268. protected function hasBrackets()
  269. {
  270. if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
  271. return false;
  272. }
  273. try {
  274. $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
  275. } catch (\RuntimeException $e) {
  276. throw new ExpectingDomainLiteralClose();
  277. }
  278. return true;
  279. }
  280. protected function checkLabelLength($prev)
  281. {
  282. if ($this->lexer->token['type'] === EmailLexer::S_DOT &&
  283. $prev['type'] === EmailLexer::GENERIC &&
  284. strlen($prev['value']) > 63
  285. ) {
  286. $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
  287. }
  288. }
  289. protected function parseDomainComments()
  290. {
  291. $this->isUnclosedComment();
  292. while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
  293. $this->warnEscaping();
  294. $this->lexer->moveNext();
  295. }
  296. $this->lexer->moveNext();
  297. if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
  298. throw new ExpectingATEXT();
  299. }
  300. }
  301. protected function addTLDWarnings()
  302. {
  303. if ($this->warnings[DomainLiteral::CODE]) {
  304. $this->warnings[TLD::CODE] = new TLD();
  305. }
  306. }
  307. }