12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
/**
 * Build the regular expression used to find symbol-like unicode characters
 * (phonetic extensions, currency signs, arrows, math operators, dingbats,
 * private-use glyphs, ...).
 *
 * NOTE(review): despite the function name, every range in the pattern is a
 * four-digit `\uXXXX` code point, i.e. it lies inside the Basic Multilingual
 * Plane (BMP).
 *
 * Background reading:
 * - https://github.com/mathiasbynens/regenerate
 * - https://unicode-table.com/
 * - https://mathiasbynens.be/notes/javascript-unicode
 * - http://kourge.net/projects/regexp-unicode-block
 *
 * @returns {RegExp} a fresh global (`g`) regex on every call
 */
export function getUnicodeNonBmpRegExp() {
  /*
   * Unicode blocks covered by the character class, in order:
   *
   *   \u1D00-\u1D7F  Phonetic Extensions
   *   \u1D80-\u1DBF  Phonetic Extensions Supplement
   *   \u1DC0-\u1DFF  Combining Diacritical Marks Supplement
   *   \u20A0-\u20CF  Currency Symbols
   *   \u20D0-\u20FF  Combining Diacritical Marks for Symbols
   *   \u2100-\u214F  Letterlike Symbols
   *   \u2150-\u218F  Number Forms (e.g. Roman numerals)
   *   \u2190-\u21FF  Arrows
   *   \u2200-\u22FF  Mathematical Operators
   *   \u2300-\u23FF  Miscellaneous Technical
   *   \u2400-\u243F  Control Pictures
   *   \u2440-\u245F  Optical Character Recognition
   *   \u2460-\u24FF  Enclosed Alphanumerics
   *   \u2500-\u257F  Box Drawing
   *   \u2580-\u259F  Block Elements
   *   \u25A0-\u25FF  Geometric Shapes
   *   \u2600-\u26FF  Miscellaneous Symbols
   *   \u2700-\u27BF  Dingbats
   *   \uE000-\uF8FF  Private Use Area
   *
   * The General Punctuation block (\u2000-\u206F) is deliberately left out;
   * it is matched by getPunctuationRegExp instead.
   */
  const symbolBlocksPattern = /[\u1D00-\u1D7F\u1D80-\u1DBF\u1DC0-\u1DFF\u20A0-\u20CF\u20D0-\u20FF\u2100-\u214F\u2150-\u218F\u2190-\u21FF\u2200-\u22FF\u2300-\u23FF\u2400-\u243F\u2440-\u245F\u2460-\u24FF\u2500-\u257F\u2580-\u259F\u25A0-\u25FF\u2600-\u26FF\u2700-\u27BF\uE000-\uF8FF]/g;

  return symbolBlocksPattern;
}
/**
 * Build the regular expression used to find punctuation characters.
 *
 * @returns {RegExp} a fresh global (`g`) regex on every call
 */
export function getPunctuationRegExp() {
  /*
   * What the character class contains:
   *
   * 1. US-ASCII punctuation (see http://kunststube.net/encoding/):
   *      !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
   *
   * 2. A few non-ASCII symbols written literally in the pattern:
   *      £ ¢ ¥ § € ±
   *
   * 3. The General Punctuation block:
   *      \u2000-\u206F
   *
   * 4. The Supplemental Punctuation block
   *    (https://en.wikipedia.org/wiki/Supplemental_Punctuation):
   *      \u2E00-\u2E7F
   */
  const punctuationPattern = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&£¢¥§€()*+,\-.\/:;<=>?@\[\]^_`{|}~±]/g;

  return punctuationPattern;
}
/**
 * Build the regular expression used to find characters from
 * Supplementary Private Use Area-A.
 *
 * @returns {RegExp} a fresh global (`g`) regex on every call
 */
export function getSupplementaryPrivateUseRegExp() {
  /*
   * Supplementary Private Use Area-A
   * (https://www.unicode.org/charts/PDF/UF0000.pdf) spans code points
   * F0000 through FFFFF. ES5 regular expressions cannot name such code
   * points directly, so the pattern looks for the UTF-16 surrogate pair
   * that encodes them:
   *
   *   [\uDB80-\uDBBF]  high surrogate (https://www.unicode.org/charts/PDF/UD800.pdf)
   *   [\uDC00-\uDFFF]  low surrogate  (https://www.unicode.org/charts/PDF/UDC00.pdf)
   *
   * The last two code points of the plane, FFFFE and FFFFF, are
   * "noncharacters". They are matched on purpose: the regex exists to find
   * things we do not want to check color contrast for, so the low surrogate
   * range runs all the way to DFFF rather than stopping at DFFD.
   */
  const planeFifteenPairPattern = /[\uDB80-\uDBBF][\uDC00-\uDFFF]/g;

  return planeFifteenPairPattern;
}
|