/**
 * Build a regular expression that matches any single character belonging to a
 * fixed set of symbol-oriented Unicode blocks (phonetic extensions, currency
 * signs, arrows, mathematical operators, box drawing, dingbats, private use…).
 *
 * Note: despite "NonBmp" in the function name, every range in the pattern is
 * a single UTF-16 code unit between U+1D00 and U+F8FF, i.e. it lies inside the
 * Basic Multilingual Plane. No surrogate pairs are matched here.
 *
 * References:
 * - https://github.com/mathiasbynens/regenerate
 * - https://unicode-table.com/
 * - https://mathiasbynens.be/notes/javascript-unicode
 * - http://kourge.net/projects/regexp-unicode-block
 *
 * @returns {RegExp} a global (`g`) expression; the literal is evaluated on
 *   each call, so callers never share `lastIndex` state
 */
export function getUnicodeNonBmpRegExp() {
  // Unicode blocks covered by the character class, in pattern order:
  //   \u1D00-\u1D7F  Phonetic Extensions
  //   \u1D80-\u1DBF  Phonetic Extensions Supplement
  //   \u1DC0-\u1DFF  Combining Diacritical Marks Supplement
  //   \u20A0-\u20CF  Currency Symbols
  //   \u20D0-\u20FF  Combining Diacritical Marks for Symbols
  //   \u2100-\u214F  Letterlike Symbols
  //   \u2150-\u218F  Number Forms (e.g. Roman numerals)
  //   \u2190-\u21FF  Arrows
  //   \u2200-\u22FF  Mathematical Operators
  //   \u2300-\u23FF  Miscellaneous Technical
  //   \u2400-\u243F  Control Pictures
  //   \u2440-\u245F  Optical Character Recognition
  //   \u2460-\u24FF  Enclosed Alphanumerics
  //   \u2500-\u257F  Box Drawing
  //   \u2580-\u259F  Block Elements
  //   \u25A0-\u25FF  Geometric Shapes
  //   \u2600-\u26FF  Miscellaneous Symbols
  //   \u2700-\u27BF  Dingbats
  //   \uE000-\uF8FF  Private Use Area
  //
  // The General Punctuation block (\u2000-\u206F) is intentionally absent;
  // getPunctuationRegExp covers it.
  const symbolBlocksPattern = /[\u1D00-\u1D7F\u1D80-\u1DBF\u1DC0-\u1DFF\u20A0-\u20CF\u20D0-\u20FF\u2100-\u214F\u2150-\u218F\u2190-\u21FF\u2200-\u22FF\u2300-\u23FF\u2400-\u243F\u2440-\u245F\u2460-\u24FF\u2500-\u257F\u2580-\u259F\u25A0-\u25FF\u2600-\u26FF\u2700-\u27BF\uE000-\uF8FF]/g;

  return symbolBlocksPattern;
}

/**
 * Build a regular expression that matches any single punctuation character.
 *
 * @returns {RegExp} a global (`g`) expression, created anew on each call
 */
export function getPunctuationRegExp() {
  // The character class is the union of:
  //   - the US-ASCII punctuation characters, backslash and both quote marks
  //     included (reference: http://kunststube.net/encoding/)
  //   - a few Latin-1 / currency signs listed literally: £ ¢ ¥ § € ±
  //   - the whole General Punctuation block, \u2000-\u206F
  //   - the whole Supplemental Punctuation block, \u2E00-\u2E7F
  //     (reference: https://en.wikipedia.org/wiki/Supplemental_Punctuation)
  const punctuationPattern = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&£¢¥§€()*+,\-.\/:;<=>?@\[\]^_`{|}~±]/g;

  return punctuationPattern;
}

/**
 * Build a regular expression that matches characters of Supplementary Private
 * Use Area-A (U+F0000 through U+FFFFF).
 *
 * @returns {RegExp} a global (`g`) expression, created anew on each call
 */
export function getSupplementaryPrivateUseRegExp() {
  // Supplementary Private Use Area-A
  // (https://www.unicode.org/charts/PDF/UF0000.pdf) spans F0000..FFFFF. ES5
  // regular expressions cannot name such code points directly, so the pattern
  // looks for the UTF-16 surrogate pair that encodes them instead:
  //
  //   first class,  \uDB80-\uDBBF : the high surrogates used by F0000..FFFFF
  //                                 (https://www.unicode.org/charts/PDF/UD800.pdf)
  //   second class, \uDC00-\uDFFF : any low surrogate
  //                                 (https://www.unicode.org/charts/PDF/UDC00.pdf)
  //
  // Code points FFFFE and FFFFF are "noncharacters" (FFFFD is the last real
  // private-use code point). They are matched on purpose, because the goal is
  // to catch anything that should not be checked for color contrast. Hence
  // the low surrogate range runs to DFFF rather than stopping at DFFD.
  const privateUsePairPattern = /[\uDB80-\uDBBF][\uDC00-\uDFFF]/g;

  return privateUsePairPattern;
}