gen_unvis_fixtures.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #! /usr/bin/env python3
  2. import sys
  3. from os.path import abspath, expanduser, dirname, join
  4. from itertools import chain
  5. import json
  6. import argparse
  7. from vis import vis, unvis, VIS_WHITE
  8. __dir__ = dirname(abspath(__file__))
  9. OUTPUT_FILE = join(__dir__, '..', 'fixtures', 'unvis_fixtures.json')
  10. # Add custom fixtures here
  11. CUSTOM_FIXTURES = [
  12. # test long multibyte string
  13. ''.join(chr(cp) for cp in range(1024)),
  14. 'foo bar',
  15. 'foo\nbar',
  16. "$bar = 'baz';",
  17. r'$foo = "\x20\\x20\\\x20\\\\x20"',
  18. '$foo = function($bar) use($baz) {\n\treturn $baz->getFoo()\n};'
  19. ]
  20. RANGES = {
  21. # All valid codepoints in the BMP
  22. 'bmp': chain(range(0x0000, 0xD800), range(0xE000, 0xFFFF)),
  23. # Smaller set of pertinent? codepoints inside BMP
  24. # see: http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
  25. 'small': chain(
  26. # latin blocks
  27. range(0x0000, 0x0250),
  28. # Greek, Cyrillic
  29. range(0x0370, 0x0530),
  30. # Hebrew, Arabic
  31. range(0x590, 0x0700),
  32. # CJK radicals
  33. range(0x2E80, 0x2F00),
  34. # Hiragana, Katakana
  35. range(0x3040, 0x3100)
  36. )
  37. }
  38. if __name__ == '__main__':
  39. argp = argparse.ArgumentParser(
  40. description='Generates test data for Psy\\Test\\Util\\StrTest')
  41. argp.add_argument('-f', '--format-output', action='store_true',
  42. help='Indent JSON output to ease debugging')
  43. argp.add_argument('-a', '--all', action='store_true',
  44. help="""Generates test data for all codepoints of the BMP.
  45. (same as --range=bmp). WARNING: You will need quite
  46. a lot of RAM to run the testsuite !
  47. """)
  48. argp.add_argument('-r', '--range',
  49. help="""Choose the range of codepoints used to generate
  50. test data.""",
  51. choices=list(RANGES.keys()),
  52. default='small')
  53. argp.add_argument('-o', '--output-file',
  54. help="""Write test data to OUTPUT_FILE
  55. (defaults to PSYSH_DIR/test/fixtures)""")
  56. args = argp.parse_args()
  57. cp_range = RANGES['bmp'] if args.all else RANGES[args.range]
  58. indent = 2 if args.format_output else None
  59. if args.output_file:
  60. OUTPUT_FILE = abspath(expanduser(args.output_file))
  61. fixtures = []
  62. # use SMALL_RANGE by default, it should be enough.
  63. # use BMP_RANGE for a more complete smoke test
  64. for codepoint in cp_range:
  65. char = chr(codepoint)
  66. encoded = vis(char, VIS_WHITE)
  67. decoded = unvis(encoded)
  68. fixtures.append((encoded, decoded))
  69. # Add our own custom fixtures at the end,
  70. # since they would fail anyway if one of the previous did.
  71. for fixture in CUSTOM_FIXTURES:
  72. encoded = vis(fixture, VIS_WHITE)
  73. decoded = unvis(encoded)
  74. fixtures.append((encoded, decoded))
  75. with open(OUTPUT_FILE, 'w') as fp:
  76. # dump as json to avoid backslashin and quotin nightmare
  77. # between php and python
  78. json.dump(fixtures, fp, indent=indent)
  79. sys.exit(0)