1import sys
2from unicode_parse_common import *
3
4# http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakProperty.txt
5
# Lookup table handed to unicode_file_parse() by main(): keys are property
# names, values are the HB_Grapheme_* identifiers written into the header.
# Every name maps to the identically named HB_Grapheme_<name> constant,
# except Prepend and SpacingMark, which are both emitted as HB_Grapheme_Other.
property_to_harfbuzz = dict(
  (name,
   'HB_Grapheme_Other' if name in ('Prepend', 'SpacingMark')
   else 'HB_Grapheme_' + name)
  for name in (
    'CR', 'LF', 'Control', 'Extend', 'Prepend', 'SpacingMark',
    'L', 'V', 'T', 'LV', 'LVT',
  )
)
19
def main(infile, outfile):
  """Write the grapheme-break range table to outfile as a C header.

  Args:
    infile: handed unchanged to unicode_file_parse() together with
      property_to_harfbuzz. NOTE(review): presumably an open
      GraphemeBreakProperty.txt; confirm against unicode_parse_common.
    outfile: writable text file object; only its write() method is used.

  The parse result is consumed as (start, end, value) triples. They are
  emitted in sorted order as initialisers of grapheme_break_properties[],
  followed by the entry count, all inside an include guard.
  """
  # sorted() accepts any iterable, so the parser is not required to return a
  # list, and the parser's own result object is left unmodified.
  ranges = sorted(unicode_file_parse(infile, property_to_harfbuzz))

  # write() is used instead of the Python-2-only "print >>outfile" statement
  # so this function parses on Python 2 and 3 alike. Each string spells out
  # the newline that print used to append, keeping the output byte-identical.
  emit = outfile.write
  emit('// Generated from Unicode Grapheme break tables\n\n')
  emit('#ifndef GRAPHEME_BREAK_PROPERTY_H_\n')
  emit('#define GRAPHEME_BREAK_PROPERTY_H_\n\n')
  emit('#include <stdint.h>\n')
  emit('#include "harfbuzz-external.h"\n\n')
  emit('struct grapheme_break_property {\n')
  emit('  uint32_t range_start;\n')
  emit('  uint32_t range_end;\n')
  emit('  HB_GraphemeClass klass;\n')
  emit('};\n\n')
  emit('static const struct grapheme_break_property grapheme_break_properties[] = {\n')
  for (start, end, value) in ranges:
    emit('  {0x%x, 0x%x, %s},\n' % (start, end, value))
  emit('};\n\n')
  emit('static const unsigned grapheme_break_properties_count = %d;\n\n' % len(ranges))
  emit('#endif  // GRAPHEME_BREAK_PROPERTY_H_\n')
40
# Command-line entry point: <script> <input .txt> <output .h>
if __name__ == '__main__':
  if len(sys.argv) != 3:
    # Report misuse on stderr with a failing exit status so that a calling
    # build step notices instead of continuing without a generated header.
    sys.stderr.write('Usage: %s <input .txt> <output .h>\n' % sys.argv[0])
    sys.exit(1)
  # The with-blocks close both files (flushing the header to disk) even if
  # main() raises. They are nested rather than combined so the statement also
  # parses on interpreters older than Python 2.7.
  with open(sys.argv[1], 'r') as infile:
    with open(sys.argv[2], 'w') as outfile:
      main(infile, outfile)
46