1import sys
2from unicode_parse_common import *
3
4# http://www.unicode.org/Public/5.1.0/ucd/extracted/DerivedGeneralCategory.txt
5
# Lookup table keyed by the two-letter Unicode General_Category abbreviation;
# each value is the name of the corresponding HB_CharCategory enumerator.
# main() hands this table to unicode_file_parse().
category_to_harfbuzz = dict(
  # Marks
  Mn='HB_Mark_NonSpacing',
  Mc='HB_Mark_SpacingCombining',
  Me='HB_Mark_Enclosing',

  # Numbers
  Nd='HB_Number_DecimalDigit',
  Nl='HB_Number_Letter',
  No='HB_Number_Other',

  # Separators
  Zs='HB_Separator_Space',
  Zl='HB_Separator_Line',
  Zp='HB_Separator_Paragraph',

  # Other (control, format, surrogate, private use, unassigned)
  Cc='HB_Other_Control',
  Cf='HB_Other_Format',
  Cs='HB_Other_Surrogate',
  Co='HB_Other_PrivateUse',
  Cn='HB_Other_NotAssigned',

  # Letters
  Lu='HB_Letter_Uppercase',
  Ll='HB_Letter_Lowercase',
  Lt='HB_Letter_Titlecase',
  Lm='HB_Letter_Modifier',
  Lo='HB_Letter_Other',

  # Punctuation
  Pc='HB_Punctuation_Connector',
  Pd='HB_Punctuation_Dash',
  Ps='HB_Punctuation_Open',
  Pe='HB_Punctuation_Close',
  Pi='HB_Punctuation_InitialQuote',
  Pf='HB_Punctuation_FinalQuote',
  Po='HB_Punctuation_Other',

  # Symbols
  Sm='HB_Symbol_Math',
  Sc='HB_Symbol_Currency',
  Sk='HB_Symbol_Modifier',
  So='HB_Symbol_Other',
)
44
def main(infile, outfile):
  """Write a C header holding the category range table to outfile.

  infile is handed to unicode_file_parse() together with
  category_to_harfbuzz, and the result is passed through sort_and_merge()
  (both helpers are presumably supplied by the star import of
  unicode_parse_common -- confirm there for their exact contract).  The
  merged result must be a sized sequence of (start, end, value) triples:
  start and end are formatted as hexadecimal integers and value is emitted
  verbatim as the HB_CharCategory initializer of each table row.

  outfile only needs a write() method.  The header is written with a single
  write() call instead of the Python 2-only ``print >>outfile`` form, which
  still parses under Python 3 but fails at run time; the bytes produced are
  the same as before.
  """
  ranges = sort_and_merge(unicode_file_parse(infile, category_to_harfbuzz))

  # NOTE(review): the banner says "script tables" although this tool emits
  # general categories; the text is kept as-is so the generated header does
  # not change.
  lines = [
    '// Generated from Unicode script tables',
    '',
    '#ifndef CATEGORY_PROPERTIES_H_',
    '#define CATEGORY_PROPERTIES_H_',
    '',
    '#include <stdint.h>',
    '#include "harfbuzz-external.h"',
    '',
    'struct category_property {',
    '  uint32_t range_start;',
    '  uint32_t range_end;',
    '  HB_CharCategory category;',
    '};',
    '',
    'static const struct category_property category_properties[] = {',
  ]
  # One initializer row per merged range.
  lines.extend('  {0x%x, 0x%x, %s},' % (start, end, value)
               for (start, end, value) in ranges)
  lines.extend([
    '};',
    '',
    'static const unsigned category_properties_count = %d;' % len(ranges),
    '',
    '#endif  // CATEGORY_PROPERTIES_H_',
  ])

  # Every line, including the last, is newline-terminated.
  outfile.write('\n'.join(lines) + '\n')
65
# Command-line entry point: <script> <input .txt> <output .h>
if __name__ == '__main__':
  if len(sys.argv) != 3:
    # Single-argument parenthesised form prints the same text under both
    # Python 2 and Python 3.
    print('Usage: %s <input .txt> <output .h>' % sys.argv[0])
    # Report the usage error to the caller (e.g. a build script) instead of
    # exiting with status 0.
    sys.exit(1)
  else:
    # open() replaces the Python 2-only file() builtin, and the context
    # managers close both files -- flushing the generated header -- even if
    # main() raises.  The input is opened first, so a missing input file does
    # not truncate an existing output file.
    with open(sys.argv[1], 'r') as infile:
      with open(sys.argv[2], 'w+') as outfile:
        main(infile, outfile)
71