15569331642446be05292e3e1f8a51218827168cdclairehoimport sys
25569331642446be05292e3e1f8a51218827168cdclairehofrom unicode_parse_common import *
35569331642446be05292e3e1f8a51218827168cdclaireho
# Source data: http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakProperty.txt
55569331642446be05292e3e1f8a51218827168cdclaireho
# Maps each Grapheme_Cluster_Break property name to the identifier that main()
# writes into the `klass` field of the generated C table.
property_to_harfbuzz = {
    # Line-ending and control characters.
    "CR": "HB_Grapheme_CR",
    "LF": "HB_Grapheme_LF",
    "Control": "HB_Grapheme_Control",
    # Combining / extending characters.
    "Extend": "HB_Grapheme_Extend",
    # These two properties are folded into the generic "Other" class
    # (presumably no dedicated class is available for them -- confirm
    # against harfbuzz-external.h).
    "Prepend": "HB_Grapheme_Other",
    "SpacingMark": "HB_Grapheme_Other",
    # Single-letter and LV/LVT properties, mapped one-to-one.
    "L": "HB_Grapheme_L",
    "V": "HB_Grapheme_V",
    "T": "HB_Grapheme_T",
    "LV": "HB_Grapheme_LV",
    "LVT": "HB_Grapheme_LVT",
}
195569331642446be05292e3e1f8a51218827168cdclaireho
def main(infile, outfile):
    """Write a C header describing the Unicode grapheme break ranges.

    The ranges are obtained from
    unicode_file_parse(infile, property_to_harfbuzz), sorted, and emitted as
    the initialisers of a `grapheme_break_properties` array, followed by a
    constant holding the number of entries.

    Args:
      infile: input handed unchanged to unicode_file_parse.
      outfile: destination for the header; only its write() method is used.
    """
    # Each parsed entry is unpacked as a (start, end, value) triple below;
    # start and end are formatted as hex, value is emitted verbatim.
    ranges = sorted(unicode_file_parse(infile, property_to_harfbuzz))

    # One list element per output line; '' yields a blank separator line.
    lines = [
        '// Generated from Unicode Grapheme break tables',
        '',
        '#ifndef GRAPHEME_BREAK_PROPERTY_H_',
        '#define GRAPHEME_BREAK_PROPERTY_H_',
        '',
        '#include <stdint.h>',
        '#include "harfbuzz-external.h"',
        '',
        'struct grapheme_break_property {',
        '  uint32_t range_start;',
        '  uint32_t range_end;',
        '  HB_GraphemeClass klass;',
        '};',
        '',
        'static const struct grapheme_break_property grapheme_break_properties[] = {',
    ]
    lines.extend('  {0x%x, 0x%x, %s},' % (start, end, value)
                 for (start, end, value) in ranges)
    lines.extend([
        '};',
        '',
        'static const unsigned grapheme_break_properties_count = %d;' % len(ranges),
        '',
        '#endif  // GRAPHEME_BREAK_PROPERTY_H_',
    ])

    # A plain write() works on both Python 2 and Python 3, unlike the
    # Python-2-only `print >>outfile` form, and produces the same bytes.
    outfile.write('\n'.join(lines) + '\n')
405569331642446be05292e3e1f8a51218827168cdclaireho
if __name__ == '__main__':
    # Exactly two arguments are expected: the Unicode data file to read and
    # the C header to write.
    if len(sys.argv) != 3:
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print('Usage: %s <input .txt> <output .h>' % sys.argv[0])
    else:
        # open() replaces the Python-2-only file() builtin, and the with
        # blocks make sure both handles are flushed and closed even when
        # main() raises.
        with open(sys.argv[1], 'r') as infile:
            with open(sys.argv[2], 'w+') as outfile:
                main(infile, outfile)
46