15569331642446be05292e3e1f8a51218827168cdclairehoimport sys
25569331642446be05292e3e1f8a51218827168cdclairehofrom unicode_parse_common import *
35569331642446be05292e3e1f8a51218827168cdclaireho
# Input data: http://www.unicode.org/Public/5.1.0/ucd/Scripts.txt
55569331642446be05292e3e1f8a51218827168cdclaireho
# Maps a script name to the HB_Script_* identifier written into the
# generated header.  Every identifier is simply 'HB_Script_' followed by the
# script name, so the table is derived from the list of names below.
script_to_harfbuzz = dict(
  (script_name, 'HB_Script_' + script_name)
  for script_name in (
    # This is the list of HB_Script_* at the time of writing
    'Common',
    'Greek',
    'Cyrillic',
    'Armenian',
    'Hebrew',
    'Arabic',
    'Syriac',
    'Thaana',
    'Devanagari',
    'Bengali',
    'Gurmukhi',
    'Gujarati',
    'Oriya',
    'Tamil',
    'Telugu',
    'Kannada',
    'Malayalam',
    'Sinhala',
    'Thai',
    'Lao',
    'Tibetan',
    'Myanmar',
    'Georgian',
    'Hangul',
    'Ogham',
    'Runic',
    'Khmer',
    'Inherited',
  )
)
375569331642446be05292e3e1f8a51218827168cdclaireho
class ScriptDict(object):
  """Subscript-only view of a mapping that never fails on an unknown key.

  Indexing returns the wrapped mapping's value for the key.  When the
  wrapped mapping has no entry for the key, or stores None for it, the
  string 'HB_Script_Common' is returned instead.
  """

  def __init__(self, base):
    # Any object with a dict-style get() works; it is kept by reference.
    self.base = base

  def __getitem__(self, key):
    found = self.base.get(key)
    return 'HB_Script_Common' if found is None else found
475569331642446be05292e3e1f8a51218827168cdclaireho
def main(infile, outfile):
  """Write a C header describing script ranges parsed from infile.

  infile is handed to unicode_file_parse() together with a ScriptDict over
  script_to_harfbuzz and the string 'HB_Script_Common'; the result is passed
  through sort_and_merge().  Both helpers come from unicode_parse_common;
  presumably the ScriptDict translates script names and 'HB_Script_Common'
  is the default value -- confirm against that module.

  The result must be a sized sequence of (start, end, value) triples.  Each
  triple becomes one initializer of the generated script_properties[] array,
  and the number of triples is emitted as script_properties_count.

  Args:
    infile: input object passed unchanged to unicode_file_parse().
    outfile: object with a write() method accepting text; the header is
      written to it.  It is not closed here.
  """
  ranges = sort_and_merge(
      unicode_file_parse(infile,
                         ScriptDict(script_to_harfbuzz),
                         'HB_Script_Common'))

  def emit(text):
    # Text plus exactly one newline: what `print >>outfile, text` produced,
    # but valid on both Python 2 and Python 3.
    outfile.write(text + '\n')

  # Entries that already end in '\n' deliberately yield a blank line after
  # them in the generated header.
  preamble = (
    '// Generated from Unicode script tables\n',
    '#ifndef SCRIPT_PROPERTIES_H_',
    '#define SCRIPT_PROPERTIES_H_\n',
    '#include <stdint.h>',
    '#include "harfbuzz-shaper.h"\n',
    'struct script_property {',
    '  uint32_t range_start;',
    '  uint32_t range_end;',
    '  HB_Script script;',
    '};\n',
    'static const struct script_property script_properties[] = {',
  )
  for text in preamble:
    emit(text)

  for (start, end, value) in ranges:
    emit('  {0x%x, 0x%x, %s},' % (start, end, value))

  emit('};\n')
  emit('static const unsigned script_properties_count = %d;\n' % len(ranges))
  emit('#endif  // SCRIPT_PROPERTIES_H_')
705569331642446be05292e3e1f8a51218827168cdclaireho
# Command-line entry point: <script> <input .txt> <output .h>
if __name__ == '__main__':
  if len(sys.argv) != 3:
    sys.stdout.write('Usage: %s <input .txt> <output .h>\n' % sys.argv[0])
  else:
    # The input is opened first, so a missing input file raises before the
    # output file is created or truncated.  The with-blocks close both
    # handles (flushing the generated header) even if main() raises.
    with open(sys.argv[1], 'r') as infile:
      with open(sys.argv[2], 'w+') as outfile:
        main(infile, outfile)
76