"""Generate a C header mapping Unicode code point ranges to HarfBuzz scripts.

Usage: <this script> <input .txt> <output .h>

The input is expected to be a Unicode ``Scripts.txt`` data file, e.g.
http://www.unicode.org/Public/5.1.0/ucd/Scripts.txt
"""

import sys
from unicode_parse_common import *

# http://www.unicode.org/Public/5.1.0/ucd/Scripts.txt

# Value used for any script name that has no dedicated HB_Script_* entry.
DEFAULT_SCRIPT = 'HB_Script_Common'

script_to_harfbuzz = {
    # This is the list of HB_Script_* at the time of writing
    'Common': 'HB_Script_Common',
    'Greek': 'HB_Script_Greek',
    'Cyrillic': 'HB_Script_Cyrillic',
    'Armenian': 'HB_Script_Armenian',
    'Hebrew': 'HB_Script_Hebrew',
    'Arabic': 'HB_Script_Arabic',
    'Syriac': 'HB_Script_Syriac',
    'Thaana': 'HB_Script_Thaana',
    'Devanagari': 'HB_Script_Devanagari',
    'Bengali': 'HB_Script_Bengali',
    'Gurmukhi': 'HB_Script_Gurmukhi',
    'Gujarati': 'HB_Script_Gujarati',
    'Oriya': 'HB_Script_Oriya',
    'Tamil': 'HB_Script_Tamil',
    'Telugu': 'HB_Script_Telugu',
    'Kannada': 'HB_Script_Kannada',
    'Malayalam': 'HB_Script_Malayalam',
    'Sinhala': 'HB_Script_Sinhala',
    'Thai': 'HB_Script_Thai',
    'Lao': 'HB_Script_Lao',
    'Tibetan': 'HB_Script_Tibetan',
    'Myanmar': 'HB_Script_Myanmar',
    'Georgian': 'HB_Script_Georgian',
    'Hangul': 'HB_Script_Hangul',
    'Ogham': 'HB_Script_Ogham',
    'Runic': 'HB_Script_Runic',
    'Khmer': 'HB_Script_Khmer',
    'Inherited': 'HB_Script_Inherited',
}


class ScriptDict(object):
    """Read-only mapping wrapper that never fails a lookup.

    Any key that is missing from ``base`` (or whose stored value is None)
    resolves to ``'HB_Script_Common'``.
    """

    def __init__(self, base):
        """Wrap ``base``, an object providing a dict-style ``get`` method."""
        self.base = base

    def __getitem__(self, key):
        """Return the value stored for ``key``, or the default script."""
        value = self.base.get(key, None)
        if value is None:
            return DEFAULT_SCRIPT
        return value


def main(infile, outfile):
    """Parse ``infile`` and write the script-properties C header to ``outfile``.

    Args:
        infile: handed unchanged to ``unicode_file_parse`` (imported from
            unicode_parse_common); presumably an open Scripts.txt file --
            confirm against that helper.
        outfile: any object with a ``write(str)`` method; receives the header.
    """
    # NOTE(review): both helpers come from unicode_parse_common. The third
    # argument is presumably the value for unlisted code points -- confirm.
    ranges = unicode_file_parse(infile,
                                ScriptDict(script_to_harfbuzz),
                                DEFAULT_SCRIPT)
    ranges = sort_and_merge(ranges)

    # A trailing '\n' inside an entry yields a blank line after it once the
    # entries are joined, matching what the former print statements produced.
    lines = [
        '// Generated from Unicode script tables\n',
        '#ifndef SCRIPT_PROPERTIES_H_',
        '#define SCRIPT_PROPERTIES_H_\n',
        '#include <stdint.h>',
        '#include "harfbuzz-shaper.h"\n',
        'struct script_property {',
        ' uint32_t range_start;',
        ' uint32_t range_end;',
        ' HB_Script script;',
        '};\n',
        'static const struct script_property script_properties[] = {',
    ]
    # Each range unpacks into a (start, end, value) triple.
    for (start, end, value) in ranges:
        lines.append(' {0x%x, 0x%x, %s},' % (start, end, value))
    lines.append('};\n')
    lines.append(
        'static const unsigned script_properties_count = %d;\n' % len(ranges))
    lines.append('#endif // SCRIPT_PROPERTIES_H_')

    # Write through .write() rather than the Python 2-only "print >>file"
    # statement so the script runs unchanged on Python 2 and Python 3.
    outfile.write('\n'.join(lines) + '\n')


if __name__ == '__main__':
    if len(sys.argv) != 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Usage: %s <input .txt> <output .h>' % sys.argv[0])
    else:
        # Context managers guarantee both files are flushed and closed, even
        # if parsing fails part-way through.
        with open(sys.argv[1], 'r') as infile:
            with open(sys.argv[2], 'w') as outfile:
                main(infile, outfile)