#!/usr/bin/python

# Generates the Universal Shaping Engine (USE) category table from four
# Unicode data files.  This first section validates the command line, opens
# the inputs and parses each of them into a {code point: value} dictionary.

import sys

if len(sys.argv) != 5:
    # sys.stderr.write emits the same text as the old "print >>sys.stderr"
    # statement but is valid under every Python version.
    sys.stderr.write("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt\n")
    sys.exit(1)

# Code points whose block name is listed here are dropped from the table.
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]

# open() is the documented way to open a file; the file() constructor it
# replaces behaves identically on Python 2 and does not exist on Python 3.
files = [open(x) for x in sys.argv[1:]]

# The first two lines of every input except the third one (UnicodeData.txt)
# are kept as its header, to be echoed into the generated output.
headers = [[f.readline() for i in range(2)] for j, f in enumerate(files) if j != 2]
headers.append(["UnicodeData.txt does not have a header."])

# data[i] maps code point -> property value read from input file i.
# values[i] maps property value -> number of code points carrying it.
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate(files):
    for line in f:

        # Strip a trailing '#' comment, if any.
        j = line.find('#')
        if j >= 0:
            line = line[:j]

        # Lines without a ';' separator (blank or comment-only) carry no data.
        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            continue

        # The first field is a single hex code point or a "start..end" range.
        uu = fields[0].split('..')
        start = int(uu[0], 16)
        if len(uu) == 1:
            end = start
        else:
            end = int(uu[1], 16)

        # The value is the second field, except for the third input file
        # (UnicodeData.txt), where the third field is used instead.
        t = fields[1 if i != 2 else 2]

        for u in range(start, end + 1):
            data[i][u] = t
        values[i][t] = values[i].get(t, 0) + end - start + 1

# Fallback value for each of the four inputs, in command-line order.
defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')

# TODO Characters that are not in Unicode Indic files, but used in USE
data[0][0x034F] = defaults[0]
data[0][0x2060] = defaults[0]
for u in range(0xFE00, 0xFE0F + 1):
    data[0][u] = defaults[0]

# Fold the four per-file tables into one dict keyed by code point.

# Every file's default value is counted once more in its histogram.
for idx, fallback in enumerate(defaults):
    values[idx][fallback] = 1 + values[idx].get(fallback, 0)

combined = {}
for idx, table in enumerate(data):
    for cp, prop in table.items():
        row = combined.get(cp)
        if row is None:
            # Only the first two inputs may introduce a code point; the
            # remaining ones merely annotate code points already present.
            if idx >= 2:
                continue
            row = combined[cp] = list(defaults)
        row[idx] = prop

# Discard code points whose block (fourth column) is blacklisted.
data = {cp: row for cp, row in combined.items() if row[3] not in BLACKLISTED_BLOCKS}
del combined
num = len(data)


# Every property value the script refers to by name, grouped by property.
property_names = [
    # General_Category
    'Cc', 'Cf', 'Cn', 'Co', 'Cs',
    'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
    'Mc', 'Me', 'Mn',
    'Nd', 'Nl', 'No',
    'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
    'Sc', 'Sk', 'Sm', 'So',
    'Zl', 'Zp', 'Zs',
    # Indic_Syllabic_Category
    'Other', 'Bindu', 'Visarga', 'Avagraha', 'Nukta', 'Virama',
    'Pure_Killer', 'Invisible_Stacker',
    'Vowel_Independent', 'Vowel_Dependent', 'Vowel',
    'Consonant_Placeholder', 'Consonant', 'Consonant_Dead',
    'Consonant_With_Stacker', 'Consonant_Prefixed',
    'Consonant_Preceding_Repha', 'Consonant_Succeeding_Repha',
    'Consonant_Subjoined', 'Consonant_Medial', 'Consonant_Final',
    'Consonant_Head_Letter',
    'Modifying_Letter', 'Tone_Letter', 'Tone_Mark',
    'Gemination_Mark', 'Cantillation_Mark', 'Register_Shifter',
    'Syllable_Modifier', 'Consonant_Killer',
    'Non_Joiner', 'Joiner', 'Number_Joiner', 'Number',
    'Brahmi_Joining_Number',
    # Indic_Positional_Category
    'Not_Applicable', 'Right', 'Left', 'Visual_Order_Left',
    'Left_And_Right', 'Top', 'Bottom', 'Top_And_Bottom',
    'Top_And_Right', 'Top_And_Left', 'Top_And_Left_And_Right',
    'Bottom_And_Right', 'Top_And_Bottom_And_Right', 'Overstruck',
]

class PropertyValue(object):
    """A named Unicode property value.

    Instances compare equal to another PropertyValue with the same name and
    to a plain string equal to that name, so the raw strings parsed from the
    data files can be tested against the constants defined below.
    """

    def __init__(self, name_):
        self.name = name_

    def __str__(self):
        return self.name

    def __eq__(self, other):
        # Compare against other.name when it exists (another PropertyValue)
        # and against the object itself otherwise (a plain string).  This
        # avoids the Python 2-only 'basestring' and does not raise when
        # compared with an unrelated object.
        return self.name == getattr(other, 'name', other)

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        # Keep hashing consistent with __eq__: equal to the hash of the name.
        return hash(self.name)


property_values = {}

for name in property_names:
    value = PropertyValue(name)
    # Check the *name* for collisions.  Testing the freshly created object
    # (as was done before) could never fail, because without __hash__ the
    # lookup was keyed on object identity.
    assert name not in property_values
    assert name not in globals()
    property_values[name] = value
# Expose every property value as a module-level constant (Lo, Virama, Top...).
globals().update(property_values)


# Category predicates.  Each takes the code point U, its
# Indic_Syllabic_Category UISC and its General_Category UGC, and returns
# whether the character belongs to the USE category the predicate is
# registered under in use_mapping below.

def is_BASE(U, UISC, UGC):
    """Predicate for USE category 'B'."""
    return (UISC in [Number, Consonant, Consonant_Head_Letter,
                     #SPEC-DRAFT Consonant_Placeholder,
                     Tone_Letter,
                     Vowel_Independent #SPEC-DRAFT
                     ] or
            (UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial,
                                    Consonant_Subjoined, Vowel, Vowel_Dependent]))

def is_BASE_IND(U, UISC, UGC):
    """Predicate for USE category 'IND'."""
    #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
    return (UISC in [Consonant_Dead, Modifying_Letter] or
            (UGC == Po and not U in [0x104E, 0x2022]) or
            False # SPEC-DRAFT-OUTDATED! U == 0x002D
            )

def is_BASE_NUM(U, UISC, UGC):
    """Predicate for USE category 'N'."""
    return UISC == Brahmi_Joining_Number

def is_BASE_OTHER(U, UISC, UGC):
    """Predicate for USE category 'GB'."""
    if UISC == Consonant_Placeholder: return True #SPEC-DRAFT
    #SPEC-DRAFT return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
    return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE]

def is_CGJ(U, UISC, UGC):
    """Predicate for USE category 'CGJ' (U+034F only)."""
    return U == 0x034F

def is_CONS_FINAL(U, UISC, UGC):
    """Predicate for USE category 'F'."""
    return ((UISC == Consonant_Final and UGC != Lo) or
            UISC == Consonant_Succeeding_Repha)

def is_CONS_FINAL_MOD(U, UISC, UGC):
    """Predicate for USE category 'FM'."""
    #SPEC-DRAFT return UISC in [Consonant_Final_Modifier, Syllable_Modifier]
    return UISC == Syllable_Modifier

def is_CONS_MED(U, UISC, UGC):
    """Predicate for USE category 'M'."""
    return UISC == Consonant_Medial and UGC != Lo

def is_CONS_MOD(U, UISC, UGC):
    """Predicate for USE category 'CM'."""
    return UISC in [Nukta, Gemination_Mark, Consonant_Killer]

def is_CONS_SUB(U, UISC, UGC):
    """Predicate for USE category 'SUB'."""
    #SPEC-DRAFT return UISC == Consonant_Subjoined
    return UISC == Consonant_Subjoined and UGC != Lo

def is_HALANT(U, UISC, UGC):
    """Predicate for USE category 'H'."""
    return UISC in [Virama, Invisible_Stacker]

def is_HALANT_NUM(U, UISC, UGC):
    """Predicate for USE category 'HN'."""
    return UISC == Number_Joiner

def is_ZWNJ(U, UISC, UGC):
    """Predicate for USE category 'ZWNJ'."""
    return UISC == Non_Joiner

def is_ZWJ(U, UISC, UGC):
    """Predicate for USE category 'ZWJ'."""
    return UISC == Joiner

def is_Word_Joiner(U, UISC, UGC):
    """Predicate for USE category 'WJ' (U+2060 only)."""
    return U == 0x2060

def is_OTHER(U, UISC, UGC):
    """Predicate for USE category 'O'.

    Matches UISC 'Other' characters that are not already claimed by the
    SM, CGJ, WJ or VS predicates.
    """
    #SPEC-OUTDATED return UGC == Zs # or any other SCRIPT_COMMON characters
    return (UISC == Other
            and not is_SYM_MOD(U, UISC, UGC)
            and not is_CGJ(U, UISC, UGC)
            and not is_Word_Joiner(U, UISC, UGC)
            and not is_VARIATION_SELECTOR(U, UISC, UGC)
            )

def is_Reserved(U, UISC, UGC):
    """Predicate for USE category 'Rsv'."""
    return UGC == 'Cn'

def is_REPHA(U, UISC, UGC):
    """Predicate for USE category 'R'."""
    #return UISC == Consonant_Preceding_Repha
    #SPEC-OUTDATED hack to categorize Consonant_With_Stacker and Consonant_Prefixed
    return UISC in [Consonant_Preceding_Repha, Consonant_With_Stacker, Consonant_Prefixed]

def is_SYM(U, UISC, UGC):
    """Predicate for USE category 'S'."""
    if U == 0x25CC: return False #SPEC-DRAFT
    #SPEC-DRAFT return UGC in [So, Sc] or UISC == Symbol_Letter
    return UGC in [So, Sc]

def is_SYM_MOD(U, UISC, UGC):
    """Predicate for USE category 'SM' (U+1B6B..U+1B73)."""
    return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73]

def is_VARIATION_SELECTOR(U, UISC, UGC):
    """Predicate for USE category 'VS' (U+FE00..U+FE0F)."""
    return 0xFE00 <= U <= 0xFE0F

def is_VOWEL(U, UISC, UGC):
    """Predicate for USE category 'V'."""
    return (UISC == Pure_Killer or
            (UGC != Lo and UISC in [Vowel, Vowel_Dependent]))

def is_VOWEL_MOD(U, UISC, UGC):
    """Predicate for USE category 'VM'."""
    return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
            (UGC != Lo and UISC == Bindu))


# USE category tag -> predicate deciding membership.  map_to_use() requires
# exactly one predicate to accept each character.
use_mapping = {
    'B': is_BASE,
    'IND': is_BASE_IND,
    'N': is_BASE_NUM,
    'GB': is_BASE_OTHER,
    'CGJ': is_CGJ,
    'F': is_CONS_FINAL,
    'FM': is_CONS_FINAL_MOD,
    'M': is_CONS_MED,
    'CM': is_CONS_MOD,
    'SUB': is_CONS_SUB,
    'H': is_HALANT,
    'HN': is_HALANT_NUM,
    'ZWNJ': is_ZWNJ,
    'ZWJ': is_ZWJ,
    'WJ': is_Word_Joiner,
    'O': is_OTHER,
    'Rsv': is_Reserved,
    'R': is_REPHA,
    'S': is_SYM,
    'SM': is_SYM_MOD,
    'VS': is_VARIATION_SELECTOR,
    'V': is_VOWEL,
    'VM': is_VOWEL_MOD,
}

# USE category tag -> {position suffix: Indic_Positional_Category values
# selecting that suffix}.  A None entry means the category may carry a
# position but gets no suffix appended.
use_positions = {
    'F': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Right],
    },
    'M': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Right],
        'Pre': [Left],
    },
    'CM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    'V': {
        'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
        'Blw': [Bottom, Overstruck, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
    },
    'VM': {
        'Abv': [Top],
        'Blw': [Bottom, Overstruck],
        'Pst': [Right],
        'Pre': [Left],
    },
    'SM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    'H': None,
    'B': None,
    'FM': None,
    'SUB': None,
}


def map_to_use(data):
    """Translate merged Unicode properties into USE categories.

    data maps a code point to a (UISC, UIPC, UGC, UBlock) sequence.  The
    result maps each code point to a (USE tag, UBlock) tuple, where the tag
    is a key of use_mapping, extended with a position suffix when
    use_positions defines suffixes for that category.

    Raises AssertionError when a character matches zero or several
    categories, carries a position its category cannot take, or matches
    zero or several position suffixes.
    """
    out = {}
    items = use_mapping.items()
    for U, (UISC, UIPC, UGC, UBlock) in data.items():

        # Resolve Indic_Syllabic_Category

        # TODO: These don't have UISC assigned in Unicode 8.0, but
        # have UIPC
        if U == 0x17DD: UISC = Vowel_Dependent
        if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark

        # TODO: U+1CED should only be allowed after some of
        # the nasalization marks, maybe only for U+1CE9..U+1CF1.
        if U == 0x1CED: UISC = Tone_Mark

        # Every predicate is evaluated; exactly one must accept.
        matches = [tag for tag, predicate in items if predicate(U, UISC, UGC)]
        assert len(matches) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, matches)
        USE = matches[0]

        # Resolve Indic_Positional_Category

        # TODO: Not in Unicode 8.0 yet, but in spec.
        if U == 0x1B6C: UIPC = Bottom

        # TODO: These should die, but have UIPC in Unicode 8.0
        if U in [0x953, 0x954]: UIPC = Not_Applicable

        # TODO: In USE's override list but not in Unicode 8.0
        if U == 0x103C: UIPC = Left

        # TODO: These are not in USE's override list that we have, nor are they in Unicode 8.0
        if 0xA926 <= U <= 0xA92A: UIPC = Top
        if U == 0x111CA: UIPC = Bottom
        if U == 0x11300: UIPC = Top
        if U == 0x1133C: UIPC = Bottom
        if U == 0x1171E: UIPC = Left # Correct?!
        if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
        if 0x1CF8 <= U <= 0x1CF9: UIPC = Top

        # A real position is only acceptable on categories listed in
        # use_positions.
        assert (UIPC in [Not_Applicable, Visual_Order_Left] or
                USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)

        pos_mapping = use_positions.get(USE, None)
        if pos_mapping:
            suffixes = [suffix for suffix, positions in pos_mapping.items()
                        if positions and UIPC in positions]
            assert len(suffixes) == 1, "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, suffixes)
            USE = USE + suffixes[0]

        out[U] = (USE, UBlock)
    return out


# From here on a table entry is a (USE tag, block name) pair.
defaults = ('O', 'No_Block')
data = map_to_use(data)

# Remove the outliers
singles = {}
for u in [0x034F, 0x25CC, 0x1107F]:
    singles[u] = data.pop(u)

# Banner of the generated file: how it was produced and the header lines of
# the input files it was produced from.
print "/* == Start of generated table == */"
print "/*"
print " * The following table is generated by running:"
print " *"
print " * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt"
print " *"
print " * on files with these headers:"
print " *"
for h in headers:
    for l in h:
        print " * %s" % (l.strip())
print " */"
print
print '#include "hb-ot-shape-complex-use-private.hh"'
print

# Running statistics updated by print_block():
#   total      - number of table slots emitted so far
#   used       - how many of those slots hold a code point present in the data
#   last_block - name of the most recent named block that was printed
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
    # Writes the table entries for code points start..end (inclusive) to
    # stdout, starting a new output row at every multiple of 16.
    #
    # block: block name to announce in a comment, or None for filler ranges.
    # start: first code point; must be a multiple of 8.
    # end:   last code point; end + 1 must be a multiple of 8.
    # data:  dict mapping code point -> tuple whose first item is the tag
    #        to print; code points missing from it use the global defaults.
    global total, used, last_block
    # Announce the block name only when it differs from the previous one.
    if block and block != last_block:
        print
        print
        print " /* %s */" % block
    # A range that starts mid-row is preceded by padding; the trailing comma
    # keeps the output on the same line.
    # NOTE(review): presumably this lines the entries up under the previous
    # row's columns - confirm against the generated output.
    if start % 16:
        print ' ' * (20 + (start % 16 * 6)),
    num = 0
    assert start % 8 == 0
    assert (end+1) % 8 == 0
    for u in range (start, end+1):
        if u % 16 == 0:
            # New row: line break, then the row's first code point as a label.
            print
            print " /* %04X */" % u,
        if u in data:
            num += 1
        d = data.get (u, defaults)
        sys.stdout.write ("%6s," % d[0])

    total += end - start + 1
    used += num
    if block:
        last_block = block

# Sorted list of the code points to emit (Python 2: keys() returns a list).
uu = data.keys ()
uu.sort ()

# 'last' is initialised far below zero; 'num' is reset to 0.
last = -100000
num = 0
Esfahbodoffset = 0 395e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodstarts = [] 396e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodends = [] 397ad725552521273a1f571f04bc96a04221c3e067aBehdad Esfahbodfor k,v in sorted(use_mapping.items()): 39844910cef626e6d03baa4d89d8fbe2c088971902dBehdad Esfahbod if k in use_positions and use_positions[k]: continue 399ad725552521273a1f571f04bc96a04221c3e067aBehdad Esfahbod print "#define %s USE_%s /* %s */" % (k, k, v.__name__[3:]) 40044910cef626e6d03baa4d89d8fbe2c088971902dBehdad Esfahbodfor k,v in sorted(use_positions.items()): 40144910cef626e6d03baa4d89d8fbe2c088971902dBehdad Esfahbod if not v: continue 40244910cef626e6d03baa4d89d8fbe2c088971902dBehdad Esfahbod for suf in v.keys(): 40344910cef626e6d03baa4d89d8fbe2c088971902dBehdad Esfahbod tag = k + suf 40444910cef626e6d03baa4d89d8fbe2c088971902dBehdad Esfahbod print "#define %s USE_%s" % (tag, tag) 405ad725552521273a1f571f04bc96a04221c3e067aBehdad Esfahbodprint "" 406c48ff288522f33dc6c78520de0a0a74306630895Behdad Esfahbodprint "static const USE_TABLE_ELEMENT_TYPE use_table[] = {" 407e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodfor u in uu: 408e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod if u <= last: 409e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod continue 410ad725552521273a1f571f04bc96a04221c3e067aBehdad Esfahbod block = data[u][1] 411e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod 412e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod start = u//8*8 413e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod end = start+1 414ad725552521273a1f571f04bc96a04221c3e067aBehdad Esfahbod while end in uu and block == data[end][1]: 415e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod end += 1 416e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod end = (end-1)//8*8 + 7 417e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod 418e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod if start != last + 1: 
419e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod if start - last <= 1+16*3: 420e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod print_block (None, last+1, start-1, data) 421e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod last = start-1 422e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod else: 423e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod if last >= 0: 424e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod ends.append (last + 1) 425e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod offset += ends[-1] - starts[-1] 426e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod print 427e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod print 428c48ff288522f33dc6c78520de0a0a74306630895Behdad Esfahbod print "#define use_offset_0x%04xu %d" % (start, offset) 429e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod starts.append (start) 430e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod 431e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod print_block (block, start, end, data) 432e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod last = end 433e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodends.append (last + 1) 434e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodoffset += ends[-1] - starts[-1] 435e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint 436e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint 437e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodoccupancy = used * 100. 
/ total 438e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodpage_bits = 12 439e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy) 440e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint 441c48ff288522f33dc6c78520de0a0a74306630895Behdad Esfahbodprint "USE_TABLE_ELEMENT_TYPE" 442c48ff288522f33dc6c78520de0a0a74306630895Behdad Esfahbodprint "hb_use_get_categories (hb_codepoint_t u)" 443e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint "{" 444e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint " switch (u >> %d)" % page_bits 445e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint " {" 446e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodpages = set([u>>page_bits for u in starts+ends+singles.keys()]) 447e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodfor p in sorted(pages): 448e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod print " case 0x%0Xu:" % p 449e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod for (start,end) in zip (starts, ends): 450e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod if p not in [start>>page_bits, end>>page_bits]: continue 451c48ff288522f33dc6c78520de0a0a74306630895Behdad Esfahbod offset = "use_offset_0x%04xu" % start 452c48ff288522f33dc6c78520de0a0a74306630895Behdad Esfahbod print " if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset) 453e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod for u,d in singles.items (): 454e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod if p != u>>page_bits: continue 455ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbod print " if (unlikely (u == 0x%04Xu)) return %s;" % (u, d[0]) 456e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod print " break;" 457e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod print "" 458e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint " default:" 
459e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint " break;" 460e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint " }" 46144910cef626e6d03baa4d89d8fbe2c088971902dBehdad Esfahbodprint " return USE_O;" 462e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint "}" 463e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint 464ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbodfor k in sorted(use_mapping.keys()): 465ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbod if k in use_positions and use_positions[k]: continue 466ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbod print "#undef %s" % k 467ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbodfor k,v in sorted(use_positions.items()): 468ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbod if not v: continue 469ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbod for suf in v.keys(): 470ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbod tag = k + suf 471ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbod print "#undef %s" % tag 472ad7178227f16abc17456f122deac1508031cbbc3Behdad Esfahbodprint 473e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbodprint "/* == End of generated table == */" 474e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod 475ad725552521273a1f571f04bc96a04221c3e067aBehdad Esfahbod# Maintain at least 50% occupancy in the table */ 476ad725552521273a1f571f04bc96a04221c3e067aBehdad Esfahbodif occupancy < 50: 477e2c95116e1423f83a692d6170553d0cc95733d24Behdad Esfahbod raise Exception ("Table too sparse, please investigate: ", occupancy) 478