gen-indic-table.py revision ae4a2b9365051c23c9a299cf76f3ab7e661999b1
#!/usr/bin/python
"""Generate the Indic category lookup table as C source on standard output.

Usage:

    ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt

Each input file is read as two header lines followed by records of the form
``START[..END] ; VALUE`` (hexadecimal code points, ``#`` starts a comment).
The script merges the three files per code point, prints one ``#define``
alias per category value, the ``indic_table[]`` array laid out block by
block, and a ``get_indic_categories ()`` C function that indexes the array.

The script runs unchanged under Python 2.6+ and Python 3: all output goes
through ``sys.stdout.write`` rather than the ``print`` statement.

NOTE(review): the copy of this file that was reviewed had its whitespace
collapsed.  Runs of spaces or tabs inside the string literals below (for
example the padding unit used to align the ``#define`` comments) may have
been wider originally -- confirm against a pristine checkout before relying
on the exact column layout of the generated text.
"""

import sys

# Value assumed for a code point that an input file does not mention, in
# input-file order: (syllabic category, matra category, block name).
defaults = ('Other', 'Not_Applicable', 'No_Block')

# Hand-picked short aliases, one dict per category file.  main () adds an
# automatically derived alias for every other value it meets, so
# print_block () can look any value up here.
short = [{
    "Bindu": 'Bi',
    "Visarga": 'Vs',
    "Vowel": 'Vo',
    "Vowel_Dependent": 'M',
    "Other": 'x',
}, {
    "Not_Applicable": 'x',
}]

# Running totals maintained by print_block (): number of table slots emitted
# and how many of them carry real data.  Used for the occupancy check.
total = 0
used = 0


def _emit (line=""):
    """Write *line* followed by a newline to standard output."""
    sys.stdout.write (line + "\n")


def _parse_inputs (paths):
    """Read the input files; return (headers, blocks, data, values).

    headers -- per file, its first two lines (echoed into the output).
    blocks  -- block name -> (start, end), taken from the third file only.
    data    -- per file, code point -> value.
    values  -- per file, value -> number of code points carrying it.
    """
    headers = []
    blocks = {}
    data = [{} for p in paths]
    values = [{} for p in paths]
    for i, path in enumerate (paths):
        with open (path) as f:
            headers.append ([f.readline () for k in range (2)])
            for line in f:
                # Drop the trailing comment, if any.
                j = line.find ('#')
                if j >= 0:
                    line = line[:j]

                fields = [x.strip () for x in line.split (';')]
                if len (fields) == 1:
                    continue  # blank or comment-only line

                uu = fields[0].split ('..')
                start = int (uu[0], 16)
                if len (uu) == 1:
                    end = start
                else:
                    end = int (uu[1], 16)

                t = fields[1]

                for u in range (start, end + 1):
                    data[i][u] = t
                # Count code points, not records, so the figure matches the
                # "chars" label it is printed with.
                values[i][t] = values[i].get (t, 0) + end - start + 1

                if i == 2:
                    blocks[t] = (start, end)
    return headers, blocks, data, values


def _merge (data):
    """Merge the per-file dicts into one: code point -> [isc, imc, block].

    Block names (third file) are only attached to code points that one of
    the two category files mentions; they never create entries themselves.
    """
    combined = {}
    for i, d in enumerate (data):
        for u, v in d.items ():
            if u not in combined:
                if i == 2:
                    continue
                combined[u] = list (defaults)
            combined[u][i] = v
    return combined


def print_block (block, start, end, data):
    """Emit the table rows for code points start..end (inclusive).

    block -- name printed in the comment that introduces the rows.
    data  -- merged dict; code points missing from it are emitted with the
             default categories.

    Adds the number of slots written to the global ``total`` and the number
    of those present in *data* to the global ``used``.
    """
    global total, used

    _emit ()
    _emit ()
    _emit (" /* %s (%04X..%04X) */" % (block, start, end))
    num = 0
    for u in range (start, end+1):
        if u % 8 == 0:
            # Start a new row of eight entries, labelled with its first
            # code point; the entries follow on the same line.
            _emit ()
            sys.stdout.write (" /* %04X */" % u)
        if u in data:
            num += 1
        d = data.get (u, defaults)
        sys.stdout.write ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))

    total += end - start + 1
    used += num


def main (argv):
    """Run the generator; *argv* is the full argument vector (argv[0] unused).

    Exits with status 1 after printing a usage line when the argument count
    is wrong.  Raises Exception when two category values derive the same
    short alias, or when less than 30% of the emitted table is populated.
    """
    global total, used

    if len (argv) != 4:
        sys.stderr.write ("usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt\n")
        sys.exit (1)

    total = 0
    used = 0

    headers, blocks, per_file, values = _parse_inputs (argv[1:])

    # Register the default values so they get an alias and an #undef too.
    for i, v in enumerate (defaults):
        values[i][v] = values[i].get (v, 0) + 1

    # Merge data into one dict:
    data = _merge (per_file)

    # Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
    singles = {}
    for u in [0x00A0, 0x25CC]:
        singles[u] = data.pop (u)

    _emit ("/* == Start of generated table == */")
    _emit ("/*")
    _emit (" * The following table is generated by running:")
    _emit (" *")
    _emit (" * ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt")
    _emit (" *")
    _emit (" * on files with these headers:")
    _emit (" *")
    for h in headers:
        for l in h:
            _emit (" * %s" % (l.strip ()))
    _emit (" */")
    _emit ()
    _emit ("#ifndef HB_OT_SHAPE_COMPLEX_INDIC_TABLE_HH")
    _emit ("#define HB_OT_SHAPE_COMPLEX_INDIC_TABLE_HH")
    _emit ()

    # Shorten values: seed the taken-alias lists with the hand-picked ones,
    # then derive the rest from the capital letters of each value name.
    all_shorts = [list (short[i].values ()) for i in range (2)]

    what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
    what_short = ["ISC", "IMC"]
    for i in range (2):
        _emit ()
        for v in sorted (values[i]):
            v_no_and = v.replace ('_And_', '_')
            if v in short[i]:
                s = short[i][v]
            else:
                s = ''.join ([c for c in v_no_and if 'A' <= c <= 'Z'])
                if s in all_shorts[i]:
                    raise Exception ("Duplicate short value alias", v, s)
                all_shorts[i].append (s)
                short[i][v] = s
            # '//' keeps the padding count an integer on Python 3 as well.
            _emit ("#define %s_%s %s_%s %s/* %3d chars; %s */" %
                   (what_short[i], s, what[i], v.upper (),
                    ' ' * ((48-1 - len (what[i]) - 1 - len (v)) // 8),
                    values[i][v], v))
    _emit ()
    _emit ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)")
    _emit ()
    _emit ()

    # The table is a concatenation of contiguous segments.  starts[k] is the
    # first code point of segment k and ends[k] is one past its last code
    # point (exclusive), so ends[k] - starts[k] is the segment length.
    last = -1
    offset = 0
    starts = []
    ends = []
    _emit ("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {")
    for u in sorted (data):
        if u <= last:
            continue  # already covered by a block that was emitted
        block = data[u][2]
        (start, end) = blocks[block]

        if start != last + 1:
            if start - last <= 33:
                # Small gap: pad with default entries instead of paying for
                # a new segment.
                print_block ("FILLER", last+1, start-1, data)
                last = start-1
            else:
                # Large gap: close the current segment and open a new one.
                if last >= 0:
                    ends.append (last + 1)
                    offset += ends[-1] - starts[-1]
                _emit ()
                _emit ()
                _emit ("#define indic_offset_0x%04x %d" % (start, offset))
                starts.append (start)

        print_block (block, start, end, data)
        last = end
    ends.append (last + 1)
    offset += ends[-1] - starts[-1]
    _emit ()
    _emit ()
    _emit ("#define indic_offset_total %d" % offset)
    _emit ()
    occupancy = used * 100. / total
    _emit ("}; /* Table occupancy: %d%% */" % occupancy)
    _emit ()
    _emit ("static INDIC_TABLE_ELEMENT_TYPE")
    _emit ("get_indic_categories (hb_codepoint_t u)")
    _emit ("{")
    for (start, end) in zip (starts, ends):
        offset_name = "indic_offset_0x%04x" % start
        # 'end' is exclusive while the generated test uses '<=', so the
        # upper bound written out must be end - 1.  Emitting 'end' itself
        # would let u == end read the first slot of the next segment (or
        # one past the table for the last segment).
        _emit (" if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end - 1, start, offset_name))
    for u, d in sorted (singles.items ()):
        _emit (" if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
    _emit (" return _(x,x);")
    _emit ("}")
    _emit ()
    _emit ("#undef _")
    for i in range (2):
        _emit ()
        for v in sorted (values[i]):
            _emit ("#undef %s_%s" % (what_short[i], short[i][v]))
    _emit ()
    _emit ("#endif /* HB_OT_SHAPE_COMPLEX_INDIC_TABLE_HH */")
    _emit ()
    _emit ("/* == End of generated table == */")

    # Maintain at least 30% occupancy in the table
    if occupancy < 30:
        raise Exception ("Table too sparse, please investigate: ", occupancy)


if __name__ == "__main__":
    main (sys.argv)