1#!/usr/bin/python
2
3import sys
4
# Exactly three input files are required, in the order shown in the usage text.
if len (sys.argv) != 4:
	print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
	sys.exit (1)

files = [open (x) for x in sys.argv[1:]]

# Keep the first two lines of every input file.  They are consumed here and
# are therefore never seen by the parsing loop below.
headers = [[f.readline () for i in range (2)] for f in files]

# blocks: value from the third file -> (first, last) code point of the most
#         recently parsed range carrying that value.
# data:   one dict per input file, mapping code point -> value.
# values: one dict per input file, mapping value -> number of code points
#         that carry it.
blocks = {}
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate (files):
	for line in f:

		# Drop a trailing '#' comment, if any.
		j = line.find ('#')
		if j >= 0:
			line = line[:j]

		# A line without any ';' separator carries no data.
		fields = [x.strip () for x in line.split (';')]
		if len (fields) == 1:
			continue

		# First field: one hex code point, or an inclusive "first..last" range.
		uu = fields[0].split ('..')
		start = int (uu[0], 16)
		if len (uu) == 1:
			end = start
		else:
			end = int (uu[1], 16)

		t = fields[1]

		for u in range (start, end + 1):
			data[i][u] = t
		# Count every code point of the range, not one per input line, so
		# the tally really is a number of characters.
		values[i][t] = values[i].get (t, 0) + end - start + 1

		if i == 2:
			blocks[t] = (start, end)

# Everything has been read; release the input file handles.
for f in files:
	f.close ()
42
# Collapse the per-file dicts into a single dict keyed by code point.  Each
# entry is a three-item list (one slot per input file), pre-filled with the
# fallback values below.
defaults = ('Other', 'Not_Applicable', 'No_Block')
for idx, fallback in enumerate (defaults):
	# Each fallback value is tallied once more in its own file's counts.
	values[idx][fallback] = values[idx].get (fallback, 0) + 1

merged = {}
for idx, table in enumerate (data):
	for cp, prop in table.items ():
		if cp in merged:
			merged[cp][idx] = prop
		elif idx != 2:
			# Only the first two files may introduce a code point; the
			# third one merely annotates code points already present.
			row = list (defaults)
			row[idx] = prop
			merged[cp] = row
data = merged
del merged
num = len (data)

# NO-BREAK SPACE and DOTTED CIRCLE are outliers: take them out of the main
# dict and keep them on the side.
singles = {}
for cp in (0x00A0, 0x25CC):
	singles[cp] = data.pop (cp)
64
65print "/* == Start of generated table == */"
66print "/*"
67print " * The following table is generated by running:"
68print " *"
69print " *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
70print " *"
71print " * on files with these headers:"
72print " *"
73for h in headers:
74	for l in h:
75		print " * %s" % (l.strip())
76print " */"
77print
78print '#include "hb-ot-shape-complex-indic-private.hh"'
79print
80
81# Shorten values
82short = [{
83	"Bindu":		'Bi',
84	"Visarga":		'Vs',
85	"Vowel":		'Vo',
86	"Vowel_Dependent":	'M',
87	"Other":		'x',
88},{
89	"Not_Applicable":	'x',
90}]
91all_shorts = [[],[]]
92
93# Add some of the values, to make them more readable, and to avoid duplicates
94
95
96for i in range (2):
97	for v,s in short[i].items ():
98		all_shorts[i].append (s)
99
100what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
101what_short = ["ISC", "IMC"]
102for i in range (2):
103	print
104	vv = values[i].keys ()
105	vv.sort ()
106	for v in vv:
107		v_no_and = v.replace ('_And_', '_')
108		if v in short[i]:
109			s = short[i][v]
110		else:
111			s = ''.join ([c for c in v_no_and if ord ('A') <= ord (c) <= ord ('Z')])
112			if s in all_shorts[i]:
113				raise Exception ("Duplicate short value alias", v, s)
114			all_shorts[i].append (s)
115			short[i][v] = s
116		print "#define %s_%s	%s_%s	%s/* %3d chars; %s */" % \
117			(what_short[i], s, what[i], v.upper (), \
118			'	'* ((48-1 - len (what[i]) - 1 - len (v)) / 8), \
119			values[i][v], v)
120print
121print "#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)"
122print
123print
124
# Running totals maintained by print_block (): number of table slots written
# and number of those slots that had an entry in the data.
total = 0
used = 0
def print_block (block, start, end, data):
	"""Write the table entries for code points start..end (inclusive).

	block is only used as the label in the heading comment.  data maps a
	code point to a sequence whose first two items are looked up in
	short[0] and short[1]; code points missing from data fall back to the
	module-level defaults.  Nothing is returned; the module-level counters
	total and used are advanced.
	"""
	global total, used

	emit = sys.stdout.write
	emit ("\n\n")
	emit ("  /* %s  (%04X..%04X) */\n" % (block, start, end))

	present = 0
	for cp in range (start, end + 1):
		# Begin a new row, labelled with its code point, every eight entries.
		if cp % 8 == 0:
			emit ("\n")
			emit ("  /* %04X */" % cp)
		if cp in data:
			present += 1
			entry = data[cp]
		else:
			entry = defaults
		cell = "_(%s,%s)," % (short[0][entry[0]], short[1][entry[1]])
		emit ("%9s" % cell)

	total += end - start + 1
	used += present
144
145uu = data.keys ()
146uu.sort ()
147
148last = -1
149num = 0
150offset = 0
151starts = []
152ends = []
153print "static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {"
154for u in uu:
155	if u <= last:
156		continue
157	block = data[u][2]
158	(start, end) = blocks[block]
159
160	if start != last + 1:
161		if start - last <= 33:
162			print_block ("FILLER", last+1, start-1, data)
163			last = start-1
164		else:
165			if last >= 0:
166				ends.append (last + 1)
167				offset += ends[-1] - starts[-1]
168			print
169			print
170			print "#define indic_offset_0x%04x %d" % (start, offset)
171			starts.append (start)
172
173	print_block (block, start, end, data)
174	last = end
175ends.append (last + 1)
176offset += ends[-1] - starts[-1]
177print
178print
179print "#define indic_offset_total %d" % offset
180print
181occupancy = used * 100. / total
182print "}; /* Table occupancy: %d%% */" % occupancy
183print
184print "INDIC_TABLE_ELEMENT_TYPE"
185print "hb_indic_get_categories (hb_codepoint_t u)"
186print "{"
187for (start,end) in zip (starts, ends):
188	offset = "indic_offset_0x%04x" % start
189	print "  if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end, start, offset)
190for u,d in singles.items ():
191	print "  if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])
192print "  return _(x,x);"
193print "}"
194print
195print "#undef _"
196for i in range (2):
197	print
198	vv = values[i].keys ()
199	vv.sort ()
200	for v in vv:
201		print "#undef %s_%s" % \
202			(what_short[i], short[i][v])
203print
204print "/* == End of generated table == */"
205
206# Maintain at least 30% occupancy in the table */
207if occupancy < 30:
208	raise Exception ("Table too sparse, please investigate: ", occupancy)
209