1import sys
2
3# http://www.unicode.org/Public/UNIDATA/auxiliary/BidiMirroring.txt
4
5# This parses a file in the format of the above file and outputs a table
6# suitable for bsearch(3). This table maps Unicode code points to their
7# 'mirror'. (Mirroring is used when rendering RTL characters, see the Unicode
# standard). By convention, this mapping should be symmetric (if a maps to b,
# then b maps to a), but this code doesn't enforce or check this.
10
def main(infile, outfile):
  """Convert a BidiMirroring.txt-style table into a C header.

  Args:
    infile: iterable of text lines of the form
      '<hex code point>; <hex mirror> # optional comment'. Empty lines,
      lines starting with '#', and lines without a ';' are skipped.
    outfile: text file-like object with a write() method; receives the
      generated header.

  Raises:
    ValueError: if a field of a data line is not a hexadecimal number.
  """
  pairs = []
  for line in infile:
    # Strip only the line terminator instead of blindly dropping the last
    # character, so a final line without a trailing newline keeps its last
    # hex digit.
    line = line.rstrip('\r\n')
    if not line or line[0] == '#':
      continue
    # Discard a trailing comment, if any.
    data = line.split('#', 1)[0]
    if ';' not in data:
      continue
    a, b = data.split(';', 1)
    # int() tolerates the whitespace surrounding each field.
    pairs.append((int(a, 16), int(b, 16)))

  # The table is meant to be searched with bsearch(3), so it must be ordered.
  pairs.sort()

  # write() is used rather than the Python-2-only 'print >>' statement; the
  # emitted text is unchanged.
  write = outfile.write
  write('// Generated from Unicode Bidi Mirroring tables\n\n')
  write('#ifndef MIRRORING_PROPERTY_H_\n')
  write('#define MIRRORING_PROPERTY_H_\n\n')
  write('#include <stdint.h>\n')
  write('struct mirroring_property {\n')
  write('  uint32_t a;\n')
  write('  uint32_t b;\n')
  write('};\n\n')
  write('static const struct mirroring_property mirroring_properties[] = {\n')
  for pair in pairs:
    write('  {0x%x, 0x%x},\n' % pair)
  write('};\n\n')
  write('static const unsigned mirroring_properties_count = %d;\n\n' % len(pairs))
  write('#endif  // MIRRORING_PROPERTY_H_\n')
45
if __name__ == '__main__':
  if len(sys.argv) != 3:
    # Report misuse on stderr with a failing status so build scripts notice.
    sys.stderr.write('Usage: %s <input .txt> <output .h>\n' % sys.argv[0])
    sys.exit(1)
  # Context managers guarantee both files are closed (and the header is
  # flushed to disk) even if parsing raises.
  with open(sys.argv[1], 'r') as infile:
    with open(sys.argv[2], 'w') as outfile:
      main(infile, outfile)
51