#!/usr/bin/python
# Copyright 2008 The RE2 Authors.  All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

"""Generate C++ tables for Unicode Script and Category groups."""

# NOTE: every print below is a call with exactly one (pre-formatted) string
# argument.  That form produces identical output under Python 2 (where it is
# a print statement with a parenthesized expression) and Python 3.  Keep it
# that way, or add `from __future__ import print_function`, before passing
# more than one argument to print.

import sys
import unicode

# Text emitted verbatim before the generated tables.
_header = """
// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
// make_unicode_groups.py >unicode_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

"""

# Text emitted verbatim after the generated tables.
_trailer = """

} // namespace re2

"""

# Running totals of the ranges counted by PrintGroup; main() reports them
# in a comment in the generated file.
n16 = 0
n32 = 0


def MakeRanges(codes):
  """Turn a list like [1,2,3,7,8,9] into a range list [[1,3], [7,9]].

  Args:
    codes: iterable of integers.  Only runs of consecutive values that are
      adjacent in the input are merged, so the input is expected to be in
      ascending order.

  Returns:
    A list of [lo, hi] pairs (inclusive on both ends), in input order.
    An empty input gives an empty list.
  """
  ranges = []
  for c in codes:
    # Extend the most recent range when c directly follows its upper bound;
    # otherwise start a new single-element range.
    if ranges and c == ranges[-1][1] + 1:
      ranges[-1][1] = c
    else:
      ranges.append([c, c])
  return ranges


def PrintRanges(type, name, ranges):
  """Print the ranges as an array of type named name.

  Args:
    type: C++ element type name written into the declaration.  (The
      parameter name shadows the builtin; it is kept for compatibility
      with existing callers.)
    name: identifier of the generated static array.
    ranges: iterable of (lo, hi) integer pairs, one array element each.
  """
  print("static %s %s[] = {" % (type, name,))
  for lo, hi in ranges:
    print("\t{ %d, %d }," % (lo, hi))
  print("};")


def PrintGroup(name, codes):
  """Print the data structures for the group of codes.

  Splits codes into values below 65536 and values at or above it, prints
  one static array per non-empty half, and adds the number of ranges in
  each half to the module-level counters n16 and n32.

  Args:
    name: group name; used both as the quoted name in the literal and as
      the prefix of the generated array identifiers.
    codes: list of integer code points (see MakeRanges for ordering).

  Returns:
    A UGroup literal for the group, as a string.
  """
  # See unicode_groups.h for a description of the data structure.

  global n16
  global n32

  # Split codes into 16-bit ranges and 32-bit ranges.
  range16 = MakeRanges([c for c in codes if c < 65536])
  range32 = MakeRanges([c for c in codes if c >= 65536])

  n16 += len(range16)
  n32 += len(range32)

  ugroup = "{ \"%s\", +1" % (name,)
  # Each half contributes "<array>, <count>", or "0, 0" when it is empty
  # (no array is printed in that case).
  if range16:
    PrintRanges("URange16", name+"_range16", range16)
    ugroup += ", %s_range16, %d" % (name, len(range16))
  else:
    ugroup += ", 0, 0"
  if range32:
    PrintRanges("URange32", name+"_range32", range32)
    ugroup += ", %s_range32, %d" % (name, len(range32))
  else:
    ugroup += ", 0, 0"
  ugroup += " }"
  return ugroup


def main():
  """Write the complete generated C++ source to standard output."""
  print(_header)
  ugroups = []
  # Iterate in sorted name order so the range arrays are emitted in a
  # reproducible order regardless of dict iteration order.  Names are dict
  # keys and therefore unique, so the code lists are never compared.
  for name, codes in sorted(unicode.Categories().items()):
    ugroups.append(PrintGroup(name, codes))
  for name, codes in sorted(unicode.Scripts().items()):
    ugroups.append(PrintGroup(name, codes))
  print("// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32))
  print("UGroup unicode_groups[] = {")
  # The table itself is sorted by literal text, mixing categories and
  # scripts into one ordered list.
  ugroups.sort()
  for ug in ugroups:
    print("\t%s," % (ug,))
  print("};")
  print("int num_unicode_groups = %d;" % (len(ugroups),))
  print(_trailer)


if __name__ == '__main__':
  main()