1#!/usr/bin/python2.4
2#
3# Copyright 2008 The RE2 Authors.  All Rights Reserved.
4# Use of this source code is governed by a BSD-style
5# license that can be found in the LICENSE file.
6
7"""Unittest for the util/regexp/re2/unicode.py module."""
8
9import os
10import StringIO
11from google3.pyglib import flags
12from google3.testing.pybase import googletest
13from google3.util.regexp.re2 import unicode
14
# Directory passed as unicode_dir= to CaseGroups, Scripts and Categories in the
# tests below; resolved under the test source root when the module is loaded.
_UNICODE_DIR = os.path.join(
    flags.FLAGS.test_srcdir, "google3", "third_party", "unicode", "ucd-5.1.0")
17
18
class ConvertTest(googletest.TestCase):
  """Checks the private string/int conversion helpers of the unicode module."""

  def testUInt(self):
    """_UInt parses 4-6 digit hex strings up to 10FFFF and rejects the rest."""
    accepted = [
        ("0000", 0x0000),
        ("263A", 0x263A),
        ("10FFFF", 0x10FFFF),
    ]
    for text, want in accepted:
      self.assertEquals(want, unicode._UInt(text))
    # Too short, too long, and just past the largest code point.
    for text in ("263", "263AAAA", "110000"):
      self.assertRaises(unicode.InputError, unicode._UInt, text)

  def testURange(self):
    """_URange expands "lo..hi" or a single code into a list of ints."""
    self.assertEquals([1, 2, 3], unicode._URange("0001..0003"))
    self.assertEquals([1], unicode._URange("0001"))
    # Three endpoints, a descending range, and a range with lo == hi.
    for text in ("0001..0003..0005", "0003..0001", "0001..0001"):
      self.assertRaises(unicode.InputError, unicode._URange, text)

  def testUStr(self):
    """_UStr formats a code point as 0x-prefixed uppercase hex."""
    for code, want in ((0x263a, "0x263A"), (0x10FFFF, "0x10FFFF")):
      self.assertEquals(want, unicode._UStr(code))
    # Values outside 0..0x10FFFF are rejected.
    for code in (0x110000, -1):
      self.assertRaises(unicode.InputError, unicode._UStr, code)
42
43
# Well-formed fixture for ReadUnicodeTableTest.testSimpleTable.  Every data
# line has three ';'-separated fields.  The test expects five callbacks: the
# "<Greek, First>" / "<Greek, Last>" pair is reported as the single range
# 1F00..1FFE named "Greek", and the third field ("Line N") records the
# expected callback order.
_UNICODE_TABLE = """# Commented line, should be ignored.
# The next line is blank and should be ignored.

0041;Capital A;Line 1
0061..007A;Lowercase;Line 2
1F00;<Greek, First>;Ignored
1FFE;<Greek, Last>;Line 3
10FFFF;Runemax;Line 4
0000;Zero;Line 5
"""

# Malformed fixture: 111111 is larger than the maximum code point 10FFFF.
# testErrorTables expects InputError.
_BAD_TABLE1 = """
111111;Not a code point;
"""

# Malformed fixture: a "<..., First>" line with no matching "<..., Last>" line.
# testErrorTables expects InputError.
_BAD_TABLE2 = """
0000;<Zero, First>;Missing <Zero, Last>
"""

# Malformed fixture: the range runs from high to low.
# testErrorTables expects InputError.
_BAD_TABLE3 = """
0010..0001;Bad range;
"""
66
67
class AbortError(Exception):
  """Function should not have been called."""


def Abort(*unused_args, **unused_kwargs):
  """Callback stand-in that must never be invoked.

  It is passed where a table callback is expected in tests that should fail
  before any callback happens.  It accepts and ignores any arguments so that an
  unexpected invocation with callback arguments raises AbortError rather than
  a TypeError about the argument count.

  Raises:
    AbortError: always.
  """
  raise AbortError("Abort")
74
75
def StringTable(s, n, f):
  """Runs unicode.ReadUnicodeTable over the string s instead of a real file.

  Args:
    s: the table text; it is wrapped in an in-memory StringIO file object.
    n: forwarded unchanged as ReadUnicodeTable's second argument (the tests
      pass the number of fields they expect per line).
    f: forwarded unchanged as ReadUnicodeTable's third argument (the tests
      pass a callback taking (codes, fields)).
  """
  table_file = StringIO.StringIO(s)
  unicode.ReadUnicodeTable(table_file, n, f)
79
class ReadUnicodeTableTest(googletest.TestCase):
  """Test the ReadUnicodeTable function."""

  def testSimpleTable(self):
    """Checks the order and contents of the callbacks for _UNICODE_TABLE."""
    # Expected (codes, fields[0], fields[1]) for the 1st through 5th callback.
    # The third entry is the <Greek, First>/<Greek, Last> pair, which must be
    # reported as one range.
    expected = [
        ([0x0041], "0041", "Capital A"),
        (range(0x0061, 0x007A + 1), "0061..007A", "Lowercase"),
        (range(0x1F00, 0x1FFE + 1), "1F00..1FFE", "Greek"),
        ([0x10FFFF], "10FFFF", "Runemax"),
        ([0x0000], "0000", "Zero"),
    ]
    # DoLine records calls by mutating this list; it cannot rebind a plain
    # local of the enclosing function.
    calls = []

    def DoLine(codes, fields):
      self.assertEquals(3, len(fields))
      calls.append(fields)
      nth = len(calls)
      # The third field of each fixture line names its expected position.
      self.assertEquals("Line %d" % (nth,), fields[2])
      if nth > len(expected):
        return
      want_codes, want_range, want_name = expected[nth - 1]
      self.assertEquals(want_codes, codes)
      self.assertEquals(want_range, fields[0])
      self.assertEquals(want_name, fields[1])

    StringTable(_UNICODE_TABLE, 3, DoLine)
    self.assertEquals(5, len(calls))

  def testErrorTables(self):
    """Wrong field counts and malformed tables must raise InputError."""
    cases = [
        (_UNICODE_TABLE, 4),  # lines have fewer fields than requested
        (_UNICODE_TABLE, 2),  # lines have more fields than requested
        (_BAD_TABLE1, 3),
        (_BAD_TABLE2, 3),
        (_BAD_TABLE3, 3),
    ]
    for table, nfields in cases:
      self.assertRaises(unicode.InputError, StringTable, table, nfields, Abort)
121
122
class ParseContinueTest(googletest.TestCase):
  """Test the ParseContinue function."""

  def testParseContinue(self):
    """Only "<name, First>" and "<name, Last>" are split into (name, tag)."""
    cases = [
        ("<Private Use, First>", ("Private Use", "First")),
        ("<Private Use, Last>", ("Private Use", "Last")),
        # Any other tag leaves the text whole and yields None as the tag.
        ("<Private Use, Blah>", ("<Private Use, Blah>", None)),
    ]
    for text, want in cases:
      self.assertEquals(want, unicode._ParseContinue(text))
133
134
class CaseGroupsTest(googletest.TestCase):
  """Test the CaseGroups function (and the CaseFoldingReader)."""

  def FindGroup(self, c):
    """Returns the first group in self.groups that contains c, or None.

    Args:
      c: a code point, given either as an int or as a one-character str.
    """
    if isinstance(c, str):
      c = ord(c)
    for g in self.groups:
      if c in g:
        return g
    return None

  def testCaseGroups(self):
    """'a' folds together with 'A'; the digit '0' is in no case group."""
    self.groups = unicode.CaseGroups(unicode_dir=_UNICODE_DIR)
    self.assertEquals([ord("A"), ord("a")], self.FindGroup("a"))
    self.assertEquals(None, self.FindGroup("0"))
150
151
class ScriptsTest(googletest.TestCase):
  """Test the Scripts function (and the ScriptsReader)."""

  def FindScript(self, c):
    """Returns the name of the first script whose codes include c, or None.

    Args:
      c: a code point, given either as an int or as a one-character str.
    """
    if isinstance(c, str):
      c = ord(c)
    for script, codes in self.scripts.items():
      if c in codes:
        return script
    return None

  def testScripts(self):
    """Spot-checks a Latin letter, a Common digit and an unlisted code."""
    self.scripts = unicode.Scripts(unicode_dir=_UNICODE_DIR)
    self.assertEquals("Latin", self.FindScript("a"))
    self.assertEquals("Common", self.FindScript("0"))
    self.assertEquals(None, self.FindScript(0xFFFE))
169
170
class CategoriesTest(googletest.TestCase):
  """Test the Categories function (and the UnicodeDataReader)."""

  def FindCategory(self, c):
    """Returns a category name whose codes include c, or None.

    A name longer than one character (e.g. "Nd") is returned as soon as it is
    found.  Otherwise the first matching one-character name (e.g. "N") is
    returned, or None if no category lists c.

    Args:
      c: a code point, given either as an int or as a one-character str.
    """
    if isinstance(c, str):
      c = ord(c)
    short = None
    for category, codes in self.categories.items():
      if c not in codes:
        continue
      # prefer category Nd over N
      if len(category) > 1:
        return category
      if short is None:
        short = category
    return short

  def testCategories(self):
    """Spot-checks two-letter categories and their one-letter parents."""
    self.categories = unicode.Categories(unicode_dir=_UNICODE_DIR)
    self.assertEquals("Ll", self.FindCategory("a"))
    self.assertEquals("Nd", self.FindCategory("0"))
    self.assertEquals("Lo", self.FindCategory(0xAD00))  # in First, Last range
    self.assertEquals(None, self.FindCategory(0xFFFE))
    self.assertEquals("Lo", self.FindCategory(0x8B5A))
    self.assertEquals("Lo", self.FindCategory(0x6C38))
    self.assertEquals("Lo", self.FindCategory(0x92D2))
    # The one-letter categories must also list the same code points.
    self.assertTrue(ord("a") in self.categories["L"])
    self.assertTrue(ord("0") in self.categories["N"])
    self.assertTrue(0x8B5A in self.categories["L"])
    self.assertTrue(0x6C38 in self.categories["L"])
    self.assertTrue(0x92D2 in self.categories["L"])
202
def main():
  """Hands control to googletest.main()."""
  googletest.main()


if __name__ == "__main__":
  main()
208