#! /usr/bin/python
#
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc.  All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Test for google.protobuf.text_format."""

__author__ = 'kenton@google.com (Kenton Varda)'

import difflib
import unittest

from google.protobuf import text_format
from google.protobuf.internal import test_util
from google.protobuf import unittest_pb2
from google.protobuf import unittest_mset_pb2


class TextFormatTest(unittest.TestCase):
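  """Tests for text_format printing (MessageToString) and parsing (Merge)."""
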
  def ReadGolden(self, golden_filename):
    f = test_util.GoldenFile(golden_filename)
    golden_lines = f.readlines()
    f.close()
    return golden_lines

  def CompareToGoldenFile(self, text, golden_filename):
    golden_lines = self.ReadGolden(golden_filename)
    self.CompareToGoldenLines(text, golden_lines)

  def CompareToGoldenText(self, text, golden_text):
    self.CompareToGoldenLines(text, golden_text.splitlines(True))

  def CompareToGoldenLines(self, text, golden_lines):
    actual_lines = text.splitlines(True)
    self.assertEqual(golden_lines, actual_lines,
      "Text doesn't match golden.  Diff:\n" +
      ''.join(difflib.ndiff(golden_lines, actual_lines)))

  def testPrintAllFields(self):
    message = unittest_pb2.TestAllTypes()
    test_util.SetAllFields(message)
    self.CompareToGoldenFile(
      self.RemoveRedundantZeros(text_format.MessageToString(message)),
      'text_format_unittest_data.txt')

  def testPrintAllExtensions(self):
    message = unittest_pb2.TestAllExtensions()
    test_util.SetAllExtensions(message)
    self.CompareToGoldenFile(
      self.RemoveRedundantZeros(text_format.MessageToString(message)),
      'text_format_unittest_extensions_data.txt')

  def testPrintMessageSet(self):
    message = unittest_mset_pb2.TestMessageSetContainer()
    ext1 = unittest_mset_pb2.TestMessageSetExtension1.message_set_extension
    ext2 = unittest_mset_pb2.TestMessageSetExtension2.message_set_extension
    message.message_set.Extensions[ext1].i = 23
    message.message_set.Extensions[ext2].str = 'foo'
    self.CompareToGoldenText(text_format.MessageToString(message),
      'message_set {\n'
      '  [protobuf_unittest.TestMessageSetExtension1] {\n'
      '    i: 23\n'
      '  }\n'
      '  [protobuf_unittest.TestMessageSetExtension2] {\n'
      '    str: \"foo\"\n'
      '  }\n'
      '}\n')

  def testPrintExotic(self):
    message = unittest_pb2.TestAllTypes()
    message.repeated_int64.append(-9223372036854775808)
    message.repeated_uint64.append(18446744073709551615)
    message.repeated_double.append(123.456)
    message.repeated_double.append(1.23e22)
    message.repeated_double.append(1.23e-18)
    message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'\"')
    self.CompareToGoldenText(
      self.RemoveRedundantZeros(text_format.MessageToString(message)),
      'repeated_int64: -9223372036854775808\n'
      'repeated_uint64: 18446744073709551615\n'
      'repeated_double: 123.456\n'
      'repeated_double: 1.23e+22\n'
      'repeated_double: 1.23e-18\n'
      'repeated_string: '
        '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n')

  def testMessageToString(self):
    message = unittest_pb2.ForeignMessage()
    message.c = 123
    self.assertEqual('c: 123\n', str(message))

  def RemoveRedundantZeros(self, text):
    # Some platforms print 1e+5 as 1e+005.  This is fine, but we need to remove
    # these redundant zeros in order to match the golden file.  Each pattern is
    # replaced twice so that up to two leading zeros are stripped
    # (e.g. 'e+005' -> 'e+05' -> 'e+5').
    return text.replace('e+0','e+').replace('e+0','e+') \
               .replace('e-0','e-').replace('e-0','e-')

  def testMergeGolden(self):
    golden_text = '\n'.join(self.ReadGolden('text_format_unittest_data.txt'))
    parsed_message = unittest_pb2.TestAllTypes()
    text_format.Merge(golden_text, parsed_message)

    message = unittest_pb2.TestAllTypes()
    test_util.SetAllFields(message)
    self.assertEqual(message, parsed_message)

  def testMergeGoldenExtensions(self):
    golden_text = '\n'.join(self.ReadGolden(
        'text_format_unittest_extensions_data.txt'))
    parsed_message = unittest_pb2.TestAllExtensions()
    text_format.Merge(golden_text, parsed_message)

    message = unittest_pb2.TestAllExtensions()
    test_util.SetAllExtensions(message)
    self.assertEqual(message, parsed_message)

  def testMergeAllFields(self):
    message = unittest_pb2.TestAllTypes()
    test_util.SetAllFields(message)
    ascii_text = text_format.MessageToString(message)

    parsed_message = unittest_pb2.TestAllTypes()
    text_format.Merge(ascii_text, parsed_message)
    self.assertEqual(message, parsed_message)
    test_util.ExpectAllFieldsSet(self, message)

  def testMergeAllExtensions(self):
    message = unittest_pb2.TestAllExtensions()
    test_util.SetAllExtensions(message)
    ascii_text = text_format.MessageToString(message)

    parsed_message = unittest_pb2.TestAllExtensions()
    text_format.Merge(ascii_text, parsed_message)
    self.assertEqual(message, parsed_message)

  def testMergeMessageSet(self):
    message = unittest_pb2.TestAllTypes()
    text = ('repeated_uint64: 1\n'
            'repeated_uint64: 2\n')
    text_format.Merge(text, message)
    self.assertEqual(1, message.repeated_uint64[0])
    self.assertEqual(2, message.repeated_uint64[1])

    message = unittest_mset_pb2.TestMessageSetContainer()
    text = ('message_set {\n'
            '  [protobuf_unittest.TestMessageSetExtension1] {\n'
            '    i: 23\n'
            '  }\n'
            '  [protobuf_unittest.TestMessageSetExtension2] {\n'
            '    str: \"foo\"\n'
            '  }\n'
            '}\n')
    text_format.Merge(text, message)
    ext1 = unittest_mset_pb2.TestMessageSetExtension1.message_set_extension
    ext2 = unittest_mset_pb2.TestMessageSetExtension2.message_set_extension
    self.assertEqual(23, message.message_set.Extensions[ext1].i)
    self.assertEqual('foo', message.message_set.Extensions[ext2].str)

  def testMergeExotic(self):
    message = unittest_pb2.TestAllTypes()
    text = ('repeated_int64: -9223372036854775808\n'
            'repeated_uint64: 18446744073709551615\n'
            'repeated_double: 123.456\n'
            'repeated_double: 1.23e+22\n'
            'repeated_double: 1.23e-18\n'
            'repeated_string: \n'
            '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n'
            'repeated_string: "foo" \'corge\' "grault"')
    text_format.Merge(text, message)

    self.assertEqual(-9223372036854775808, message.repeated_int64[0])
    self.assertEqual(18446744073709551615, message.repeated_uint64[0])
    self.assertEqual(123.456, message.repeated_double[0])
    self.assertEqual(1.23e22, message.repeated_double[1])
    self.assertEqual(1.23e-18, message.repeated_double[2])
    self.assertEqual(
        '\000\001\a\b\f\n\r\t\v\\\'\"', message.repeated_string[0])
    self.assertEqual('foocorgegrault', message.repeated_string[1])

  def testMergeUnknownField(self):
    message = unittest_pb2.TestAllTypes()
    text = 'unknown_field: 8\n'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:1 : Message type "protobuf_unittest.TestAllTypes" has no field '
         'named "unknown_field".'),
        text_format.Merge, text, message)

  def testMergeBadExtension(self):
    message = unittest_pb2.TestAllExtensions()
    text = '[unknown_extension]: 8\n'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        '1:2 : Extension "unknown_extension" not registered.',
        text_format.Merge, text, message)
    message = unittest_pb2.TestAllTypes()
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:2 : Message type "protobuf_unittest.TestAllTypes" does not have '
         'extensions.'),
        text_format.Merge, text, message)

  def testMergeGroupNotClosed(self):
    message = unittest_pb2.TestAllTypes()
    text = 'RepeatedGroup: <'
    self.assertRaisesWithMessage(
        text_format.ParseError, '1:16 : Expected ">".',
        text_format.Merge, text, message)

    text = 'RepeatedGroup: {'
    self.assertRaisesWithMessage(
        text_format.ParseError, '1:16 : Expected "}".',
        text_format.Merge, text, message)

  def testMergeEmptyGroup(self):
    message = unittest_pb2.TestAllTypes()
    text = 'OptionalGroup: {}'
    text_format.Merge(text, message)
    self.assertTrue(message.HasField('optionalgroup'))

    message.Clear()

    message = unittest_pb2.TestAllTypes()
    text = 'OptionalGroup: <>'
    text_format.Merge(text, message)
    self.assertTrue(message.HasField('optionalgroup'))

  def testMergeBadEnumValue(self):
    message = unittest_pb2.TestAllTypes()
    text = 'optional_nested_enum: BARR'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:23 : Enum type "protobuf_unittest.TestAllTypes.NestedEnum" '
         'has no value named BARR.'),
        text_format.Merge, text, message)

    message = unittest_pb2.TestAllTypes()
    text = 'optional_nested_enum: 100'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:23 : Enum type "protobuf_unittest.TestAllTypes.NestedEnum" '
         'has no value with number 100.'),
        text_format.Merge, text, message)

  def assertRaisesWithMessage(self, e_class, e, func, *args, **kwargs):
    """Same as assertRaises, but also compares the exception message."""
    if hasattr(e_class, '__name__'):
      exc_name = e_class.__name__
    else:
      exc_name = str(e_class)

    try:
      func(*args, **kwargs)
    except e_class, expr:
      if str(expr) != e:
        msg = '%s raised, but with wrong message: "%s" instead of "%s"'
        raise self.failureException(msg % (exc_name,
                                           str(expr).encode('string_escape'),
                                           e.encode('string_escape')))
      return
    else:
      raise self.failureException('%s not raised' % exc_name)


class TokenizerTest(unittest.TestCase):
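  """Tests for text_format._Tokenizer."""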

  def testSimpleTokenCases(self):
    text = ('identifier1:"string1"\n     \n\n'
            'identifier2 : \n \n123  \n  identifier3 :\'string\'\n'
            'identifiER_4 : 1.1e+2 ID5:-0.23 ID6:\'aaaa\\\'bbbb\'\n'
            'ID7 : "aa\\"bb"\n\n\n\n ID8: {A:inf B:-inf C:true D:false}\n'
            'ID9: 22 ID10: -111111111111111111 ID11: -22\n'
            'ID12: 2222222222222222222')
    tokenizer = text_format._Tokenizer(text)
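    # Each entry below is either a bare token string expected next in the
    # stream, or a (consume_method, expected_value) pair to call and check.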
    methods = [(tokenizer.ConsumeIdentifier, 'identifier1'),
               ':',
               (tokenizer.ConsumeString, 'string1'),
               (tokenizer.ConsumeIdentifier, 'identifier2'),
               ':',
               (tokenizer.ConsumeInt32, 123),
               (tokenizer.ConsumeIdentifier, 'identifier3'),
               ':',
               (tokenizer.ConsumeString, 'string'),
               (tokenizer.ConsumeIdentifier, 'identifiER_4'),
               ':',
               (tokenizer.ConsumeFloat, 1.1e+2),
               (tokenizer.ConsumeIdentifier, 'ID5'),
               ':',
               (tokenizer.ConsumeFloat, -0.23),
               (tokenizer.ConsumeIdentifier, 'ID6'),
               ':',
               (tokenizer.ConsumeString, 'aaaa\'bbbb'),
               (tokenizer.ConsumeIdentifier, 'ID7'),
               ':',
               (tokenizer.ConsumeString, 'aa\"bb'),
               (tokenizer.ConsumeIdentifier, 'ID8'),
               ':',
               '{',
               (tokenizer.ConsumeIdentifier, 'A'),
               ':',
               (tokenizer.ConsumeFloat, text_format._INFINITY),
               (tokenizer.ConsumeIdentifier, 'B'),
               ':',
               (tokenizer.ConsumeFloat, -text_format._INFINITY),
               (tokenizer.ConsumeIdentifier, 'C'),
               ':',
               (tokenizer.ConsumeBool, True),
               (tokenizer.ConsumeIdentifier, 'D'),
               ':',
               (tokenizer.ConsumeBool, False),
               '}',
               (tokenizer.ConsumeIdentifier, 'ID9'),
               ':',
               (tokenizer.ConsumeUint32, 22),
               (tokenizer.ConsumeIdentifier, 'ID10'),
               ':',
               (tokenizer.ConsumeInt64, -111111111111111111),
               (tokenizer.ConsumeIdentifier, 'ID11'),
               ':',
               (tokenizer.ConsumeInt32, -22),
               (tokenizer.ConsumeIdentifier, 'ID12'),
               ':',
               (tokenizer.ConsumeUint64, 2222222222222222222)]

    i = 0
    while not tokenizer.AtEnd():
      m = methods[i]
      if isinstance(m, str):
        token = tokenizer.token
        self.assertEqual(token, m)
        tokenizer.NextToken()
      else:
        self.assertEqual(m[1], m[0]())
      i += 1

  def testConsumeIntegers(self):
    # This test only covers the failure cases of the integer parsing methods,
    # plus the '0' and '-0' special cases.
    int64_max = (1 << 63) - 1
    uint32_max = (1 << 32) - 1
    text = '-1 %d %d' % (uint32_max + 1, int64_max + 1)
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeUint32)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeUint64)
    self.assertEqual(-1, tokenizer.ConsumeInt32())

    self.assertRaises(text_format.ParseError, tokenizer.ConsumeUint32)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeInt32)
    self.assertEqual(uint32_max + 1, tokenizer.ConsumeInt64())

    self.assertRaises(text_format.ParseError, tokenizer.ConsumeInt64)
    self.assertEqual(int64_max + 1, tokenizer.ConsumeUint64())
    self.assertTrue(tokenizer.AtEnd())

    text = '-0 -0 0 0'
    tokenizer = text_format._Tokenizer(text)
    self.assertEqual(0, tokenizer.ConsumeUint32())
    self.assertEqual(0, tokenizer.ConsumeUint64())
    self.assertEqual(0, tokenizer.ConsumeUint32())
    self.assertEqual(0, tokenizer.ConsumeUint64())
    self.assertTrue(tokenizer.AtEnd())

  def testConsumeByteString(self):
    text = '"string1\''
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = 'string1"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = '\n"\\xt"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = '\n"\\"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = '\n"\\x"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

  def testConsumeBool(self):
    text = 'not-a-bool'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeBool)

  def testInfNan(self):
    # Make sure our infinity and NaN definitions are sound.
    self.assertEqual(float, type(text_format._INFINITY))
    self.assertEqual(float, type(text_format._NAN))
    self.assertTrue(text_format._NAN != text_format._NAN)

    inf_times_zero = text_format._INFINITY * 0
    self.assertTrue(inf_times_zero != inf_times_zero)
    self.assertTrue(text_format._INFINITY > 0)


if __name__ == '__main__':
  unittest.main()