#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using NUnit.Framework;
using System;
using System.IO;

namespace Google.Protobuf
{
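    /// <summary>
    /// Tests for <see cref="JsonTokenizer"/>, which converts JSON text into a stream of
    /// <see cref="JsonToken"/> values, terminated by an "end document" token.
    /// </summary>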
    public class JsonTokenizerTest
    {
        [Test]
        public void EmptyObjectValue()
        {
            AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject);
        }

        [Test]
        public void EmptyArrayValue()
        {
            AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray);
        }

        [Test]
        [TestCase("foo", "foo")]
        [TestCase("tab\\t", "tab\t")]
        [TestCase("line\\nfeed", "line\nfeed")]
        [TestCase("carriage\\rreturn", "carriage\rreturn")]
        [TestCase("back\\bspace", "back\bspace")]
        [TestCase("form\\ffeed", "form\ffeed")]
        [TestCase("escaped\\/slash", "escaped/slash")]
        [TestCase("escaped\\\\backslash", "escaped\\backslash")]
        [TestCase("escaped\\\"quote", "escaped\"quote")]
        [TestCase("foo {}[] bar", "foo {}[] bar")]
        [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
        [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
        [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
        public void StringValue(string json, string expectedValue)
        {
            AssertTokensNoReplacement("\"" + json + "\"", JsonToken.Value(expectedValue));
        }

        // Valid surrogate pairs, with mixed escaping. These test cases can't be expressed
        // using TestCase as they have no valid UTF-8 representation.
        // It's unclear exactly how we should handle a mixture of escaped and unescaped
        // surrogates: that can't come from UTF-8 text, but it could come from a .NET string.
        // For the moment, treat it as valid in the obvious way.
        [Test]
        public void MixedSurrogatePairs()
        {
            string expected = "\ud800\udc00";
            AssertTokens("'\\ud800\udc00'", JsonToken.Value(expected));
            AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
        }

        [Test]
        public void ObjectDepth()
        {
            string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
            // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Name("foo"), tokenizer.Next());
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Name("x"), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Value(1), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.Name("y"), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.StartArray, tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth); // Depth hasn't changed in array
            Assert.AreEqual(JsonToken.Value(0), tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndArray, tokenizer.Next());
            Assert.AreEqual(2, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.AreEqual(0, tokenizer.ObjectDepth);
        }

        [Test]
        public void ObjectDepth_WithPushBack()
        {
            string json = "{}";
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            var token = tokenizer.Next();
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            // When we push back a "start object", we should effectively be back to the previous depth.
            tokenizer.PushBack(token);
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            // Read the same token again, and get back to depth 1
            token = tokenizer.Next();
            Assert.AreEqual(1, tokenizer.ObjectDepth);

            // Now the same in reverse, with EndObject
            token = tokenizer.Next();
            Assert.AreEqual(0, tokenizer.ObjectDepth);
            tokenizer.PushBack(token);
            Assert.AreEqual(1, tokenizer.ObjectDepth);
            tokenizer.Next();
            Assert.AreEqual(0, tokenizer.ObjectDepth);
        }

        [Test]
        [TestCase("embedded tab\t")]
        [TestCase("embedded CR\r")]
        [TestCase("embedded LF\n")]
        [TestCase("embedded bell\u0007")]
        [TestCase("bad escape\\a")]
        [TestCase("incomplete escape\\")]
        [TestCase("incomplete Unicode escape\\u000")]
        [TestCase("invalid Unicode escape\\u000H")]
        // Surrogate pair handling, both in raw .NET strings and escaped. We only need
        // to detect this in strings, as non-ASCII characters anywhere other than in strings
        // will already lead to parsing errors.
        [TestCase("\\ud800")]
        [TestCase("\\udc00")]
        [TestCase("\\ud800x")]
        [TestCase("\\udc00x")]
        [TestCase("\\udc00\\ud800y")]
        public void InvalidStringValue(string json)
        {
            AssertThrowsAfter("\"" + json + "\"");
        }

        // Tests for invalid strings that can't be expressed in attributes,
        // as the constants have no valid UTF-8 representation.
        [Test]
        public void InvalidSurrogatePairs()
        {
            AssertThrowsAfter("\"\ud800x\"");
            AssertThrowsAfter("\"\udc00y\"");
            AssertThrowsAfter("\"\udc00\ud800y\"");
        }

        [Test]
        [TestCase("0", 0)]
        [TestCase("-0", 0)] // We don't distinguish between positive and negative 0
        [TestCase("1", 1)]
        [TestCase("-1", -1)]
        // From here on, assume leading sign is okay...
        [TestCase("1.125", 1.125)]
        [TestCase("1.0", 1)]
        [TestCase("1e5", 100000)]
        [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
        [TestCase("1E5", 100000)]
        [TestCase("1e+5", 100000)]
        [TestCase("1E-5", 0.00001)]
        [TestCase("123E-2", 1.23)]
        [TestCase("123.45E3", 123450)]
        [TestCase("   1   ", 1)]
        public void NumberValue(string json, double expectedValue)
        {
            AssertTokens(json, JsonToken.Value(expectedValue));
        }

        [Test]
        [TestCase("00")]
        [TestCase(".5")]
        [TestCase("1.")]
        [TestCase("1e")]
        [TestCase("1e-")]
        [TestCase("--")]
        [TestCase("--1")]
        [TestCase("-1.7977e308")]
        [TestCase("1.7977e308")]
        public void InvalidNumberValue(string json)
        {
            AssertThrowsAfter(json);
        }

        [Test]
        [TestCase("nul")]
        [TestCase("nothing")]
        [TestCase("truth")]
        [TestCase("fALSEhood")]
        public void InvalidLiterals(string json)
        {
            AssertThrowsAfter(json);
        }

        [Test]
        public void NullValue()
        {
            AssertTokens("null", JsonToken.Null);
        }

        [Test]
        public void TrueValue()
        {
            AssertTokens("true", JsonToken.True);
        }

        [Test]
        public void FalseValue()
        {
            AssertTokens("false", JsonToken.False);
        }

        [Test]
        public void SimpleObject()
        {
            AssertTokens("{'x': 'y'}",
                JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject);
        }

        [Test]
        [TestCase("[10, 20", 3)]
        [TestCase("[10,", 2)]
        [TestCase("[10:20]", 2)]
        [TestCase("[", 1)]
        [TestCase("[,", 1)]
        [TestCase("{", 1)]
        [TestCase("{,", 1)]
        [TestCase("{[", 1)]
        [TestCase("{{", 1)]
        [TestCase("{0", 1)]
        [TestCase("{null", 1)]
        [TestCase("{false", 1)]
        [TestCase("{true", 1)]
        [TestCase("}", 0)]
        [TestCase("]", 0)]
        [TestCase(",", 0)]
        [TestCase("'foo' 'bar'", 1)]
        [TestCase(":", 0)]
        [TestCase("'foo", 0)] // Incomplete string
        [TestCase("{ 'foo' }", 2)]
        [TestCase("{ x:1", 1)] // Property names must be quoted
        [TestCase("{]", 1)]
        [TestCase("[}", 1)]
        [TestCase("[1,", 2)]
        [TestCase("{'x':0]", 3)]
        [TestCase("{ 'foo': }", 2)]
        [TestCase("{ 'foo':'bar', }", 3)]
        public void InvalidStructure(string json, int expectedValidTokens)
        {
            // Note: we don't test that the earlier tokens are exactly as expected,
            // partly because that's hard to parameterize.
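            // As with AssertTokens, apostrophes in the test data stand in for double quotes,
            // so the attribute arguments stay readable.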
            var reader = new StringReader(json.Replace('\'', '"'));
            var tokenizer = JsonTokenizer.FromTextReader(reader);
            for (int i = 0; i < expectedValidTokens; i++)
            {
                Assert.IsNotNull(tokenizer.Next());
            }
            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }

        [Test]
        public void ArrayMixedType()
        {
            AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]",
                JsonToken.StartArray,
                JsonToken.Value(1),
                JsonToken.Value("foo"),
                JsonToken.Null,
                JsonToken.False,
                JsonToken.True,
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndArray);
        }

        [Test]
        public void ObjectMixedType()
        {
            AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true,
                           'f': [2], 'g': {'x':'y' }}",
                JsonToken.StartObject,
                JsonToken.Name("a"),
                JsonToken.Value(1),
                JsonToken.Name("b"),
                JsonToken.Value("bar"),
                JsonToken.Name("c"),
                JsonToken.Null,
                JsonToken.Name("d"),
                JsonToken.False,
                JsonToken.Name("e"),
                JsonToken.True,
                JsonToken.Name("f"),
                JsonToken.StartArray,
                JsonToken.Value(2),
                JsonToken.EndArray,
                JsonToken.Name("g"),
                JsonToken.StartObject,
                JsonToken.Name("x"),
                JsonToken.Value("y"),
                JsonToken.EndObject,
                JsonToken.EndObject);
        }

        [Test]
        public void NextAfterEndDocumentThrows()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }

        [Test]
        public void CanPushBackEndDocument()
        {
            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            tokenizer.PushBack(JsonToken.EndDocument);
            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
        }

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// All apostrophes are first converted to double quotes, allowing any tests
        /// that don't need to check actual apostrophe handling to use apostrophes in the JSON, avoiding
        /// messy string literal escaping. The "end document" token is not specified in the list of
        /// expected tokens, but is implicit.
        /// </summary>
        private static void AssertTokens(string json, params JsonToken[] expectedTokens)
        {
            AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens);
        }

        /// <summary>
        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
        /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, this does not perform any character
        /// replacement on the specified JSON, and should be used when the text contains apostrophes which
        /// are expected to be used *as* apostrophes. The "end document" token is not specified in the list of
        /// expected tokens, but is implicit.
        /// </summary>
        private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
        {
            var reader = new StringReader(json);
            var tokenizer = JsonTokenizer.FromTextReader(reader);
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                var actualToken = tokenizer.Next();
                if (actualToken == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of token stream", expectedTokens[i]);
                }
                Assert.AreEqual(expectedTokens[i], actualToken);
            }
            var finalToken = tokenizer.Next();
            if (finalToken != JsonToken.EndDocument)
            {
                Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken);
            }
        }

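        /// <summary>
        /// Asserts that tokenizing the specified JSON (with no character replacement) produces the
        /// given sequence of tokens and then throws <see cref="InvalidJsonException"/> on the next
        /// call to <c>Next()</c>.
        /// </summary>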
        private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
        {
            var reader = new StringReader(json);
            var tokenizer = JsonTokenizer.FromTextReader(reader);
            for (int i = 0; i < expectedTokens.Length; i++)
            {
                var actualToken = tokenizer.Next();
                if (actualToken == JsonToken.EndDocument)
                {
                    Assert.Fail("Expected {0} but reached end of document", expectedTokens[i]);
                }
                Assert.AreEqual(expectedTokens[i], actualToken);
            }
            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
        }
    }
}