1#region Copyright notice and license
2// Protocol Buffers - Google's data interchange format
3// Copyright 2015 Google Inc.  All rights reserved.
4// https://developers.google.com/protocol-buffers/
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10//     * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12//     * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16//     * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31#endregion
32
33using Google.Protobuf.Reflection;
34using Google.Protobuf.WellKnownTypes;
35using System;
36using System.Collections;
37using System.Collections.Generic;
38using System.Globalization;
39using System.IO;
40using System.Text;
41using System.Text.RegularExpressions;
42
43namespace Google.Protobuf
44{
45    /// <summary>
46    /// Reflection-based converter from JSON to messages.
47    /// </summary>
48    /// <remarks>
49    /// <para>
50    /// Instances of this class are thread-safe, with no mutable state.
51    /// </para>
52    /// <para>
53    /// This is a simple start to get JSON parsing working. As it's reflection-based,
54    /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
55    /// (This code is generally not heavily optimized.)
56    /// </para>
57    /// </remarks>
58    public sealed class JsonParser
59    {
        // Note: using 0-9 instead of \d to ensure no non-ASCII digits.
        // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest.
        // Named groups: "datetime" (yyyy-MM-ddTHH:mm:ss), an optional "subseconds" group ('.' followed by
        // 1-9 digits) and a mandatory "offset" group (either "Z" or a signed hh:mm offset).
        private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable);
        // Named groups: an optional '-' ("sign"), 1-12 integer digits ("int") and an optional "subseconds"
        // group ('.' followed by 1-9 digits); the text must end with a literal 's'.
        private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable);
        // NOTE(review): presumably indexed by the length of a matched "subseconds" group (including its
        // leading '.') to scale the parsed digits up to nanoseconds - confirm against the code that uses it.
        private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
        // Separator set containing only ','. NOTE(review): presumably used to split the paths of a
        // field mask string - confirm against the code that uses it.
        private static readonly char[] FieldMaskPathSeparators = new[] { ',' };

        // Shared parser constructed from Settings.Default; returned by the Default property.
        private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
68
69        // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
70        // and the signatures of various methods.
71        private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
72            WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
73        {
74            { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
75            { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
76            { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
77            { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
78                parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
79            { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
80            { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
81            { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
82            { Int32Value.Descriptor.FullName, MergeWrapperField },
83            { Int64Value.Descriptor.FullName, MergeWrapperField },
84            { UInt32Value.Descriptor.FullName, MergeWrapperField },
85            { UInt64Value.Descriptor.FullName, MergeWrapperField },
86            { FloatValue.Descriptor.FullName, MergeWrapperField },
87            { DoubleValue.Descriptor.FullName, MergeWrapperField },
88            { BytesValue.Descriptor.FullName, MergeWrapperField },
89            { StringValue.Descriptor.FullName, MergeWrapperField }
90        };
91
92        // Convenience method to avoid having to repeat the same code multiple times in the above
93        // dictionary initialization.
94        private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
95        {
96            parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer);
97        }
98
99        /// <summary>
100        /// Returns a formatter using the default settings.
101        /// </summary>
102        public static JsonParser Default { get { return defaultInstance; } }
103
104        private readonly Settings settings;
105
106        /// <summary>
107        /// Creates a new formatted with the given settings.
108        /// </summary>
109        /// <param name="settings">The settings.</param>
110        public JsonParser(Settings settings)
111        {
112            this.settings = settings;
113        }
114
115        /// <summary>
116        /// Parses <paramref name="json"/> and merges the information into the given message.
117        /// </summary>
118        /// <param name="message">The message to merge the JSON information into.</param>
119        /// <param name="json">The JSON to parse.</param>
120        internal void Merge(IMessage message, string json)
121        {
122            Merge(message, new StringReader(json));
123        }
124
125        /// <summary>
126        /// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
127        /// </summary>
128        /// <param name="message">The message to merge the JSON information into.</param>
129        /// <param name="jsonReader">Reader providing the JSON to parse.</param>
130        internal void Merge(IMessage message, TextReader jsonReader)
131        {
132            var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
133            Merge(message, tokenizer);
134            var lastToken = tokenizer.Next();
135            if (lastToken != JsonToken.EndDocument)
136            {
137                throw new InvalidProtocolBufferException("Expected end of JSON after object");
138            }
139        }
140
141        /// <summary>
142        /// Merges the given message using data from the given tokenizer. In most cases, the next
143        /// token should be a "start object" token, but wrapper types and nullity can invalidate
144        /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
145        /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
146        /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
147        /// </summary>
148        private void Merge(IMessage message, JsonTokenizer tokenizer)
149        {
150            if (tokenizer.ObjectDepth > settings.RecursionLimit)
151            {
152                throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
153            }
154            if (message.Descriptor.IsWellKnownType)
155            {
156                Action<JsonParser, IMessage, JsonTokenizer> handler;
157                if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler))
158                {
159                    handler(this, message, tokenizer);
160                    return;
161                }
162                // Well-known types with no special handling continue in the normal way.
163            }
164            var token = tokenizer.Next();
165            if (token.Type != JsonToken.TokenType.StartObject)
166            {
167                throw new InvalidProtocolBufferException("Expected an object");
168            }
169            var descriptor = message.Descriptor;
170            var jsonFieldMap = descriptor.Fields.ByJsonName();
171            // All the oneof fields we've already accounted for - we can only see each of them once.
172            // The set is created lazily to avoid the overhead of creating a set for every message
173            // we parsed, when oneofs are relatively rare.
174            HashSet<OneofDescriptor> seenOneofs = null;
175            while (true)
176            {
177                token = tokenizer.Next();
178                if (token.Type == JsonToken.TokenType.EndObject)
179                {
180                    return;
181                }
182                if (token.Type != JsonToken.TokenType.Name)
183                {
184                    throw new InvalidOperationException("Unexpected token type " + token.Type);
185                }
186                string name = token.StringValue;
187                FieldDescriptor field;
188                if (jsonFieldMap.TryGetValue(name, out field))
189                {
190                    if (field.ContainingOneof != null)
191                    {
192                        if (seenOneofs == null)
193                        {
194                            seenOneofs = new HashSet<OneofDescriptor>();
195                        }
196                        if (!seenOneofs.Add(field.ContainingOneof))
197                        {
198                            throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
199                        }
200                    }
201                    MergeField(message, field, tokenizer);
202                }
203                else
204                {
205                    // TODO: Is this what we want to do? If not, we'll need to skip the value,
206                    // which may be an object or array. (We might want to put code in the tokenizer
207                    // to do that.)
208                    throw new InvalidProtocolBufferException("Unknown field: " + name);
209                }
210            }
211        }
212
213        private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
214        {
215            var token = tokenizer.Next();
216            if (token.Type == JsonToken.TokenType.Null)
217            {
218                // Clear the field if we see a null token, unless it's for a singular field of type
219                // google.protobuf.Value.
220                // Note: different from Java API, which just ignores it.
221                // TODO: Bring it more in line? Discuss...
222                if (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field))
223                {
224                    field.Accessor.Clear(message);
225                    return;
226                }
227            }
228            tokenizer.PushBack(token);
229
230            if (field.IsMap)
231            {
232                MergeMapField(message, field, tokenizer);
233            }
234            else if (field.IsRepeated)
235            {
236                MergeRepeatedField(message, field, tokenizer);
237            }
238            else
239            {
240                var value = ParseSingleValue(field, tokenizer);
241                field.Accessor.SetValue(message, value);
242            }
243        }
244
245        private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
246        {
247            var token = tokenizer.Next();
248            if (token.Type != JsonToken.TokenType.StartArray)
249            {
250                throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type);
251            }
252
253            IList list = (IList) field.Accessor.GetValue(message);
254            while (true)
255            {
256                token = tokenizer.Next();
257                if (token.Type == JsonToken.TokenType.EndArray)
258                {
259                    return;
260                }
261                tokenizer.PushBack(token);
262                if (token.Type == JsonToken.TokenType.Null)
263                {
264                    throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
265                }
266                list.Add(ParseSingleValue(field, tokenizer));
267            }
268        }
269
270        private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
271        {
272            // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
273            var token = tokenizer.Next();
274            if (token.Type != JsonToken.TokenType.StartObject)
275            {
276                throw new InvalidProtocolBufferException("Expected an object to populate a map");
277            }
278
279            var type = field.MessageType;
280            var keyField = type.FindFieldByNumber(1);
281            var valueField = type.FindFieldByNumber(2);
282            if (keyField == null || valueField == null)
283            {
284                throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
285            }
286            IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);
287
288            while (true)
289            {
290                token = tokenizer.Next();
291                if (token.Type == JsonToken.TokenType.EndObject)
292                {
293                    return;
294                }
295                object key = ParseMapKey(keyField, token.StringValue);
296                object value = ParseSingleValue(valueField, tokenizer);
297                if (value == null)
298                {
299                    throw new InvalidProtocolBufferException("Map values must not be null");
300                }
301                dictionary[key] = value;
302            }
303        }
304
305        private static bool IsGoogleProtobufValueField(FieldDescriptor field)
306        {
307            return field.FieldType == FieldType.Message &&
308                field.MessageType.FullName == Value.Descriptor.FullName;
309        }
310
311        private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
312        {
313            var token = tokenizer.Next();
314            if (token.Type == JsonToken.TokenType.Null)
315            {
316                // TODO: In order to support dynamic messages, we should really build this up
317                // dynamically.
318                if (IsGoogleProtobufValueField(field))
319                {
320                    return Value.ForNull();
321                }
322                return null;
323            }
324
325            var fieldType = field.FieldType;
326            if (fieldType == FieldType.Message)
327            {
328                // Parse wrapper types as their constituent types.
329                // TODO: What does this mean for null?
330                if (field.MessageType.IsWrapperType)
331                {
332                    field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
333                    fieldType = field.FieldType;
334                }
335                else
336                {
337                    // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
338                    tokenizer.PushBack(token);
339                    IMessage subMessage = NewMessageForField(field);
340                    Merge(subMessage, tokenizer);
341                    return subMessage;
342                }
343            }
344
345            switch (token.Type)
346            {
347                case JsonToken.TokenType.True:
348                case JsonToken.TokenType.False:
349                    if (fieldType == FieldType.Bool)
350                    {
351                        return token.Type == JsonToken.TokenType.True;
352                    }
353                    // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
354                    // case instead, but this way we'd only need to change one place.
355                    goto default;
356                case JsonToken.TokenType.StringValue:
357                    return ParseSingleStringValue(field, token.StringValue);
358                // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
359                case JsonToken.TokenType.Number:
360                    return ParseSingleNumberValue(field, token);
361                case JsonToken.TokenType.Null:
362                    throw new NotImplementedException("Haven't worked out what to do for null yet");
363                default:
364                    throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
365            }
366        }
367
368        /// <summary>
369        /// Parses <paramref name="json"/> into a new message.
370        /// </summary>
371        /// <typeparam name="T">The type of message to create.</typeparam>
372        /// <param name="json">The JSON to parse.</param>
373        /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
374        /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
375        public T Parse<T>(string json) where T : IMessage, new()
376        {
377            ProtoPreconditions.CheckNotNull(json, nameof(json));
378            return Parse<T>(new StringReader(json));
379        }
380
381        /// <summary>
382        /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
383        /// </summary>
384        /// <typeparam name="T">The type of message to create.</typeparam>
385        /// <param name="jsonReader">Reader providing the JSON to parse.</param>
386        /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
387        /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
388        public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
389        {
390            ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
391            T message = new T();
392            Merge(message, jsonReader);
393            return message;
394        }
395
396        /// <summary>
397        /// Parses <paramref name="json"/> into a new message.
398        /// </summary>
399        /// <param name="json">The JSON to parse.</param>
400        /// <param name="descriptor">Descriptor of message type to parse.</param>
401        /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
402        /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
403        public IMessage Parse(string json, MessageDescriptor descriptor)
404        {
405            ProtoPreconditions.CheckNotNull(json, nameof(json));
406            ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
407            return Parse(new StringReader(json), descriptor);
408        }
409
410        /// <summary>
411        /// Parses JSON read from <paramref name="jsonReader"/> into a new message.
412        /// </summary>
413        /// <param name="jsonReader">Reader providing the JSON to parse.</param>
414        /// <param name="descriptor">Descriptor of message type to parse.</param>
415        /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
416        /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
417        public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
418        {
419            ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
420            ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
421            IMessage message = descriptor.Parser.CreateTemplate();
422            Merge(message, jsonReader);
423            return message;
424        }
425
426        private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
427        {
428            var firstToken = tokenizer.Next();
429            var fields = message.Descriptor.Fields;
430            switch (firstToken.Type)
431            {
432                case JsonToken.TokenType.Null:
433                    fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
434                    return;
435                case JsonToken.TokenType.StringValue:
436                    fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue);
437                    return;
438                case JsonToken.TokenType.Number:
439                    fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue);
440                    return;
441                case JsonToken.TokenType.False:
442                case JsonToken.TokenType.True:
443                    fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True);
444                    return;
445                case JsonToken.TokenType.StartObject:
446                    {
447                        var field = fields[Value.StructValueFieldNumber];
448                        var structMessage = NewMessageForField(field);
449                        tokenizer.PushBack(firstToken);
450                        Merge(structMessage, tokenizer);
451                        field.Accessor.SetValue(message, structMessage);
452                        return;
453                    }
454                case JsonToken.TokenType.StartArray:
455                    {
456                        var field = fields[Value.ListValueFieldNumber];
457                        var list = NewMessageForField(field);
458                        tokenizer.PushBack(firstToken);
459                        Merge(list, tokenizer);
460                        field.Accessor.SetValue(message, list);
461                        return;
462                    }
463                default:
464                    throw new InvalidOperationException("Unexpected token type: " + firstToken.Type);
465            }
466        }
467
468        private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
469        {
470            var token = tokenizer.Next();
471            if (token.Type != JsonToken.TokenType.StartObject)
472            {
473                throw new InvalidProtocolBufferException("Expected object value for Struct");
474            }
475            tokenizer.PushBack(token);
476
477            var field = message.Descriptor.Fields[Struct.FieldsFieldNumber];
478            MergeMapField(message, field, tokenizer);
479        }
480
481        private void MergeAny(IMessage message, JsonTokenizer tokenizer)
482        {
483            // Record the token stream until we see the @type property. At that point, we can take the value, consult
484            // the type registry for the relevant message, and replay the stream, omitting the @type property.
485            var tokens = new List<JsonToken>();
486
487            var token = tokenizer.Next();
488            if (token.Type != JsonToken.TokenType.StartObject)
489            {
490                throw new InvalidProtocolBufferException("Expected object value for Any");
491            }
492            int typeUrlObjectDepth = tokenizer.ObjectDepth;
493
494            // The check for the property depth protects us from nested Any values which occur before the type URL
495            // for *this* Any.
496            while (token.Type != JsonToken.TokenType.Name ||
497                token.StringValue != JsonFormatter.AnyTypeUrlField ||
498                tokenizer.ObjectDepth != typeUrlObjectDepth)
499            {
500                tokens.Add(token);
501                token = tokenizer.Next();
502
503                if (tokenizer.ObjectDepth < typeUrlObjectDepth)
504                {
505                    throw new InvalidProtocolBufferException("Any message with no @type");
506                }
507            }
508
509            // Don't add the @type property or its value to the recorded token list
510            token = tokenizer.Next();
511            if (token.Type != JsonToken.TokenType.StringValue)
512            {
513                throw new InvalidProtocolBufferException("Expected string value for Any.@type");
514            }
515            string typeUrl = token.StringValue;
516            string typeName = Any.GetTypeName(typeUrl);
517
518            MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
519            if (descriptor == null)
520            {
521                throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
522            }
523
524            // Now replay the token stream we've already read and anything that remains of the object, just parsing it
525            // as normal. Our original tokenizer should end up at the end of the object.
526            var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
527            var body = descriptor.Parser.CreateTemplate();
528            if (descriptor.IsWellKnownType)
529            {
530                MergeWellKnownTypeAnyBody(body, replay);
531            }
532            else
533            {
534                Merge(body, replay);
535            }
536            var data = body.ToByteString();
537
538            // Now that we have the message data, we can pack it into an Any (the message received as a parameter).
539            message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
540            message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
541        }
542
543        // Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
544        // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
545        // itself, and then end-object.
546        private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
547        {
548            var token = tokenizer.Next(); // Definitely start-object; checked in previous method
549            token = tokenizer.Next();
550            // TODO: What about an absent Int32Value, for example?
551            if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField)
552            {
553                throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
554            }
555            Merge(body, tokenizer);
556            token = tokenizer.Next();
557            if (token.Type != JsonToken.TokenType.EndObject)
558            {
559                throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
560            }
561        }
562
563        #region Utility methods which don't depend on the state (or settings) of the parser.
564        private static object ParseMapKey(FieldDescriptor field, string keyText)
565        {
566            switch (field.FieldType)
567            {
568                case FieldType.Bool:
569                    if (keyText == "true")
570                    {
571                        return true;
572                    }
573                    if (keyText == "false")
574                    {
575                        return false;
576                    }
577                    throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
578                case FieldType.String:
579                    return keyText;
580                case FieldType.Int32:
581                case FieldType.SInt32:
582                case FieldType.SFixed32:
583                    return ParseNumericString(keyText, int.Parse);
584                case FieldType.UInt32:
585                case FieldType.Fixed32:
586                    return ParseNumericString(keyText, uint.Parse);
587                case FieldType.Int64:
588                case FieldType.SInt64:
589                case FieldType.SFixed64:
590                    return ParseNumericString(keyText, long.Parse);
591                case FieldType.UInt64:
592                case FieldType.Fixed64:
593                    return ParseNumericString(keyText, ulong.Parse);
594                default:
595                    throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
596            }
597        }
598
599        private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
600        {
601            double value = token.NumberValue;
602            checked
603            {
604                try
605                {
606                    switch (field.FieldType)
607                    {
608                        case FieldType.Int32:
609                        case FieldType.SInt32:
610                        case FieldType.SFixed32:
611                            CheckInteger(value);
612                            return (int) value;
613                        case FieldType.UInt32:
614                        case FieldType.Fixed32:
615                            CheckInteger(value);
616                            return (uint) value;
617                        case FieldType.Int64:
618                        case FieldType.SInt64:
619                        case FieldType.SFixed64:
620                            CheckInteger(value);
621                            return (long) value;
622                        case FieldType.UInt64:
623                        case FieldType.Fixed64:
624                            CheckInteger(value);
625                            return (ulong) value;
626                        case FieldType.Double:
627                            return value;
628                        case FieldType.Float:
629                            if (double.IsNaN(value))
630                            {
631                                return float.NaN;
632                            }
633                            if (value > float.MaxValue || value < float.MinValue)
634                            {
635                                if (double.IsPositiveInfinity(value))
636                                {
637                                    return float.PositiveInfinity;
638                                }
639                                if (double.IsNegativeInfinity(value))
640                                {
641                                    return float.NegativeInfinity;
642                                }
643                                throw new InvalidProtocolBufferException($"Value out of range: {value}");
644                            }
645                            return (float) value;
646                        case FieldType.Enum:
647                            CheckInteger(value);
648                            // Just return it as an int, and let the CLR convert it.
649                            // Note that we deliberately don't check that it's a known value.
650                            return (int) value;
651                        default:
652                            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
653                    }
654                }
655                catch (OverflowException)
656                {
657                    throw new InvalidProtocolBufferException($"Value out of range: {value}");
658                }
659            }
660        }
661
662        private static void CheckInteger(double value)
663        {
664            if (double.IsInfinity(value) || double.IsNaN(value))
665            {
666                throw new InvalidProtocolBufferException($"Value not an integer: {value}");
667            }
668            if (value != Math.Floor(value))
669            {
670                throw new InvalidProtocolBufferException($"Value not an integer: {value}");
671            }
672        }
673
674        private static object ParseSingleStringValue(FieldDescriptor field, string text)
675        {
676            switch (field.FieldType)
677            {
678                case FieldType.String:
679                    return text;
680                case FieldType.Bytes:
681                    try
682                    {
683                        return ByteString.FromBase64(text);
684                    }
685                    catch (FormatException e)
686                    {
687                        throw InvalidProtocolBufferException.InvalidBase64(e);
688                    }
689                case FieldType.Int32:
690                case FieldType.SInt32:
691                case FieldType.SFixed32:
692                    return ParseNumericString(text, int.Parse);
693                case FieldType.UInt32:
694                case FieldType.Fixed32:
695                    return ParseNumericString(text, uint.Parse);
696                case FieldType.Int64:
697                case FieldType.SInt64:
698                case FieldType.SFixed64:
699                    return ParseNumericString(text, long.Parse);
700                case FieldType.UInt64:
701                case FieldType.Fixed64:
702                    return ParseNumericString(text, ulong.Parse);
703                case FieldType.Double:
704                    double d = ParseNumericString(text, double.Parse);
705                    ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
706                    return d;
707                case FieldType.Float:
708                    float f = ParseNumericString(text, float.Parse);
709                    ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
710                    return f;
711                case FieldType.Enum:
712                    var enumValue = field.EnumType.FindValueByName(text);
713                    if (enumValue == null)
714                    {
715                        throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
716                    }
717                    // Just return it as an int, and let the CLR convert it.
718                    return enumValue.Number;
719                default:
720                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
721            }
722        }
723
724        /// <summary>
725        /// Creates a new instance of the message type for the given field.
726        /// </summary>
727        private static IMessage NewMessageForField(FieldDescriptor field)
728        {
729            return field.MessageType.Parser.CreateTemplate();
730        }
731
732        private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
733        {
734            // Can't prohibit this with NumberStyles.
735            if (text.StartsWith("+"))
736            {
737                throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
738            }
739            if (text.StartsWith("0") && text.Length > 1)
740            {
741                if (text[1] >= '0' && text[1] <= '9')
742                {
743                    throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
744                }
745            }
746            else if (text.StartsWith("-0") && text.Length > 2)
747            {
748                if (text[2] >= '0' && text[2] <= '9')
749                {
750                    throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
751                }
752            }
753            try
754            {
755                return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
756            }
757            catch (FormatException)
758            {
759                throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
760            }
761            catch (OverflowException)
762            {
763                throw new InvalidProtocolBufferException($"Value out of range: {text}");
764            }
765        }
766
767        /// <summary>
768        /// Checks that any infinite/NaN values originated from the correct text.
769        /// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
770        /// way that Mono parses out-of-range values as infinity.
771        /// </summary>
772        private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
773        {
774            if ((isPositiveInfinity && text != "Infinity") ||
775                (isNegativeInfinity && text != "-Infinity") ||
776                (isNaN && text != "NaN"))
777            {
778                throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
779            }
780        }
781
782        private static void MergeTimestamp(IMessage message, JsonToken token)
783        {
784            if (token.Type != JsonToken.TokenType.StringValue)
785            {
786                throw new InvalidProtocolBufferException("Expected string value for Timestamp");
787            }
788            var match = TimestampRegex.Match(token.StringValue);
789            if (!match.Success)
790            {
791                throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
792            }
793            var dateTime = match.Groups["datetime"].Value;
794            var subseconds = match.Groups["subseconds"].Value;
795            var offset = match.Groups["offset"].Value;
796
797            try
798            {
799                DateTime parsed = DateTime.ParseExact(
800                    dateTime,
801                    "yyyy-MM-dd'T'HH:mm:ss",
802                    CultureInfo.InvariantCulture,
803                    DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
804                // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
805                Timestamp timestamp = Timestamp.FromDateTime(parsed);
806                int nanosToAdd = 0;
807                if (subseconds != "")
808                {
809                    // This should always work, as we've got 1-9 digits.
810                    int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
811                    nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
812                }
813                int secondsToAdd = 0;
814                if (offset != "Z")
815                {
816                    // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
817                    int sign = offset[0] == '-' ? 1 : -1;
818                    int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
819                    int minutes = int.Parse(offset.Substring(4, 2));
820                    int totalMinutes = hours * 60 + minutes;
821                    if (totalMinutes > 18 * 60)
822                    {
823                        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
824                    }
825                    if (totalMinutes == 0 && sign == 1)
826                    {
827                        // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
828                        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
829                    }
830                    // We need to *subtract* the offset from local time to get UTC.
831                    secondsToAdd = sign * totalMinutes * 60;
832                }
833                // Ensure we've got the right signs. Currently unnecessary, but easy to do.
834                if (secondsToAdd < 0 && nanosToAdd > 0)
835                {
836                    secondsToAdd++;
837                    nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
838                }
839                if (secondsToAdd != 0 || nanosToAdd != 0)
840                {
841                    timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
842                    // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
843                    // anywhere, but we shouldn't parse it.
844                    if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
845                    {
846                        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
847                    }
848                }
849                message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
850                message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
851            }
852            catch (FormatException)
853            {
854                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
855            }
856        }
857
858        private static void MergeDuration(IMessage message, JsonToken token)
859        {
860            if (token.Type != JsonToken.TokenType.StringValue)
861            {
862                throw new InvalidProtocolBufferException("Expected string value for Duration");
863            }
864            var match = DurationRegex.Match(token.StringValue);
865            if (!match.Success)
866            {
867                throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
868            }
869            var sign = match.Groups["sign"].Value;
870            var secondsText = match.Groups["int"].Value;
871            // Prohibit leading insignficant zeroes
872            if (secondsText[0] == '0' && secondsText.Length > 1)
873            {
874                throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
875            }
876            var subseconds = match.Groups["subseconds"].Value;
877            var multiplier = sign == "-" ? -1 : 1;
878
879            try
880            {
881                long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
882                int nanos = 0;
883                if (subseconds != "")
884                {
885                    // This should always work, as we've got 1-9 digits.
886                    int parsedFraction = int.Parse(subseconds.Substring(1));
887                    nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
888                }
889                if (!Duration.IsNormalized(seconds, nanos))
890                {
891                    throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
892                }
893                message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
894                message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
895            }
896            catch (FormatException)
897            {
898                throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
899            }
900        }
901
902        private static void MergeFieldMask(IMessage message, JsonToken token)
903        {
904            if (token.Type != JsonToken.TokenType.StringValue)
905            {
906                throw new InvalidProtocolBufferException("Expected string value for FieldMask");
907            }
908            // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
909            string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);
910            IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message);
911            foreach (var path in jsonPaths)
912            {
913                messagePaths.Add(ToSnakeCase(path));
914            }
915        }
916
917        // Ported from src/google/protobuf/util/internal/utility.cc
918        private static string ToSnakeCase(string text)
919        {
920            var builder = new StringBuilder(text.Length * 2);
921            // Note: this is probably unnecessary now, but currently retained to be as close as possible to the
922            // C++, whilst still throwing an exception on underscores.
923            bool wasNotUnderscore = false;  // Initialize to false for case 1 (below)
924            bool wasNotCap = false;
925
926            for (int i = 0; i < text.Length; i++)
927            {
928                char c = text[i];
929                if (c >= 'A' && c <= 'Z') // ascii_isupper
930                {
931                    // Consider when the current character B is capitalized:
932                    // 1) At beginning of input:   "B..." => "b..."
933                    //    (e.g. "Biscuit" => "biscuit")
934                    // 2) Following a lowercase:   "...aB..." => "...a_b..."
935                    //    (e.g. "gBike" => "g_bike")
936                    // 3) At the end of input:     "...AB" => "...ab"
937                    //    (e.g. "GoogleLAB" => "google_lab")
938                    // 4) Followed by a lowercase: "...ABc..." => "...a_bc..."
939                    //    (e.g. "GBike" => "g_bike")
940                    if (wasNotUnderscore &&               //            case 1 out
941                        (wasNotCap ||                     // case 2 in, case 3 out
942                         (i + 1 < text.Length &&         //            case 3 out
943                          (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1])
944                    {  // case 4 in
945                       // We add an underscore for case 2 and case 4.
946                        builder.Append('_');
947                    }
948                    // ascii_tolower, but we already know that c *is* an upper case ASCII character...
949                    builder.Append((char) (c + 'a' - 'A'));
950                    wasNotUnderscore = true;
951                    wasNotCap = false;
952                }
953                else
954                {
955                    builder.Append(c);
956                    if (c == '_')
957                    {
958                        throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
959                    }
960                    wasNotUnderscore = true;
961                    wasNotCap = true;
962                }
963            }
964            return builder.ToString();
965        }
966        #endregion
967
968        /// <summary>
969        /// Settings controlling JSON parsing.
970        /// </summary>
971        public sealed class Settings
972        {
973            /// <summary>
974            /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
975            /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
976            /// </summary>
977            public static Settings Default { get; }
978
979            // Workaround for the Mono compiler complaining about XML comments not being on
980            // valid language elements.
981            static Settings()
982            {
983                Default = new Settings(CodedInputStream.DefaultRecursionLimit);
984            }
985
986            /// <summary>
987            /// The maximum depth of messages to parse. Note that this limit only applies to parsing
988            /// messages, not collections - so a message within a collection within a message only counts as
989            /// depth 2, not 3.
990            /// </summary>
991            public int RecursionLimit { get; }
992
993            /// <summary>
994            /// The type registry used to parse <see cref="Any"/> messages.
995            /// </summary>
996            public TypeRegistry TypeRegistry { get; }
997
998            /// <summary>
999            /// Creates a new <see cref="Settings"/> object with the specified recursion limit.
1000            /// </summary>
1001            /// <param name="recursionLimit">The maximum depth of messages to parse</param>
1002            public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
1003            {
1004            }
1005
1006            /// <summary>
1007            /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
1008            /// </summary>
1009            /// <param name="recursionLimit">The maximum depth of messages to parse</param>
1010            /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param>
1011            public Settings(int recursionLimit, TypeRegistry typeRegistry)
1012            {
1013                RecursionLimit = recursionLimit;
1014                TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
1015            }
1016        }
1017    }
1018}
1019