1#region Copyright notice and license 2// Protocol Buffers - Google's data interchange format 3// Copyright 2015 Google Inc. All rights reserved. 4// https://developers.google.com/protocol-buffers/ 5// 6// Redistribution and use in source and binary forms, with or without 7// modification, are permitted provided that the following conditions are 8// met: 9// 10// * Redistributions of source code must retain the above copyright 11// notice, this list of conditions and the following disclaimer. 12// * Redistributions in binary form must reproduce the above 13// copyright notice, this list of conditions and the following disclaimer 14// in the documentation and/or other materials provided with the 15// distribution. 16// * Neither the name of Google Inc. nor the names of its 17// contributors may be used to endorse or promote products derived from 18// this software without specific prior written permission. 19// 20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31#endregion 32 33using Google.Protobuf.Reflection; 34using Google.Protobuf.WellKnownTypes; 35using System; 36using System.Collections; 37using System.Collections.Generic; 38using System.Globalization; 39using System.IO; 40using System.Text; 41using System.Text.RegularExpressions; 42 43namespace Google.Protobuf 44{ 45 /// <summary> 46 /// Reflection-based converter from JSON to messages. 47 /// </summary> 48 /// <remarks> 49 /// <para> 50 /// Instances of this class are thread-safe, with no mutable state. 51 /// </para> 52 /// <para> 53 /// This is a simple start to get JSON parsing working. As it's reflection-based, 54 /// it's not as quick as baking calls into generated messages - but is a simpler implementation. 55 /// (This code is generally not heavily optimized.) 56 /// </para> 57 /// </remarks> 58 public sealed class JsonParser 59 { 60 // Note: using 0-9 instead of \d to ensure no non-ASCII digits. 61 // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest. 62 private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable); 63 private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable); 64 private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 }; 65 private static readonly char[] FieldMaskPathSeparators = new[] { ',' }; 66 67 private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default); 68 69 // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers 70 // and the signatures of various methods. 
// Lookup from the full name of a well-known type to the routine that merges that type's
// special JSON representation into a message. Every handler receives the parser, the
// message being populated and the tokenizer positioned at the start of the value.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
{
    // Types represented as a single JSON string: the handler consumes exactly one token.
    [Timestamp.Descriptor.FullName] = (p, msg, tokens) => MergeTimestamp(msg, tokens.Next()),
    [Duration.Descriptor.FullName] = (p, msg, tokens) => MergeDuration(msg, tokens.Next()),
    [FieldMask.Descriptor.FullName] = (p, msg, tokens) => MergeFieldMask(msg, tokens.Next()),

    // Struct-family types and Any need the parser instance to recurse.
    [Value.Descriptor.FullName] = (p, msg, tokens) => p.MergeStructValue(msg, tokens),
    [ListValue.Descriptor.FullName] = (p, msg, tokens) =>
        p.MergeRepeatedField(msg, msg.Descriptor.Fields[ListValue.ValuesFieldNumber], tokens),
    [Struct.Descriptor.FullName] = (p, msg, tokens) => p.MergeStruct(msg, tokens),
    [Any.Descriptor.FullName] = (p, msg, tokens) => p.MergeAny(msg, tokens),

    // Wrapper types are all parsed as their single "value" field.
    [Int32Value.Descriptor.FullName] = MergeWrapperField,
    [Int64Value.Descriptor.FullName] = MergeWrapperField,
    [UInt32Value.Descriptor.FullName] = MergeWrapperField,
    [UInt64Value.Descriptor.FullName] = MergeWrapperField,
    [FloatValue.Descriptor.FullName] = MergeWrapperField,
    [DoubleValue.Descriptor.FullName] = MergeWrapperField,
    [BytesValue.Descriptor.FullName] = MergeWrapperField,
    [StringValue.Descriptor.FullName] = MergeWrapperField
};

// Shared handler for all wrapper types, so the table above does not have to repeat the
// same lambda for each of them: the JSON value is merged straight into the wrapper's
// single value field.
private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
{
    FieldDescriptor wrappedValueField = message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber];
    parser.MergeField(message, wrappedValueField, tokenizer);
}

/// <summary>
/// Returns a parser using the default settings.
/// </summary>
public static JsonParser Default => defaultInstance;

// Settings supplied at construction; read by the parsing methods (recursion limit, type registry).
private readonly Settings settings;

/// <summary>
/// Creates a new parser with the given settings.
/// </summary>
/// <param name="settings">The settings. Must not be null.</param>
public JsonParser(Settings settings)
{
    // Fail at construction time rather than with a NullReferenceException in the middle of
    // a later parse, when the settings are first dereferenced.
    ProtoPreconditions.CheckNotNull(settings, nameof(settings));
    this.settings = settings;
}

/// <summary>
/// Parses <paramref name="json"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="json">The JSON to parse.</param>
internal void Merge(IMessage message, string json)
{
    Merge(message, new StringReader(json));
}

/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidProtocolBufferException">Further tokens follow the merged value.</exception>
internal void Merge(IMessage message, TextReader jsonReader)
{
    var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
    Merge(message, tokenizer);
    // The value must be the only thing in the document: anything other than end-of-document here is an error.
    var lastToken = tokenizer.Next();
    if (lastToken != JsonToken.EndDocument)
    {
        throw new InvalidProtocolBufferException("Expected end of JSON after object");
    }
}

/// <summary>
/// Merges the given message using data from the given tokenizer. In most cases, the next
/// token should be a "start object" token, but wrapper types and nullity can invalidate
/// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
/// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
/// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
/// </summary>
/// <param name="message">The message to merge the parsed data into.</param>
/// <param name="tokenizer">Token source, positioned at the start of the value for <paramref name="message"/>.</param>
/// <exception cref="InvalidProtocolBufferException">The recursion limit is exceeded, the value is not
/// an object, a oneof is specified more than once, or an unknown field name is encountered.</exception>
private void Merge(IMessage message, JsonTokenizer tokenizer)
{
    if (tokenizer.ObjectDepth > settings.RecursionLimit)
    {
        throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
    }
    if (message.Descriptor.IsWellKnownType)
    {
        Action<JsonParser, IMessage, JsonTokenizer> handler;
        if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler))
        {
            handler(this, message, tokenizer);
            return;
        }
        // Well-known types with no special handling continue in the normal way.
    }
    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object");
    }
    var descriptor = message.Descriptor;
    var jsonFieldMap = descriptor.Fields.ByJsonName();
    // All the oneof fields we've already accounted for - we can only see each of them once.
    // The set is created lazily to avoid the overhead of creating a set for every message
    // we parsed, when oneofs are relatively rare.
    HashSet<OneofDescriptor> seenOneofs = null;
    while (true)
    {
        token = tokenizer.Next();
        if (token.Type == JsonToken.TokenType.EndObject)
        {
            return;
        }
        if (token.Type != JsonToken.TokenType.Name)
        {
            throw new InvalidOperationException("Unexpected token type " + token.Type);
        }
        string name = token.StringValue;
        FieldDescriptor field;
        if (jsonFieldMap.TryGetValue(name, out field))
        {
            if (field.ContainingOneof != null)
            {
                if (seenOneofs == null)
                {
                    seenOneofs = new HashSet<OneofDescriptor>();
                }
                if (!seenOneofs.Add(field.ContainingOneof))
                {
                    throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
                }
            }
            MergeField(message, field, tokenizer);
        }
        else
        {
            // TODO: Is this what we want to do? If not, we'll need to skip the value,
            // which may be an object or array. (We might want to put code in the tokenizer
            // to do that.)
            throw new InvalidProtocolBufferException("Unknown field: " + name);
        }
    }
}

/// <summary>
/// Merges the next JSON value from the tokenizer into a single field of <paramref name="message"/>.
/// A JSON null clears the field, except for a singular google.protobuf.Value field, where it is
/// parsed as a value. Anything else is dispatched to the map, repeated or singular handling.
/// </summary>
/// <param name="message">The message owning the field.</param>
/// <param name="field">The field to populate.</param>
/// <param name="tokenizer">Token source, positioned at the start of the field's value.</param>
private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // Clear the field if we see a null token, unless it's for a singular field of type
        // google.protobuf.Value.
        // Note: different from Java API, which just ignores it.
        // TODO: Bring it more in line? Discuss...
        if (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field))
        {
            field.Accessor.Clear(message);
            return;
        }
    }
    // The specific handlers below expect to read the value's first token themselves.
    tokenizer.PushBack(token);

    if (field.IsMap)
    {
        MergeMapField(message, field, tokenizer);
    }
    else if (field.IsRepeated)
    {
        MergeRepeatedField(message, field, tokenizer);
    }
    else
    {
        var value = ParseSingleValue(field, tokenizer);
        field.Accessor.SetValue(message, value);
    }
}

/// <summary>
/// Reads a JSON array from the tokenizer and appends each element to the repeated field
/// <paramref name="field"/> of <paramref name="message"/>.
/// </summary>
/// <param name="message">The message owning the repeated field.</param>
/// <param name="field">The repeated field to append to.</param>
/// <param name="tokenizer">Token source, positioned at the start of the array.</param>
/// <exception cref="InvalidProtocolBufferException">The value is not an array, or an element
/// is a JSON null which the element type cannot represent.</exception>
private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartArray)
    {
        throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type);
    }

    IList list = (IList) field.Accessor.GetValue(message);
    while (true)
    {
        token = tokenizer.Next();
        if (token.Type == JsonToken.TokenType.EndArray)
        {
            return;
        }
        tokenizer.PushBack(token);
        // Decide on nullity from the parsed value rather than from the raw token (the same
        // approach as for map values): ParseSingleValue turns a JSON null into a non-null
        // message for google.protobuf.Value elements, so a ListValue such as [null] is
        // accepted, and returns null for every other element type, which is rejected here.
        object value = ParseSingleValue(field, tokenizer);
        if (value == null)
        {
            throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
        }
        list.Add(value);
    }
}

/// <summary>
/// Reads a JSON object from the tokenizer and stores each property as an entry in the map field
/// <paramref name="field"/> of <paramref name="message"/>. An existing entry with the same key is replaced.
/// </summary>
/// <param name="message">The message owning the map field.</param>
/// <param name="field">The map field to populate.</param>
/// <param name="tokenizer">Token source, positioned at the start of the object.</param>
/// <exception cref="InvalidProtocolBufferException">The value is not an object, the map entry type
/// lacks a key or value field, a key cannot be parsed, or a map value is null.</exception>
private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object to populate a map");
    }

    // The key and value fields of the map entry message are looked up as field numbers 1 and 2.
    var type = field.MessageType;
    var keyField = type.FindFieldByNumber(1);
    var valueField = type.FindFieldByNumber(2);
    if (keyField == null || valueField == null)
    {
        throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
    }
    IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message);

    while (true)
    {
        token = tokenizer.Next();
        if (token.Type == JsonToken.TokenType.EndObject)
        {
            return;
        }
        object key = ParseMapKey(keyField, token.StringValue);
        object value = ParseSingleValue(valueField, tokenizer);
        if (value == null)
        {
            throw new InvalidProtocolBufferException("Map values must not be null");
        }
        dictionary[key] = value;
    }
}

/// <summary>
/// Returns whether <paramref name="field"/> is a message field of type google.protobuf.Value.
/// </summary>
private static bool IsGoogleProtobufValueField(FieldDescriptor field)
{
    return field.FieldType == FieldType.Message &&
        field.MessageType.FullName == Value.Descriptor.FullName;
}

/// <summary>
/// Parses one JSON value from the tokenizer as a single (non-repeated, non-map) value of the
/// type of <paramref name="field"/>.
/// </summary>
/// <param name="field">The field whose type determines how the value is interpreted.</param>
/// <param name="tokenizer">Token source, positioned at the start of the value.</param>
/// <returns>
/// The parsed value. A JSON null yields a null-valued Value message for google.protobuf.Value
/// fields and a null reference for every other field type.
/// </returns>
/// <exception cref="InvalidProtocolBufferException">The token type is not supported for the field type.</exception>
private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // TODO: In order to support dynamic messages, we should really build this up
        // dynamically.
        if (IsGoogleProtobufValueField(field))
        {
            return Value.ForNull();
        }
        return null;
    }

    var fieldType = field.FieldType;
    if (fieldType == FieldType.Message)
    {
        // Parse wrapper types as their constituent types.
        // TODO: What does this mean for null?
        if (field.MessageType.IsWrapperType)
        {
            field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
            fieldType = field.FieldType;
        }
        else
        {
            // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
            tokenizer.PushBack(token);
            IMessage subMessage = NewMessageForField(field);
            Merge(subMessage, tokenizer);
            return subMessage;
        }
    }

    // Null tokens have already been handled above, so they need no case here.
    switch (token.Type)
    {
        case JsonToken.TokenType.True:
        case JsonToken.TokenType.False:
            if (fieldType == FieldType.Bool)
            {
                return token.Type == JsonToken.TokenType.True;
            }
            // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
            // case instead, but this way we'd only need to change one place.
            goto default;
        case JsonToken.TokenType.StringValue:
            return ParseSingleStringValue(field, token.StringValue);
        // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
        case JsonToken.TokenType.Number:
            return ParseSingleNumberValue(field, token);
        default:
            throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
    }
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <typeparam name="T">The message type to instantiate and populate.</typeparam>
/// <param name="json">The JSON text to parse. Must not be null.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    // Delegate to the reader-based overload so both entry points share one implementation.
    TextReader reader = new StringReader(json);
    return Parse<T>(reader);
}

/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and parses it into a newly created message.
/// </summary>
/// <typeparam name="T">The message type to instantiate and populate.</typeparam>
/// <param name="jsonReader">The reader supplying the JSON text. Must not be null.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    var parsed = new T();
    Merge(parsed, jsonReader);
    return parsed;
}

/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <param name="json">The JSON text to parse. Must not be null.</param>
/// <param name="descriptor">Descriptor of the message type to create. Must not be null.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    // Delegate to the reader-based overload so both entry points share one implementation.
    TextReader reader = new StringReader(json);
    return Parse(reader, descriptor);
}

/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and parses it into a newly created message.
/// </summary>
/// <param name="jsonReader">The reader supplying the JSON text. Must not be null.</param>
/// <param name="descriptor">Descriptor of the message type to create. Must not be null.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    IMessage parsed = descriptor.Parser.CreateTemplate();
    Merge(parsed, jsonReader);
    return parsed;
}

// Merges a JSON value into a google.protobuf.Value message. The type of the first token
// selects which of the Value fields is set: null, string, number and bool are stored
// directly; objects and arrays are parsed into a nested message which is then assigned.
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
    var first = tokenizer.Next();
    var valueFields = message.Descriptor.Fields;
    switch (first.Type)
    {
        case JsonToken.TokenType.Null:
            valueFields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
            return;
        case JsonToken.TokenType.StringValue:
            valueFields[Value.StringValueFieldNumber].Accessor.SetValue(message, first.StringValue);
            return;
        case JsonToken.TokenType.Number:
            valueFields[Value.NumberValueFieldNumber].Accessor.SetValue(message, first.NumberValue);
            return;
        case JsonToken.TokenType.True:
        case JsonToken.TokenType.False:
        {
            bool flag = first.Type == JsonToken.TokenType.True;
            valueFields[Value.BoolValueFieldNumber].Accessor.SetValue(message, flag);
            return;
        }
        case JsonToken.TokenType.StartObject:
        case JsonToken.TokenType.StartArray:
        {
            // Objects go to the struct field and arrays to the list field; both are parsed
            // the same way, by replaying the opening token into a nested Merge call.
            int targetNumber = first.Type == JsonToken.TokenType.StartObject
                ? Value.StructValueFieldNumber
                : Value.ListValueFieldNumber;
            var targetField = valueFields[targetNumber];
            var nested = NewMessageForField(targetField);
            tokenizer.PushBack(first);
            Merge(nested, tokenizer);
            targetField.Accessor.SetValue(message, nested);
            return;
        }
        default:
            throw new InvalidOperationException("Unexpected token type: " + first.Type);
    }
}

// Merges a JSON object into a google.protobuf.Struct message by treating the object as the
// contents of the Struct's map field.
private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Struct");
    }
    // MergeMapField reads the start-object token itself, so hand it back.
    tokenizer.PushBack(opening);

    var mapField = message.Descriptor.Fields[Struct.FieldsFieldNumber];
    MergeMapField(message, mapField, tokenizer);
}

// Merges a JSON object into a google.protobuf.Any message. Tokens are recorded until the
// @type property of this object is found; its value identifies the message type via the type
// registry. The recorded tokens plus the rest of the object (without @type) are then replayed
// and parsed as that type, and the result is packed into the Any as a type URL and bytes.
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
    var recorded = new List<JsonToken>();

    var current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Any");
    }
    int anyObjectDepth = tokenizer.ObjectDepth;

    while (true)
    {
        // Requiring the same object depth protects us from nested Any values which occur
        // before the type URL for *this* Any.
        bool atTypeUrlName = current.Type == JsonToken.TokenType.Name &&
            current.StringValue == JsonFormatter.AnyTypeUrlField &&
            tokenizer.ObjectDepth == anyObjectDepth;
        if (atTypeUrlName)
        {
            break;
        }

        recorded.Add(current);
        current = tokenizer.Next();

        // Dropping below the starting depth means the object ended without an @type property.
        if (tokenizer.ObjectDepth < anyObjectDepth)
        {
            throw new InvalidProtocolBufferException("Any message with no @type");
        }
    }

    // Neither the @type name nor its value is added to the recorded tokens.
    var typeUrlToken = tokenizer.Next();
    if (typeUrlToken.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Any.@type");
    }
    string typeUrl = typeUrlToken.StringValue;
    string typeName = Any.GetTypeName(typeUrl);

    MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
    if (descriptor == null)
    {
        throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
    }

    // Replay what was recorded, followed by whatever remains of the object, parsing it as
    // normal. The original tokenizer should end up at the end of the object.
    var replay = JsonTokenizer.FromReplayedTokens(recorded, tokenizer);
    var body = descriptor.Parser.CreateTemplate();
    if (!descriptor.IsWellKnownType)
    {
        Merge(body, replay);
    }
    else
    {
        MergeWellKnownTypeAnyBody(body, replay);
    }
    var data = body.ToByteString();

    // Pack the parsed message into the Any received as a parameter.
    var anyFields = message.Descriptor.Fields;
    anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
    anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
}

// Parses the body of an Any whose payload is a well-known type. With the @type property
// already removed from the token stream, the only acceptable shape is: start-object, a
// property named "value", the value itself, and end-object.
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
    // Discard the start-object token; MergeAny has already verified it.
    tokenizer.Next();

    var nameToken = tokenizer.Next();
    // TODO: What about an absent Int32Value, for example?
    bool isValueProperty = nameToken.Type == JsonToken.TokenType.Name &&
        nameToken.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
    if (!isValueProperty)
    {
        throw new InvalidProtocolBufferException(
            "Expected '" + JsonFormatter.AnyWellKnownTypeValueField + "' property for well-known type Any body");
    }

    Merge(body, tokenizer);

    var closing = tokenizer.Next();
    if (closing.Type != JsonToken.TokenType.EndObject)
    {
        throw new InvalidProtocolBufferException("Expected end-object token after @type/value for well-known type");
    }
}

#region Utility methods which don't depend on the state (or settings) of the parser.
/// <summary>
/// Converts the text of a JSON property name into a map key of the type of <paramref name="field"/>.
/// </summary>
/// <param name="field">The key field of the map entry type.</param>
/// <param name="keyText">The JSON property name to convert.</param>
/// <returns>The key, boxed as the CLR type corresponding to the field type.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is not a valid key for the field type,
/// or the field type cannot be used as a map key.</exception>
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    switch (field.FieldType)
    {
        case FieldType.Bool:
            if (keyText == "true")
            {
                return true;
            }
            if (keyText == "false")
            {
                return false;
            }
            throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
        case FieldType.String:
            return keyText;
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);
        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
    }
}

/// <summary>
/// Converts a JSON number token into a value of the type of <paramref name="field"/>.
/// </summary>
/// <param name="field">The field whose type determines the conversion.</param>
/// <param name="token">The number token; its double value is the one converted.</param>
/// <returns>The converted value, boxed.</returns>
/// <exception cref="InvalidProtocolBufferException">The number is not an integer where one is
/// required, is out of range for the field type, or the field type does not accept numbers.</exception>
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double value = token.NumberValue;
    // Conversions run in a checked context so out-of-range casts raise OverflowException,
    // which is translated into a parse error below.
    checked
    {
        try
        {
            switch (field.FieldType)
            {
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
                    CheckInteger(value);
                    return (int) value;
                case FieldType.UInt32:
                case FieldType.Fixed32:
                    CheckInteger(value);
                    return (uint) value;
                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
                    CheckInteger(value);
                    return (long) value;
                case FieldType.UInt64:
                case FieldType.Fixed64:
                    CheckInteger(value);
                    return (ulong) value;
                case FieldType.Double:
                    return value;
                case FieldType.Float:
                    if (double.IsNaN(value))
                    {
                        return float.NaN;
                    }
                    if (value > float.MaxValue || value < float.MinValue)
                    {
                        // Infinities are passed through; any finite value beyond the float range is rejected.
                        if (double.IsPositiveInfinity(value))
                        {
                            return float.PositiveInfinity;
                        }
                        if (double.IsNegativeInfinity(value))
                        {
                            return float.NegativeInfinity;
                        }
                        throw new InvalidProtocolBufferException($"Value out of range: {value}");
                    }
                    return (float) value;
                case FieldType.Enum:
                    CheckInteger(value);
                    // Just return it as an int, and let the CLR convert it.
                    // Note that we deliberately don't check that it's a known value.
                    return (int) value;
                default:
                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
            }
        }
        catch (OverflowException)
        {
            throw new InvalidProtocolBufferException($"Value out of range: {value}");
        }
    }
}

/// <summary>
/// Throws <see cref="InvalidProtocolBufferException"/> unless <paramref name="value"/> is a
/// finite number with no fractional part.
/// </summary>
private static void CheckInteger(double value)
{
    if (double.IsInfinity(value) || double.IsNaN(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
    if (value != Math.Floor(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}

/// <summary>
/// Converts the text of a JSON string token into a value of the type of <paramref name="field"/>.
/// </summary>
/// <param name="field">The field whose type determines the conversion.</param>
/// <param name="text">The content of the JSON string.</param>
/// <returns>The converted value, boxed where necessary.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is not valid for the field type
/// (invalid base64, invalid or out-of-range number, unknown enum name), or the field type does
/// not accept strings.</exception>
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return text;
        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(text, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(text, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(text, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(text, ulong.Parse);
        case FieldType.Double:
            double d = ParseNumericString(text, double.Parse);
            ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
            return d;
        case FieldType.Float:
            float f = ParseNumericString(text, float.Parse);
            ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
            return f;
        case FieldType.Enum:
            var enumValue = field.EnumType.FindValueByName(text);
            if (enumValue == null)
            {
                throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
            }
            // Just return it as an int, and let the CLR convert it.
            return enumValue.Number;
        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
    }
}

/// <summary>
/// Creates a new instance of the message type for the given field.
/// </summary>
private static IMessage NewMessageForField(FieldDescriptor field)
{
    return field.MessageType.Parser.CreateTemplate();
}

/// <summary>
/// Parses numeric text with the supplied framework parser, using the invariant culture and
/// allowing only a leading sign, a decimal point and an exponent. A leading '+' and leading
/// zeroes followed by another digit are rejected up front.
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse.</param>
/// <param name="parser">The parse method to delegate to, e.g. <c>int.Parse</c>.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is not a valid number in this
/// format, or is out of range for <typeparamref name="T"/>.</exception>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // The prefix checks below compare individual characters, so they are ordinal and do not
    // depend on the current culture (unlike string.StartsWith(string)).

    // Can't prohibit this with NumberStyles.
    if (text.Length > 0 && text[0] == '+')
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
    if (text.Length > 1 && text[0] == '0')
    {
        // "0" followed by another digit: a leading zero.
        if (text[1] >= '0' && text[1] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    else if (text.Length > 2 && text[0] == '-' && text[1] == '0')
    {
        // "-0" followed by another digit: a leading zero after the sign.
        if (text[2] >= '0' && text[2] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}

/// <summary>
/// Checks that any infinite/NaN values originated from the correct text.
/// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
/// way that Mono parses out-of-range values as infinity.
/// </summary>
/// <param name="text">The original text that was parsed.</param>
/// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
/// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
/// <param name="isNaN">Whether the parsed value is not-a-number.</param>
/// <exception cref="InvalidProtocolBufferException">The parsed value is infinite or NaN but the
/// text is not exactly "Infinity", "-Infinity" or "NaN" respectively.</exception>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    if ((isPositiveInfinity && text != "Infinity") ||
        (isNegativeInfinity && text != "-Infinity") ||
        (isNaN && text != "NaN"))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}

/// <summary>
/// Parses a JSON string token holding a timestamp (date/time, optional fractional seconds, and
/// either "Z" or a numeric UTC offset) and stores the resulting seconds and nanos in <paramref name="message"/>.
/// </summary>
/// <param name="message">The Timestamp message to populate.</param>
/// <param name="token">The token holding the timestamp text.</param>
/// <exception cref="InvalidProtocolBufferException">The token is not a string, or the text is not
/// a valid timestamp (bad format, offset beyond 18 hours, an offset of -00:00, or out of range
/// after the offset is applied).</exception>
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // The captured text includes the leading '.', so its length (digits + 1) is what
            // indexes the scaling table.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, like every other number in this method.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}

/// <summary>
/// Parses a JSON string token holding a duration (optional '-', whole seconds, optional
/// fractional seconds, and a trailing 's') and stores the resulting seconds and nanos in
/// <paramref name="message"/>.
/// </summary>
/// <param name="message">The Duration message to populate.</param>
/// <param name="token">The token holding the duration text.</param>
/// <exception cref="InvalidProtocolBufferException">The token is not a string, or the text is not
/// a valid duration (bad format, leading zeroes, or not normalized).</exception>
private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    // The sign applies to both the seconds and the nanos.
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // Parsed with the invariant culture, like the seconds above.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
        }
        if (!Duration.IsNormalized(seconds, nanos))
        {
            throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
        }
        message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
        message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
    }
}

/// <summary>
/// Parses a JSON string token holding a comma-separated list of field paths and appends each
/// path, converted with ToSnakeCase, to the paths of <paramref name="message"/>.
/// Empty entries in the list are ignored.
/// </summary>
/// <param name="message">The FieldMask message to populate.</param>
/// <param name="token">The token holding the field mask text.</param>
/// <exception cref="InvalidProtocolBufferException">The token is not a string.</exception>
private static void MergeFieldMask(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for FieldMask");
    }
    // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
    string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);
    IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message);
    foreach (var path in jsonPaths)
    {
        messagePaths.Add(ToSnakeCase(path));
    }
}

// Ported from src/google/protobuf/util/internal/utility.cc
private static string ToSnakeCase(string text)
{
    var builder = new StringBuilder(text.Length * 2);
    // Note: this is probably unnecessary now, but currently retained to be as close as possible to the
    // C++, whilst still throwing an exception on underscores.
    bool wasNotUnderscore = false; // Initialize to false for case 1 (below)
    bool wasNotCap = false;

    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];
        if (c >= 'A' && c <= 'Z') // ascii_isupper
        {
            // Consider when the current character B is capitalized:
            // 1) At beginning of input: "B..." => "b..."
933 // (e.g. "Biscuit" => "biscuit") 934 // 2) Following a lowercase: "...aB..." => "...a_b..." 935 // (e.g. "gBike" => "g_bike") 936 // 3) At the end of input: "...AB" => "...ab" 937 // (e.g. "GoogleLAB" => "google_lab") 938 // 4) Followed by a lowercase: "...ABc..." => "...a_bc..." 939 // (e.g. "GBike" => "g_bike") 940 if (wasNotUnderscore && // case 1 out 941 (wasNotCap || // case 2 in, case 3 out 942 (i + 1 < text.Length && // case 3 out 943 (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1]) 944 { // case 4 in 945 // We add an underscore for case 2 and case 4. 946 builder.Append('_'); 947 } 948 // ascii_tolower, but we already know that c *is* an upper case ASCII character... 949 builder.Append((char) (c + 'a' - 'A')); 950 wasNotUnderscore = true; 951 wasNotCap = false; 952 } 953 else 954 { 955 builder.Append(c); 956 if (c == '_') 957 { 958 throw new InvalidProtocolBufferException($"Invalid field mask: {text}"); 959 } 960 wasNotUnderscore = true; 961 wasNotCap = true; 962 } 963 } 964 return builder.ToString(); 965 } 966 #endregion 967 968 /// <summary> 969 /// Settings controlling JSON parsing. 970 /// </summary> 971 public sealed class Settings 972 { 973 /// <summary> 974 /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default 975 /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry. 976 /// </summary> 977 public static Settings Default { get; } 978 979 // Workaround for the Mono compiler complaining about XML comments not being on 980 // valid language elements. 981 static Settings() 982 { 983 Default = new Settings(CodedInputStream.DefaultRecursionLimit); 984 } 985 986 /// <summary> 987 /// The maximum depth of messages to parse. Note that this limit only applies to parsing 988 /// messages, not collections - so a message within a collection within a message only counts as 989 /// depth 2, not 3. 
990 /// </summary> 991 public int RecursionLimit { get; } 992 993 /// <summary> 994 /// The type registry used to parse <see cref="Any"/> messages. 995 /// </summary> 996 public TypeRegistry TypeRegistry { get; } 997 998 /// <summary> 999 /// Creates a new <see cref="Settings"/> object with the specified recursion limit. 1000 /// </summary> 1001 /// <param name="recursionLimit">The maximum depth of messages to parse</param> 1002 public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty) 1003 { 1004 } 1005 1006 /// <summary> 1007 /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry. 1008 /// </summary> 1009 /// <param name="recursionLimit">The maximum depth of messages to parse</param> 1010 /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param> 1011 public Settings(int recursionLimit, TypeRegistry typeRegistry) 1012 { 1013 RecursionLimit = recursionLimit; 1014 TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); 1015 } 1016 } 1017 } 1018} 1019