wire_format.cc revision fbaaef999ba563838ebd00874ed8a1c01fbf286d
1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <stack>
36#include <string>
37#include <vector>
38
39#include <google/protobuf/wire_format.h>
40
41#include <google/protobuf/stubs/common.h>
42#include <google/protobuf/descriptor.h>
43#include <google/protobuf/wire_format_lite_inl.h>
44#include <google/protobuf/descriptor.pb.h>
45#include <google/protobuf/io/coded_stream.h>
46#include <google/protobuf/io/zero_copy_stream.h>
47#include <google/protobuf/io/zero_copy_stream_impl.h>
48#include <google/protobuf/unknown_field_set.h>
49
50
51namespace google {
52namespace protobuf {
53namespace internal {
54
55using internal::WireFormatLite;
56
57namespace {
58
59// This function turns out to be convenient when using some macros later.
60inline int GetEnumNumber(const EnumValueDescriptor* descriptor) {
61  return descriptor->number();
62}
63
64}  // anonymous namespace
65
66// ===================================================================
67
68bool UnknownFieldSetFieldSkipper::SkipField(
69    io::CodedInputStream* input, uint32 tag) {
70  return WireFormat::SkipField(input, tag, unknown_fields_);
71}
72
73bool UnknownFieldSetFieldSkipper::SkipMessage(io::CodedInputStream* input) {
74  return WireFormat::SkipMessage(input, unknown_fields_);
75}
76
77void UnknownFieldSetFieldSkipper::SkipUnknownEnum(
78    int field_number, int value) {
79  unknown_fields_->AddVarint(field_number, value);
80}
81
82bool WireFormat::SkipField(io::CodedInputStream* input, uint32 tag,
83                           UnknownFieldSet* unknown_fields) {
84  int number = WireFormatLite::GetTagFieldNumber(tag);
85
86  switch (WireFormatLite::GetTagWireType(tag)) {
87    case WireFormatLite::WIRETYPE_VARINT: {
88      uint64 value;
89      if (!input->ReadVarint64(&value)) return false;
90      if (unknown_fields != NULL) unknown_fields->AddVarint(number, value);
91      return true;
92    }
93    case WireFormatLite::WIRETYPE_FIXED64: {
94      uint64 value;
95      if (!input->ReadLittleEndian64(&value)) return false;
96      if (unknown_fields != NULL) unknown_fields->AddFixed64(number, value);
97      return true;
98    }
99    case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
100      uint32 length;
101      if (!input->ReadVarint32(&length)) return false;
102      if (unknown_fields == NULL) {
103        if (!input->Skip(length)) return false;
104      } else {
105        if (!input->ReadString(unknown_fields->AddLengthDelimited(number),
106                               length)) {
107          return false;
108        }
109      }
110      return true;
111    }
112    case WireFormatLite::WIRETYPE_START_GROUP: {
113      if (!input->IncrementRecursionDepth()) return false;
114      if (!SkipMessage(input, (unknown_fields == NULL) ?
115                              NULL : unknown_fields->AddGroup(number))) {
116        return false;
117      }
118      input->DecrementRecursionDepth();
119      // Check that the ending tag matched the starting tag.
120      if (!input->LastTagWas(WireFormatLite::MakeTag(
121          WireFormatLite::GetTagFieldNumber(tag),
122          WireFormatLite::WIRETYPE_END_GROUP))) {
123        return false;
124      }
125      return true;
126    }
127    case WireFormatLite::WIRETYPE_END_GROUP: {
128      return false;
129    }
130    case WireFormatLite::WIRETYPE_FIXED32: {
131      uint32 value;
132      if (!input->ReadLittleEndian32(&value)) return false;
133      if (unknown_fields != NULL) unknown_fields->AddFixed32(number, value);
134      return true;
135    }
136    default: {
137      return false;
138    }
139  }
140}
141
142bool WireFormat::SkipMessage(io::CodedInputStream* input,
143                             UnknownFieldSet* unknown_fields) {
144  while(true) {
145    uint32 tag = input->ReadTag();
146    if (tag == 0) {
147      // End of input.  This is a valid place to end, so return true.
148      return true;
149    }
150
151    WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
152
153    if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
154      // Must be the end of the message.
155      return true;
156    }
157
158    if (!SkipField(input, tag, unknown_fields)) return false;
159  }
160}
161
162void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
163                                        io::CodedOutputStream* output) {
164  for (int i = 0; i < unknown_fields.field_count(); i++) {
165    const UnknownField& field = unknown_fields.field(i);
166    switch (field.type()) {
167      case UnknownField::TYPE_VARINT:
168        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
169            WireFormatLite::WIRETYPE_VARINT));
170        output->WriteVarint64(field.varint());
171        break;
172      case UnknownField::TYPE_FIXED32:
173        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
174            WireFormatLite::WIRETYPE_FIXED32));
175        output->WriteLittleEndian32(field.fixed32());
176        break;
177      case UnknownField::TYPE_FIXED64:
178        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
179            WireFormatLite::WIRETYPE_FIXED64));
180        output->WriteLittleEndian64(field.fixed64());
181        break;
182      case UnknownField::TYPE_LENGTH_DELIMITED:
183        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
184            WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
185        output->WriteVarint32(field.length_delimited().size());
186        output->WriteString(field.length_delimited());
187        break;
188      case UnknownField::TYPE_GROUP:
189        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
190            WireFormatLite::WIRETYPE_START_GROUP));
191        SerializeUnknownFields(field.group(), output);
192        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
193            WireFormatLite::WIRETYPE_END_GROUP));
194        break;
195    }
196  }
197}
198
199uint8* WireFormat::SerializeUnknownFieldsToArray(
200    const UnknownFieldSet& unknown_fields,
201    uint8* target) {
202  for (int i = 0; i < unknown_fields.field_count(); i++) {
203    const UnknownField& field = unknown_fields.field(i);
204
205    switch (field.type()) {
206      case UnknownField::TYPE_VARINT:
207        target = WireFormatLite::WriteInt64ToArray(
208            field.number(), field.varint(), target);
209        break;
210      case UnknownField::TYPE_FIXED32:
211        target = WireFormatLite::WriteFixed32ToArray(
212            field.number(), field.fixed32(), target);
213        break;
214      case UnknownField::TYPE_FIXED64:
215        target = WireFormatLite::WriteFixed64ToArray(
216            field.number(), field.fixed64(), target);
217        break;
218      case UnknownField::TYPE_LENGTH_DELIMITED:
219        target = WireFormatLite::WriteBytesToArray(
220            field.number(), field.length_delimited(), target);
221        break;
222      case UnknownField::TYPE_GROUP:
223        target = WireFormatLite::WriteTagToArray(
224            field.number(), WireFormatLite::WIRETYPE_START_GROUP, target);
225        target = SerializeUnknownFieldsToArray(field.group(), target);
226        target = WireFormatLite::WriteTagToArray(
227            field.number(), WireFormatLite::WIRETYPE_END_GROUP, target);
228        break;
229    }
230  }
231  return target;
232}
233
234void WireFormat::SerializeUnknownMessageSetItems(
235    const UnknownFieldSet& unknown_fields,
236    io::CodedOutputStream* output) {
237  for (int i = 0; i < unknown_fields.field_count(); i++) {
238    const UnknownField& field = unknown_fields.field(i);
239    // The only unknown fields that are allowed to exist in a MessageSet are
240    // messages, which are length-delimited.
241    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
242      const string& data = field.length_delimited();
243
244      // Start group.
245      output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
246
247      // Write type ID.
248      output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
249      output->WriteVarint32(field.number());
250
251      // Write message.
252      output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
253      output->WriteVarint32(data.size());
254      output->WriteString(data);
255
256      // End group.
257      output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
258    }
259  }
260}
261
262uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
263    const UnknownFieldSet& unknown_fields,
264    uint8* target) {
265  for (int i = 0; i < unknown_fields.field_count(); i++) {
266    const UnknownField& field = unknown_fields.field(i);
267
268    // The only unknown fields that are allowed to exist in a MessageSet are
269    // messages, which are length-delimited.
270    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
271      const string& data = field.length_delimited();
272
273      // Start group.
274      target = io::CodedOutputStream::WriteTagToArray(
275          WireFormatLite::kMessageSetItemStartTag, target);
276
277      // Write type ID.
278      target = io::CodedOutputStream::WriteTagToArray(
279          WireFormatLite::kMessageSetTypeIdTag, target);
280      target = io::CodedOutputStream::WriteVarint32ToArray(
281          field.number(), target);
282
283      // Write message.
284      target = io::CodedOutputStream::WriteTagToArray(
285          WireFormatLite::kMessageSetMessageTag, target);
286      target = io::CodedOutputStream::WriteVarint32ToArray(data.size(), target);
287      target = io::CodedOutputStream::WriteStringToArray(data, target);
288
289      // End group.
290      target = io::CodedOutputStream::WriteTagToArray(
291          WireFormatLite::kMessageSetItemEndTag, target);
292    }
293  }
294
295  return target;
296}
297
298int WireFormat::ComputeUnknownFieldsSize(
299    const UnknownFieldSet& unknown_fields) {
300  int size = 0;
301  for (int i = 0; i < unknown_fields.field_count(); i++) {
302    const UnknownField& field = unknown_fields.field(i);
303
304    switch (field.type()) {
305      case UnknownField::TYPE_VARINT:
306        size += io::CodedOutputStream::VarintSize32(
307            WireFormatLite::MakeTag(field.number(),
308            WireFormatLite::WIRETYPE_VARINT));
309        size += io::CodedOutputStream::VarintSize64(field.varint());
310        break;
311      case UnknownField::TYPE_FIXED32:
312        size += io::CodedOutputStream::VarintSize32(
313            WireFormatLite::MakeTag(field.number(),
314            WireFormatLite::WIRETYPE_FIXED32));
315        size += sizeof(int32);
316        break;
317      case UnknownField::TYPE_FIXED64:
318        size += io::CodedOutputStream::VarintSize32(
319            WireFormatLite::MakeTag(field.number(),
320            WireFormatLite::WIRETYPE_FIXED64));
321        size += sizeof(int64);
322        break;
323      case UnknownField::TYPE_LENGTH_DELIMITED:
324        size += io::CodedOutputStream::VarintSize32(
325            WireFormatLite::MakeTag(field.number(),
326            WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
327        size += io::CodedOutputStream::VarintSize32(
328            field.length_delimited().size());
329        size += field.length_delimited().size();
330        break;
331      case UnknownField::TYPE_GROUP:
332        size += io::CodedOutputStream::VarintSize32(
333            WireFormatLite::MakeTag(field.number(),
334            WireFormatLite::WIRETYPE_START_GROUP));
335        size += ComputeUnknownFieldsSize(field.group());
336        size += io::CodedOutputStream::VarintSize32(
337            WireFormatLite::MakeTag(field.number(),
338            WireFormatLite::WIRETYPE_END_GROUP));
339        break;
340    }
341  }
342
343  return size;
344}
345
346int WireFormat::ComputeUnknownMessageSetItemsSize(
347    const UnknownFieldSet& unknown_fields) {
348  int size = 0;
349  for (int i = 0; i < unknown_fields.field_count(); i++) {
350    const UnknownField& field = unknown_fields.field(i);
351
352    // The only unknown fields that are allowed to exist in a MessageSet are
353    // messages, which are length-delimited.
354    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
355      size += WireFormatLite::kMessageSetItemTagsSize;
356      size += io::CodedOutputStream::VarintSize32(field.number());
357      size += io::CodedOutputStream::VarintSize32(
358        field.length_delimited().size());
359      size += field.length_delimited().size();
360    }
361  }
362
363  return size;
364}
365
366// ===================================================================
367
368bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
369                                      Message* message) {
370  const Descriptor* descriptor = message->GetDescriptor();
371  const Reflection* message_reflection = message->GetReflection();
372
373  while(true) {
374    uint32 tag = input->ReadTag();
375    if (tag == 0) {
376      // End of input.  This is a valid place to end, so return true.
377      return true;
378    }
379
380    if (WireFormatLite::GetTagWireType(tag) ==
381        WireFormatLite::WIRETYPE_END_GROUP) {
382      // Must be the end of the message.
383      return true;
384    }
385
386    const FieldDescriptor* field = NULL;
387
388    if (descriptor != NULL) {
389      int field_number = WireFormatLite::GetTagFieldNumber(tag);
390      field = descriptor->FindFieldByNumber(field_number);
391
392      // If that failed, check if the field is an extension.
393      if (field == NULL && descriptor->IsExtensionNumber(field_number)) {
394        field = message_reflection->FindKnownExtensionByNumber(field_number);
395      }
396
397      // If that failed, but we're a MessageSet, and this is the tag for a
398      // MessageSet item, then parse that.
399      if (field == NULL &&
400          descriptor->options().message_set_wire_format() &&
401          tag == WireFormatLite::kMessageSetItemStartTag) {
402        if (!ParseAndMergeMessageSetItem(input, message)) {
403          return false;
404        }
405        continue;  // Skip ParseAndMergeField(); already taken care of.
406      }
407    }
408
409    if (!ParseAndMergeField(tag, field, message, input)) {
410      return false;
411    }
412  }
413}
414
415bool WireFormat::ParseAndMergeField(
416    uint32 tag,
417    const FieldDescriptor* field,        // May be NULL for unknown
418    Message* message,
419    io::CodedInputStream* input) {
420  const Reflection* message_reflection = message->GetReflection();
421
422  if (field == NULL ||
423      WireFormatLite::GetTagWireType(tag) != WireTypeForField(field)) {
424    // We don't recognize this field.  Either the field number is unknown
425    // or the wire type doesn't match.  Put it in our unknown field set.
426    return SkipField(input, tag,
427                     message_reflection->MutableUnknownFields(message));
428  }
429
430  if (field->options().packed()) {
431    uint32 length;
432    if (!input->ReadVarint32(&length)) return false;
433    io::CodedInputStream::Limit limit = input->PushLimit(length);
434
435    switch (field->type()) {
436#define HANDLE_PACKED_TYPE(TYPE, TYPE_METHOD, CPPTYPE, CPPTYPE_METHOD)         \
437      case FieldDescriptor::TYPE_##TYPE: {                                     \
438        while (input->BytesUntilLimit() > 0) {                                 \
439          CPPTYPE value;                                                       \
440          if (!WireFormatLite::Read##TYPE_METHOD(input, &value)) return false; \
441          message_reflection->Add##CPPTYPE_METHOD(message, field, value);      \
442        }                                                                      \
443        break;                                                                 \
444      }
445
446      HANDLE_PACKED_TYPE( INT32,  Int32,  int32,  Int32)
447      HANDLE_PACKED_TYPE( INT64,  Int64,  int64,  Int64)
448      HANDLE_PACKED_TYPE(SINT32, SInt32,  int32,  Int32)
449      HANDLE_PACKED_TYPE(SINT64, SInt64,  int64,  Int64)
450      HANDLE_PACKED_TYPE(UINT32, UInt32, uint32, UInt32)
451      HANDLE_PACKED_TYPE(UINT64, UInt64, uint64, UInt64)
452
453      HANDLE_PACKED_TYPE( FIXED32,  Fixed32, uint32, UInt32)
454      HANDLE_PACKED_TYPE( FIXED64,  Fixed64, uint64, UInt64)
455      HANDLE_PACKED_TYPE(SFIXED32, SFixed32,  int32,  Int32)
456      HANDLE_PACKED_TYPE(SFIXED64, SFixed64,  int64,  Int64)
457
458      HANDLE_PACKED_TYPE(FLOAT , Float , float , Float )
459      HANDLE_PACKED_TYPE(DOUBLE, Double, double, Double)
460
461      HANDLE_PACKED_TYPE(BOOL, Bool, bool, Bool)
462#undef HANDLE_PACKED_TYPE
463
464      case FieldDescriptor::TYPE_ENUM: {
465        while (input->BytesUntilLimit() > 0) {
466          int value;
467          if (!WireFormatLite::ReadEnum(input, &value)) return false;
468          const EnumValueDescriptor* enum_value =
469              field->enum_type()->FindValueByNumber(value);
470          if (enum_value != NULL) {
471            message_reflection->AddEnum(message, field, enum_value);
472          }
473        }
474
475        break;
476      }
477
478      case FieldDescriptor::TYPE_STRING:
479      case FieldDescriptor::TYPE_GROUP:
480      case FieldDescriptor::TYPE_MESSAGE:
481      case FieldDescriptor::TYPE_BYTES:
482        // Can't have packed fields of these types: these should be caught by
483        // the protocol compiler.
484        return false;
485        break;
486    }
487
488    input->PopLimit(limit);
489  } else {
490    switch (field->type()) {
491#define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE, CPPTYPE_METHOD)               \
492      case FieldDescriptor::TYPE_##TYPE: {                                    \
493        CPPTYPE value;                                                        \
494        if (!WireFormatLite::Read##TYPE_METHOD(input, &value)) return false;  \
495        if (field->is_repeated()) {                                           \
496          message_reflection->Add##CPPTYPE_METHOD(message, field, value);     \
497        } else {                                                              \
498          message_reflection->Set##CPPTYPE_METHOD(message, field, value);     \
499        }                                                                     \
500        break;                                                                \
501      }
502
503      HANDLE_TYPE( INT32,  Int32,  int32,  Int32)
504      HANDLE_TYPE( INT64,  Int64,  int64,  Int64)
505      HANDLE_TYPE(SINT32, SInt32,  int32,  Int32)
506      HANDLE_TYPE(SINT64, SInt64,  int64,  Int64)
507      HANDLE_TYPE(UINT32, UInt32, uint32, UInt32)
508      HANDLE_TYPE(UINT64, UInt64, uint64, UInt64)
509
510      HANDLE_TYPE( FIXED32,  Fixed32, uint32, UInt32)
511      HANDLE_TYPE( FIXED64,  Fixed64, uint64, UInt64)
512      HANDLE_TYPE(SFIXED32, SFixed32,  int32,  Int32)
513      HANDLE_TYPE(SFIXED64, SFixed64,  int64,  Int64)
514
515      HANDLE_TYPE(FLOAT , Float , float , Float )
516      HANDLE_TYPE(DOUBLE, Double, double, Double)
517
518      HANDLE_TYPE(BOOL, Bool, bool, Bool)
519
520      HANDLE_TYPE(STRING, String, string, String)
521      HANDLE_TYPE(BYTES, Bytes, string, String)
522
523#undef HANDLE_TYPE
524
525      case FieldDescriptor::TYPE_ENUM: {
526        int value;
527        if (!WireFormatLite::ReadEnum(input, &value)) return false;
528        const EnumValueDescriptor* enum_value =
529          field->enum_type()->FindValueByNumber(value);
530        if (enum_value != NULL) {
531          if (field->is_repeated()) {
532            message_reflection->AddEnum(message, field, enum_value);
533          } else {
534            message_reflection->SetEnum(message, field, enum_value);
535          }
536        } else {
537          // The enum value is not one of the known values.  Add it to the
538          // UnknownFieldSet.
539          int64 sign_extended_value = static_cast<int64>(value);
540          message_reflection->MutableUnknownFields(message)
541                            ->AddVarint(WireFormatLite::GetTagFieldNumber(tag),
542                                        sign_extended_value);
543        }
544        break;
545      }
546
547
548      case FieldDescriptor::TYPE_GROUP: {
549        Message* sub_message;
550        if (field->is_repeated()) {
551          sub_message = message_reflection->AddMessage(message, field);
552        } else {
553          sub_message = message_reflection->MutableMessage(message, field);
554        }
555
556        if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
557                                       input, sub_message))
558          return false;
559        break;
560      }
561
562      case FieldDescriptor::TYPE_MESSAGE: {
563        Message* sub_message;
564        if (field->is_repeated()) {
565          sub_message = message_reflection->AddMessage(message, field);
566        } else {
567          sub_message = message_reflection->MutableMessage(message, field);
568        }
569
570        if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
571        break;
572      }
573    }
574  }
575
576  return true;
577}
578
579bool WireFormat::ParseAndMergeMessageSetItem(
580    io::CodedInputStream* input,
581    Message* message) {
582  const Reflection* message_reflection = message->GetReflection();
583
584  // This method parses a group which should contain two fields:
585  //   required int32 type_id = 2;
586  //   required data message = 3;
587
588  // Once we see a type_id, we'll construct a fake tag for this extension
589  // which is the tag it would have had under the proto2 extensions wire
590  // format.
591  uint32 fake_tag = 0;
592
593  // Once we see a type_id, we'll look up the FieldDescriptor for the
594  // extension.
595  const FieldDescriptor* field = NULL;
596
597  // If we see message data before the type_id, we'll append it to this so
598  // we can parse it later.  This will probably never happen in practice,
599  // as no MessageSet encoder I know of writes the message before the type ID.
600  // But, it's technically valid so we should allow it.
601  // TODO(kenton):  Use a Cord instead?  Do I care?
602  string message_data;
603
604  while (true) {
605    uint32 tag = input->ReadTag();
606    if (tag == 0) return false;
607
608    switch (tag) {
609      case WireFormatLite::kMessageSetTypeIdTag: {
610        uint32 type_id;
611        if (!input->ReadVarint32(&type_id)) return false;
612        fake_tag = WireFormatLite::MakeTag(
613            type_id, WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
614        field = message_reflection->FindKnownExtensionByNumber(type_id);
615
616        if (!message_data.empty()) {
617          // We saw some message data before the type_id.  Have to parse it
618          // now.
619          io::ArrayInputStream raw_input(message_data.data(),
620                                         message_data.size());
621          io::CodedInputStream sub_input(&raw_input);
622          if (!ParseAndMergeField(fake_tag, field, message,
623                                  &sub_input)) {
624            return false;
625          }
626          message_data.clear();
627        }
628
629        break;
630      }
631
632      case WireFormatLite::kMessageSetMessageTag: {
633        if (fake_tag == 0) {
634          // We haven't seen a type_id yet.  Append this data to message_data.
635          string temp;
636          uint32 length;
637          if (!input->ReadVarint32(&length)) return false;
638          if (!input->ReadString(&temp, length)) return false;
639          message_data.append(temp);
640        } else {
641          // Already saw type_id, so we can parse this directly.
642          if (!ParseAndMergeField(fake_tag, field, message, input)) {
643            return false;
644          }
645        }
646
647        break;
648      }
649
650      case WireFormatLite::kMessageSetItemEndTag: {
651        return true;
652      }
653
654      default: {
655        if (!SkipField(input, tag, NULL)) return false;
656      }
657    }
658  }
659}
660
661// ===================================================================
662
663void WireFormat::SerializeWithCachedSizes(
664    const Message& message,
665    int size, io::CodedOutputStream* output) {
666  const Descriptor* descriptor = message.GetDescriptor();
667  const Reflection* message_reflection = message.GetReflection();
668  int expected_endpoint = output->ByteCount() + size;
669
670  vector<const FieldDescriptor*> fields;
671  message_reflection->ListFields(message, &fields);
672  for (int i = 0; i < fields.size(); i++) {
673    SerializeFieldWithCachedSizes(fields[i], message, output);
674  }
675
676  if (descriptor->options().message_set_wire_format()) {
677    SerializeUnknownMessageSetItems(
678        message_reflection->GetUnknownFields(message), output);
679  } else {
680    SerializeUnknownFields(
681        message_reflection->GetUnknownFields(message), output);
682  }
683
684  GOOGLE_CHECK_EQ(output->ByteCount(), expected_endpoint)
685    << ": Protocol message serialized to a size different from what was "
686       "originally expected.  Perhaps it was modified by another thread "
687       "during serialization?";
688}
689
690void WireFormat::SerializeFieldWithCachedSizes(
691    const FieldDescriptor* field,
692    const Message& message,
693    io::CodedOutputStream* output) {
694  const Reflection* message_reflection = message.GetReflection();
695
696  if (field->is_extension() &&
697      field->containing_type()->options().message_set_wire_format() &&
698      field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
699      !field->is_repeated()) {
700    SerializeMessageSetItemWithCachedSizes(field, message, output);
701    return;
702  }
703
704  int count = 0;
705
706  if (field->is_repeated()) {
707    count = message_reflection->FieldSize(message, field);
708  } else if (message_reflection->HasField(message, field)) {
709    count = 1;
710  }
711
712  const bool is_packed = field->options().packed();
713  if (is_packed && count > 0) {
714    WireFormatLite::WriteTag(field->number(),
715        WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output);
716    const int data_size = FieldDataOnlyByteSize(field, message);
717    output->WriteVarint32(data_size);
718  }
719
720  for (int j = 0; j < count; j++) {
721    switch (field->type()) {
722#define HANDLE_PRIMITIVE_TYPE(TYPE, CPPTYPE, TYPE_METHOD, CPPTYPE_METHOD)      \
723      case FieldDescriptor::TYPE_##TYPE: {                                     \
724        const CPPTYPE value = field->is_repeated() ?                           \
725                              message_reflection->GetRepeated##CPPTYPE_METHOD( \
726                                message, field, j) :                           \
727                              message_reflection->Get##CPPTYPE_METHOD(         \
728                                message, field);                               \
729        if (is_packed) {                                                       \
730          WireFormatLite::Write##TYPE_METHOD##NoTag(value, output);            \
731        } else {                                                               \
732          WireFormatLite::Write##TYPE_METHOD(field->number(), value, output);  \
733        }                                                                      \
734        break;                                                                 \
735      }
736
737      HANDLE_PRIMITIVE_TYPE( INT32,  int32,  Int32,  Int32)
738      HANDLE_PRIMITIVE_TYPE( INT64,  int64,  Int64,  Int64)
739      HANDLE_PRIMITIVE_TYPE(SINT32,  int32, SInt32,  Int32)
740      HANDLE_PRIMITIVE_TYPE(SINT64,  int64, SInt64,  Int64)
741      HANDLE_PRIMITIVE_TYPE(UINT32, uint32, UInt32, UInt32)
742      HANDLE_PRIMITIVE_TYPE(UINT64, uint64, UInt64, UInt64)
743
744      HANDLE_PRIMITIVE_TYPE( FIXED32, uint32,  Fixed32, UInt32)
745      HANDLE_PRIMITIVE_TYPE( FIXED64, uint64,  Fixed64, UInt64)
746      HANDLE_PRIMITIVE_TYPE(SFIXED32,  int32, SFixed32,  Int32)
747      HANDLE_PRIMITIVE_TYPE(SFIXED64,  int64, SFixed64,  Int64)
748
749      HANDLE_PRIMITIVE_TYPE(FLOAT , float , Float , Float )
750      HANDLE_PRIMITIVE_TYPE(DOUBLE, double, Double, Double)
751
752      HANDLE_PRIMITIVE_TYPE(BOOL, bool, Bool, Bool)
753#undef HANDLE_PRIMITIVE_TYPE
754
755#define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                       \
756      case FieldDescriptor::TYPE_##TYPE:                                     \
757        WireFormatLite::Write##TYPE_METHOD(                                  \
758              field->number(),                                               \
759              field->is_repeated() ?                                         \
760                message_reflection->GetRepeated##CPPTYPE_METHOD(             \
761                  message, field, j) :                                       \
762                message_reflection->Get##CPPTYPE_METHOD(message, field),     \
763              output);                                                       \
764        break;
765
766      HANDLE_TYPE(GROUP  , Group  , Message)
767      HANDLE_TYPE(MESSAGE, Message, Message)
768#undef HANDLE_TYPE
769
770      case FieldDescriptor::TYPE_ENUM: {
771        const EnumValueDescriptor* value = field->is_repeated() ?
772          message_reflection->GetRepeatedEnum(message, field, j) :
773          message_reflection->GetEnum(message, field);
774        if (is_packed) {
775          WireFormatLite::WriteEnumNoTag(value->number(), output);
776        } else {
777          WireFormatLite::WriteEnum(field->number(), value->number(), output);
778        }
779        break;
780      }
781
782      // Handle strings separately so that we can get string references
783      // instead of copying.
784      case FieldDescriptor::TYPE_STRING: {
785          string scratch;
786          const string& value = field->is_repeated() ?
787            message_reflection->GetRepeatedStringReference(
788              message, field, j, &scratch) :
789            message_reflection->GetStringReference(message, field, &scratch);
790          VerifyUTF8String(value.data(), value.length(), SERIALIZE);
791          WireFormatLite::WriteString(field->number(), value, output);
792        break;
793      }
794
795      case FieldDescriptor::TYPE_BYTES: {
796          string scratch;
797          const string& value = field->is_repeated() ?
798            message_reflection->GetRepeatedStringReference(
799              message, field, j, &scratch) :
800            message_reflection->GetStringReference(message, field, &scratch);
801          WireFormatLite::WriteBytes(field->number(), value, output);
802        break;
803      }
804    }
805  }
806}
807
808void WireFormat::SerializeMessageSetItemWithCachedSizes(
809    const FieldDescriptor* field,
810    const Message& message,
811    io::CodedOutputStream* output) {
812  const Reflection* message_reflection = message.GetReflection();
813
814  // Start group.
815  output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
816
817  // Write type ID.
818  output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
819  output->WriteVarint32(field->number());
820
821  // Write message.
822  output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
823
824  const Message& sub_message = message_reflection->GetMessage(message, field);
825  output->WriteVarint32(sub_message.GetCachedSize());
826  sub_message.SerializeWithCachedSizes(output);
827
828  // End group.
829  output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
830}
831
832// ===================================================================
833
834int WireFormat::ByteSize(const Message& message) {
835  const Descriptor* descriptor = message.GetDescriptor();
836  const Reflection* message_reflection = message.GetReflection();
837
838  int our_size = 0;
839
840  vector<const FieldDescriptor*> fields;
841  message_reflection->ListFields(message, &fields);
842  for (int i = 0; i < fields.size(); i++) {
843    our_size += FieldByteSize(fields[i], message);
844  }
845
846  if (descriptor->options().message_set_wire_format()) {
847    our_size += ComputeUnknownMessageSetItemsSize(
848      message_reflection->GetUnknownFields(message));
849  } else {
850    our_size += ComputeUnknownFieldsSize(
851      message_reflection->GetUnknownFields(message));
852  }
853
854  return our_size;
855}
856
857int WireFormat::FieldByteSize(
858    const FieldDescriptor* field,
859    const Message& message) {
860  const Reflection* message_reflection = message.GetReflection();
861
862  if (field->is_extension() &&
863      field->containing_type()->options().message_set_wire_format() &&
864      field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
865      !field->is_repeated()) {
866    return MessageSetItemByteSize(field, message);
867  }
868
869  int count = 0;
870  if (field->is_repeated()) {
871    count = message_reflection->FieldSize(message, field);
872  } else if (message_reflection->HasField(message, field)) {
873    count = 1;
874  }
875
876  const int data_size = FieldDataOnlyByteSize(field, message);
877  int our_size = data_size;
878  if (field->options().packed()) {
879    if (data_size > 0) {
880      // Packed fields get serialized like a string, not their native type.
881      // Technically this doesn't really matter; the size only changes if it's
882      // a GROUP
883      our_size += TagSize(field->number(), FieldDescriptor::TYPE_STRING);
884      our_size += io::CodedOutputStream::VarintSize32(data_size);
885    }
886  } else {
887    our_size += count * TagSize(field->number(), field->type());
888  }
889  return our_size;
890}
891
892int WireFormat::FieldDataOnlyByteSize(
893    const FieldDescriptor* field,
894    const Message& message) {
895  const Reflection* message_reflection = message.GetReflection();
896
897  int count = 0;
898  if (field->is_repeated()) {
899    count = message_reflection->FieldSize(message, field);
900  } else if (message_reflection->HasField(message, field)) {
901    count = 1;
902  }
903
904  int data_size = 0;
905  switch (field->type()) {
906#define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                     \
907    case FieldDescriptor::TYPE_##TYPE:                                     \
908      if (field->is_repeated()) {                                          \
909        for (int j = 0; j < count; j++) {                                  \
910          data_size += WireFormatLite::TYPE_METHOD##Size(                  \
911            message_reflection->GetRepeated##CPPTYPE_METHOD(               \
912              message, field, j));                                         \
913        }                                                                  \
914      } else {                                                             \
915        data_size += WireFormatLite::TYPE_METHOD##Size(                    \
916          message_reflection->Get##CPPTYPE_METHOD(message, field));        \
917      }                                                                    \
918      break;
919
920#define HANDLE_FIXED_TYPE(TYPE, TYPE_METHOD)                               \
921    case FieldDescriptor::TYPE_##TYPE:                                     \
922      data_size += count * WireFormatLite::k##TYPE_METHOD##Size;           \
923      break;
924
925    HANDLE_TYPE( INT32,  Int32,  Int32)
926    HANDLE_TYPE( INT64,  Int64,  Int64)
927    HANDLE_TYPE(SINT32, SInt32,  Int32)
928    HANDLE_TYPE(SINT64, SInt64,  Int64)
929    HANDLE_TYPE(UINT32, UInt32, UInt32)
930    HANDLE_TYPE(UINT64, UInt64, UInt64)
931
932    HANDLE_FIXED_TYPE( FIXED32,  Fixed32)
933    HANDLE_FIXED_TYPE( FIXED64,  Fixed64)
934    HANDLE_FIXED_TYPE(SFIXED32, SFixed32)
935    HANDLE_FIXED_TYPE(SFIXED64, SFixed64)
936
937    HANDLE_FIXED_TYPE(FLOAT , Float )
938    HANDLE_FIXED_TYPE(DOUBLE, Double)
939
940    HANDLE_FIXED_TYPE(BOOL, Bool)
941
942    HANDLE_TYPE(GROUP  , Group  , Message)
943    HANDLE_TYPE(MESSAGE, Message, Message)
944#undef HANDLE_TYPE
945#undef HANDLE_FIXED_TYPE
946
947    case FieldDescriptor::TYPE_ENUM: {
948      if (field->is_repeated()) {
949        for (int j = 0; j < count; j++) {
950          data_size += WireFormatLite::EnumSize(
951            message_reflection->GetRepeatedEnum(message, field, j)->number());
952        }
953      } else {
954        data_size += WireFormatLite::EnumSize(
955          message_reflection->GetEnum(message, field)->number());
956      }
957      break;
958    }
959
960    // Handle strings separately so that we can get string references
961    // instead of copying.
962    case FieldDescriptor::TYPE_STRING:
963    case FieldDescriptor::TYPE_BYTES: {
964        for (int j = 0; j < count; j++) {
965          string scratch;
966          const string& value = field->is_repeated() ?
967            message_reflection->GetRepeatedStringReference(
968              message, field, j, &scratch) :
969            message_reflection->GetStringReference(message, field, &scratch);
970          data_size += WireFormatLite::StringSize(value);
971        }
972      break;
973    }
974  }
975  return data_size;
976}
977
978int WireFormat::MessageSetItemByteSize(
979    const FieldDescriptor* field,
980    const Message& message) {
981  const Reflection* message_reflection = message.GetReflection();
982
983  int our_size = WireFormatLite::kMessageSetItemTagsSize;
984
985  // type_id
986  our_size += io::CodedOutputStream::VarintSize32(field->number());
987
988  // message
989  const Message& sub_message = message_reflection->GetMessage(message, field);
990  int message_size = sub_message.ByteSize();
991
992  our_size += io::CodedOutputStream::VarintSize32(message_size);
993  our_size += message_size;
994
995  return our_size;
996}
997
998void WireFormat::VerifyUTF8StringFallback(const char* data,
999                                          int size,
1000                                          Operation op) {
1001  if (!IsStructurallyValidUTF8(data, size)) {
1002    const char* operation_str = NULL;
1003    switch (op) {
1004      case PARSE:
1005        operation_str = "parsing";
1006        break;
1007      case SERIALIZE:
1008        operation_str = "serializing";
1009        break;
1010      // no default case: have the compiler warn if a case is not covered.
1011    }
1012    GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
1013               << operation_str
1014               << " protocol buffer. Strings must contain only UTF-8; "
1015                  "use the 'bytes' type for raw bytes.";
1016  }
1017}
1018
1019
1020}  // namespace internal
1021}  // namespace protobuf
1022}  // namespace google
1023