wire_format.cc revision d0332953cda33fb4f8e24ebff9c49159b69c43d6
1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <stack>
36#include <string>
37#include <vector>
38
39#include <google/protobuf/wire_format.h>
40
41#include <google/protobuf/stubs/common.h>
42#include <google/protobuf/descriptor.h>
43#include <google/protobuf/wire_format_lite_inl.h>
44#include <google/protobuf/descriptor.pb.h>
45#include <google/protobuf/io/coded_stream.h>
46#include <google/protobuf/io/zero_copy_stream.h>
47#include <google/protobuf/io/zero_copy_stream_impl.h>
48#include <google/protobuf/unknown_field_set.h>
49
50
51namespace google {
52namespace protobuf {
53namespace internal {
54
55using internal::WireFormatLite;
56
57namespace {
58
59// This function turns out to be convenient when using some macros later.
60inline int GetEnumNumber(const EnumValueDescriptor* descriptor) {
61  return descriptor->number();
62}
63
64}  // anonymous namespace
65
66// ===================================================================
67
68bool UnknownFieldSetFieldSkipper::SkipField(
69    io::CodedInputStream* input, uint32 tag) {
70  return WireFormat::SkipField(input, tag, unknown_fields_);
71}
72
73bool UnknownFieldSetFieldSkipper::SkipMessage(io::CodedInputStream* input) {
74  return WireFormat::SkipMessage(input, unknown_fields_);
75}
76
77void UnknownFieldSetFieldSkipper::SkipUnknownEnum(
78    int field_number, int value) {
79  unknown_fields_->AddVarint(field_number, value);
80}
81
82bool WireFormat::SkipField(io::CodedInputStream* input, uint32 tag,
83                           UnknownFieldSet* unknown_fields) {
84  int number = WireFormatLite::GetTagFieldNumber(tag);
85
86  switch (WireFormatLite::GetTagWireType(tag)) {
87    case WireFormatLite::WIRETYPE_VARINT: {
88      uint64 value;
89      if (!input->ReadVarint64(&value)) return false;
90      if (unknown_fields != NULL) unknown_fields->AddVarint(number, value);
91      return true;
92    }
93    case WireFormatLite::WIRETYPE_FIXED64: {
94      uint64 value;
95      if (!input->ReadLittleEndian64(&value)) return false;
96      if (unknown_fields != NULL) unknown_fields->AddFixed64(number, value);
97      return true;
98    }
99    case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
100      uint32 length;
101      if (!input->ReadVarint32(&length)) return false;
102      if (unknown_fields == NULL) {
103        if (!input->Skip(length)) return false;
104      } else {
105        if (!input->ReadString(unknown_fields->AddLengthDelimited(number),
106                               length)) {
107          return false;
108        }
109      }
110      return true;
111    }
112    case WireFormatLite::WIRETYPE_START_GROUP: {
113      if (!input->IncrementRecursionDepth()) return false;
114      if (!SkipMessage(input, (unknown_fields == NULL) ?
115                              NULL : unknown_fields->AddGroup(number))) {
116        return false;
117      }
118      input->DecrementRecursionDepth();
119      // Check that the ending tag matched the starting tag.
120      if (!input->LastTagWas(WireFormatLite::MakeTag(
121          WireFormatLite::GetTagFieldNumber(tag),
122          WireFormatLite::WIRETYPE_END_GROUP))) {
123        return false;
124      }
125      return true;
126    }
127    case WireFormatLite::WIRETYPE_END_GROUP: {
128      return false;
129    }
130    case WireFormatLite::WIRETYPE_FIXED32: {
131      uint32 value;
132      if (!input->ReadLittleEndian32(&value)) return false;
133      if (unknown_fields != NULL) unknown_fields->AddFixed32(number, value);
134      return true;
135    }
136    default: {
137      return false;
138    }
139  }
140}
141
142bool WireFormat::SkipMessage(io::CodedInputStream* input,
143                             UnknownFieldSet* unknown_fields) {
144  while(true) {
145    uint32 tag = input->ReadTag();
146    if (tag == 0) {
147      // End of input.  This is a valid place to end, so return true.
148      return true;
149    }
150
151    WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
152
153    if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
154      // Must be the end of the message.
155      return true;
156    }
157
158    if (!SkipField(input, tag, unknown_fields)) return false;
159  }
160}
161
162void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
163                                        io::CodedOutputStream* output) {
164  for (int i = 0; i < unknown_fields.field_count(); i++) {
165    const UnknownField& field = unknown_fields.field(i);
166    switch (field.type()) {
167      case UnknownField::TYPE_VARINT:
168        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
169            WireFormatLite::WIRETYPE_VARINT));
170        output->WriteVarint64(field.varint());
171        break;
172      case UnknownField::TYPE_FIXED32:
173        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
174            WireFormatLite::WIRETYPE_FIXED32));
175        output->WriteLittleEndian32(field.fixed32());
176        break;
177      case UnknownField::TYPE_FIXED64:
178        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
179            WireFormatLite::WIRETYPE_FIXED64));
180        output->WriteLittleEndian64(field.fixed64());
181        break;
182      case UnknownField::TYPE_LENGTH_DELIMITED:
183        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
184            WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
185        output->WriteVarint32(field.length_delimited().size());
186        output->WriteString(field.length_delimited());
187        break;
188      case UnknownField::TYPE_GROUP:
189        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
190            WireFormatLite::WIRETYPE_START_GROUP));
191        SerializeUnknownFields(field.group(), output);
192        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
193            WireFormatLite::WIRETYPE_END_GROUP));
194        break;
195    }
196  }
197}
198
199uint8* WireFormat::SerializeUnknownFieldsToArray(
200    const UnknownFieldSet& unknown_fields,
201    uint8* target) {
202  for (int i = 0; i < unknown_fields.field_count(); i++) {
203    const UnknownField& field = unknown_fields.field(i);
204
205    switch (field.type()) {
206      case UnknownField::TYPE_VARINT:
207        target = WireFormatLite::WriteInt64ToArray(
208            field.number(), field.varint(), target);
209        break;
210      case UnknownField::TYPE_FIXED32:
211        target = WireFormatLite::WriteFixed32ToArray(
212            field.number(), field.fixed32(), target);
213        break;
214      case UnknownField::TYPE_FIXED64:
215        target = WireFormatLite::WriteFixed64ToArray(
216            field.number(), field.fixed64(), target);
217        break;
218      case UnknownField::TYPE_LENGTH_DELIMITED:
219        target = WireFormatLite::WriteBytesToArray(
220            field.number(), field.length_delimited(), target);
221        break;
222      case UnknownField::TYPE_GROUP:
223        target = WireFormatLite::WriteTagToArray(
224            field.number(), WireFormatLite::WIRETYPE_START_GROUP, target);
225        target = SerializeUnknownFieldsToArray(field.group(), target);
226        target = WireFormatLite::WriteTagToArray(
227            field.number(), WireFormatLite::WIRETYPE_END_GROUP, target);
228        break;
229    }
230  }
231  return target;
232}
233
234void WireFormat::SerializeUnknownMessageSetItems(
235    const UnknownFieldSet& unknown_fields,
236    io::CodedOutputStream* output) {
237  for (int i = 0; i < unknown_fields.field_count(); i++) {
238    const UnknownField& field = unknown_fields.field(i);
239    // The only unknown fields that are allowed to exist in a MessageSet are
240    // messages, which are length-delimited.
241    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
242      const string& data = field.length_delimited();
243
244      // Start group.
245      output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
246
247      // Write type ID.
248      output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
249      output->WriteVarint32(field.number());
250
251      // Write message.
252      output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
253      output->WriteVarint32(data.size());
254      output->WriteString(data);
255
256      // End group.
257      output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
258    }
259  }
260}
261
262uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
263    const UnknownFieldSet& unknown_fields,
264    uint8* target) {
265  for (int i = 0; i < unknown_fields.field_count(); i++) {
266    const UnknownField& field = unknown_fields.field(i);
267
268    // The only unknown fields that are allowed to exist in a MessageSet are
269    // messages, which are length-delimited.
270    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
271      const string& data = field.length_delimited();
272
273      // Start group.
274      target = io::CodedOutputStream::WriteTagToArray(
275          WireFormatLite::kMessageSetItemStartTag, target);
276
277      // Write type ID.
278      target = io::CodedOutputStream::WriteTagToArray(
279          WireFormatLite::kMessageSetTypeIdTag, target);
280      target = io::CodedOutputStream::WriteVarint32ToArray(
281          field.number(), target);
282
283      // Write message.
284      target = io::CodedOutputStream::WriteTagToArray(
285          WireFormatLite::kMessageSetMessageTag, target);
286      target = io::CodedOutputStream::WriteVarint32ToArray(data.size(), target);
287      target = io::CodedOutputStream::WriteStringToArray(data, target);
288
289      // End group.
290      target = io::CodedOutputStream::WriteTagToArray(
291          WireFormatLite::kMessageSetItemEndTag, target);
292    }
293  }
294
295  return target;
296}
297
298int WireFormat::ComputeUnknownFieldsSize(
299    const UnknownFieldSet& unknown_fields) {
300  int size = 0;
301  for (int i = 0; i < unknown_fields.field_count(); i++) {
302    const UnknownField& field = unknown_fields.field(i);
303
304    switch (field.type()) {
305      case UnknownField::TYPE_VARINT:
306        size += io::CodedOutputStream::VarintSize32(
307            WireFormatLite::MakeTag(field.number(),
308            WireFormatLite::WIRETYPE_VARINT));
309        size += io::CodedOutputStream::VarintSize64(field.varint());
310        break;
311      case UnknownField::TYPE_FIXED32:
312        size += io::CodedOutputStream::VarintSize32(
313            WireFormatLite::MakeTag(field.number(),
314            WireFormatLite::WIRETYPE_FIXED32));
315        size += sizeof(int32);
316        break;
317      case UnknownField::TYPE_FIXED64:
318        size += io::CodedOutputStream::VarintSize32(
319            WireFormatLite::MakeTag(field.number(),
320            WireFormatLite::WIRETYPE_FIXED64));
321        size += sizeof(int64);
322        break;
323      case UnknownField::TYPE_LENGTH_DELIMITED:
324        size += io::CodedOutputStream::VarintSize32(
325            WireFormatLite::MakeTag(field.number(),
326            WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
327        size += io::CodedOutputStream::VarintSize32(
328            field.length_delimited().size());
329        size += field.length_delimited().size();
330        break;
331      case UnknownField::TYPE_GROUP:
332        size += io::CodedOutputStream::VarintSize32(
333            WireFormatLite::MakeTag(field.number(),
334            WireFormatLite::WIRETYPE_START_GROUP));
335        size += ComputeUnknownFieldsSize(field.group());
336        size += io::CodedOutputStream::VarintSize32(
337            WireFormatLite::MakeTag(field.number(),
338            WireFormatLite::WIRETYPE_END_GROUP));
339        break;
340    }
341  }
342
343  return size;
344}
345
346int WireFormat::ComputeUnknownMessageSetItemsSize(
347    const UnknownFieldSet& unknown_fields) {
348  int size = 0;
349  for (int i = 0; i < unknown_fields.field_count(); i++) {
350    const UnknownField& field = unknown_fields.field(i);
351
352    // The only unknown fields that are allowed to exist in a MessageSet are
353    // messages, which are length-delimited.
354    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
355      size += WireFormatLite::kMessageSetItemTagsSize;
356      size += io::CodedOutputStream::VarintSize32(field.number());
357      size += io::CodedOutputStream::VarintSize32(
358        field.length_delimited().size());
359      size += field.length_delimited().size();
360    }
361  }
362
363  return size;
364}
365
366// ===================================================================
367
368bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
369                                      Message* message) {
370  const Descriptor* descriptor = message->GetDescriptor();
371  const Reflection* message_reflection = message->GetReflection();
372
373  while(true) {
374    uint32 tag = input->ReadTag();
375    if (tag == 0) {
376      // End of input.  This is a valid place to end, so return true.
377      return true;
378    }
379
380    if (WireFormatLite::GetTagWireType(tag) ==
381        WireFormatLite::WIRETYPE_END_GROUP) {
382      // Must be the end of the message.
383      return true;
384    }
385
386    const FieldDescriptor* field = NULL;
387
388    if (descriptor != NULL) {
389      int field_number = WireFormatLite::GetTagFieldNumber(tag);
390      field = descriptor->FindFieldByNumber(field_number);
391
392      // If that failed, check if the field is an extension.
393      if (field == NULL && descriptor->IsExtensionNumber(field_number)) {
394        if (input->GetExtensionPool() == NULL) {
395          field = message_reflection->FindKnownExtensionByNumber(field_number);
396        } else {
397          field = input->GetExtensionPool()
398                       ->FindExtensionByNumber(descriptor, field_number);
399        }
400      }
401
402      // If that failed, but we're a MessageSet, and this is the tag for a
403      // MessageSet item, then parse that.
404      if (field == NULL &&
405          descriptor->options().message_set_wire_format() &&
406          tag == WireFormatLite::kMessageSetItemStartTag) {
407        if (!ParseAndMergeMessageSetItem(input, message)) {
408          return false;
409        }
410        continue;  // Skip ParseAndMergeField(); already taken care of.
411      }
412    }
413
414    if (!ParseAndMergeField(tag, field, message, input)) {
415      return false;
416    }
417  }
418}
419
420bool WireFormat::ParseAndMergeField(
421    uint32 tag,
422    const FieldDescriptor* field,        // May be NULL for unknown
423    Message* message,
424    io::CodedInputStream* input) {
425  const Reflection* message_reflection = message->GetReflection();
426
427  enum { UNKNOWN, NORMAL_FORMAT, PACKED_FORMAT } value_format;
428
429  if (field == NULL) {
430    value_format = UNKNOWN;
431  } else if (WireFormatLite::GetTagWireType(tag) ==
432             WireTypeForFieldType(field->type())) {
433    value_format = NORMAL_FORMAT;
434  } else if (field->is_packable() &&
435             WireFormatLite::GetTagWireType(tag) ==
436             WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
437    value_format = PACKED_FORMAT;
438  } else {
439    // We don't recognize this field. Either the field number is unknown
440    // or the wire type doesn't match. Put it in our unknown field set.
441    value_format = UNKNOWN;
442  }
443
444  if (value_format == UNKNOWN) {
445    return SkipField(input, tag,
446                     message_reflection->MutableUnknownFields(message));
447  } else if (value_format == PACKED_FORMAT) {
448    uint32 length;
449    if (!input->ReadVarint32(&length)) return false;
450    io::CodedInputStream::Limit limit = input->PushLimit(length);
451
452    switch (field->type()) {
453#define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                      \
454      case FieldDescriptor::TYPE_##TYPE: {                                     \
455        while (input->BytesUntilLimit() > 0) {                                 \
456          CPPTYPE value;                                                       \
457          if (!WireFormatLite::ReadPrimitive<                                  \
458                CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))          \
459            return false;                                                      \
460          message_reflection->Add##CPPTYPE_METHOD(message, field, value);      \
461        }                                                                      \
462        break;                                                                 \
463      }
464
465      HANDLE_PACKED_TYPE( INT32,  int32,  Int32)
466      HANDLE_PACKED_TYPE( INT64,  int64,  Int64)
467      HANDLE_PACKED_TYPE(SINT32,  int32,  Int32)
468      HANDLE_PACKED_TYPE(SINT64,  int64,  Int64)
469      HANDLE_PACKED_TYPE(UINT32, uint32, UInt32)
470      HANDLE_PACKED_TYPE(UINT64, uint64, UInt64)
471
472      HANDLE_PACKED_TYPE( FIXED32, uint32, UInt32)
473      HANDLE_PACKED_TYPE( FIXED64, uint64, UInt64)
474      HANDLE_PACKED_TYPE(SFIXED32,  int32,  Int32)
475      HANDLE_PACKED_TYPE(SFIXED64,  int64,  Int64)
476
477      HANDLE_PACKED_TYPE(FLOAT , float , Float )
478      HANDLE_PACKED_TYPE(DOUBLE, double, Double)
479
480      HANDLE_PACKED_TYPE(BOOL, bool, Bool)
481#undef HANDLE_PACKED_TYPE
482
483      case FieldDescriptor::TYPE_ENUM: {
484        while (input->BytesUntilLimit() > 0) {
485          int value;
486          if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
487                  input, &value)) return false;
488          const EnumValueDescriptor* enum_value =
489              field->enum_type()->FindValueByNumber(value);
490          if (enum_value != NULL) {
491            message_reflection->AddEnum(message, field, enum_value);
492          }
493        }
494
495        break;
496      }
497
498      case FieldDescriptor::TYPE_STRING:
499      case FieldDescriptor::TYPE_GROUP:
500      case FieldDescriptor::TYPE_MESSAGE:
501      case FieldDescriptor::TYPE_BYTES:
502        // Can't have packed fields of these types: these should be caught by
503        // the protocol compiler.
504        return false;
505        break;
506    }
507
508    input->PopLimit(limit);
509  } else {
510    // Non-packed value (value_format == NORMAL_FORMAT)
511    switch (field->type()) {
512#define HANDLE_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                            \
513      case FieldDescriptor::TYPE_##TYPE: {                                    \
514        CPPTYPE value;                                                        \
515        if (!WireFormatLite::ReadPrimitive<                                   \
516                CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))         \
517          return false;                                                       \
518        if (field->is_repeated()) {                                           \
519          message_reflection->Add##CPPTYPE_METHOD(message, field, value);     \
520        } else {                                                              \
521          message_reflection->Set##CPPTYPE_METHOD(message, field, value);     \
522        }                                                                     \
523        break;                                                                \
524      }
525
526      HANDLE_TYPE( INT32,  int32,  Int32)
527      HANDLE_TYPE( INT64,  int64,  Int64)
528      HANDLE_TYPE(SINT32,  int32,  Int32)
529      HANDLE_TYPE(SINT64,  int64,  Int64)
530      HANDLE_TYPE(UINT32, uint32, UInt32)
531      HANDLE_TYPE(UINT64, uint64, UInt64)
532
533      HANDLE_TYPE( FIXED32, uint32, UInt32)
534      HANDLE_TYPE( FIXED64, uint64, UInt64)
535      HANDLE_TYPE(SFIXED32,  int32,  Int32)
536      HANDLE_TYPE(SFIXED64,  int64,  Int64)
537
538      HANDLE_TYPE(FLOAT , float , Float )
539      HANDLE_TYPE(DOUBLE, double, Double)
540
541      HANDLE_TYPE(BOOL, bool, Bool)
542#undef HANDLE_TYPE
543
544      case FieldDescriptor::TYPE_ENUM: {
545        int value;
546        if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
547                input, &value)) return false;
548        const EnumValueDescriptor* enum_value =
549          field->enum_type()->FindValueByNumber(value);
550        if (enum_value != NULL) {
551          if (field->is_repeated()) {
552            message_reflection->AddEnum(message, field, enum_value);
553          } else {
554            message_reflection->SetEnum(message, field, enum_value);
555          }
556        } else {
557          // The enum value is not one of the known values.  Add it to the
558          // UnknownFieldSet.
559          int64 sign_extended_value = static_cast<int64>(value);
560          message_reflection->MutableUnknownFields(message)
561                            ->AddVarint(WireFormatLite::GetTagFieldNumber(tag),
562                                        sign_extended_value);
563        }
564        break;
565      }
566
567      // Handle strings separately so that we can optimize the ctype=CORD case.
568      case FieldDescriptor::TYPE_STRING: {
569        string value;
570        if (!WireFormatLite::ReadString(input, &value)) return false;
571        VerifyUTF8String(value.data(), value.length(), PARSE);
572        if (field->is_repeated()) {
573          message_reflection->AddString(message, field, value);
574        } else {
575          message_reflection->SetString(message, field, value);
576        }
577        break;
578      }
579
580      case FieldDescriptor::TYPE_BYTES: {
581        string value;
582        if (!WireFormatLite::ReadBytes(input, &value)) return false;
583        if (field->is_repeated()) {
584          message_reflection->AddString(message, field, value);
585        } else {
586          message_reflection->SetString(message, field, value);
587        }
588        break;
589      }
590
591      case FieldDescriptor::TYPE_GROUP: {
592        Message* sub_message;
593        if (field->is_repeated()) {
594          sub_message = message_reflection->AddMessage(
595              message, field, input->GetExtensionFactory());
596        } else {
597          sub_message = message_reflection->MutableMessage(
598              message, field, input->GetExtensionFactory());
599        }
600
601        if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
602                                       input, sub_message))
603          return false;
604        break;
605      }
606
607      case FieldDescriptor::TYPE_MESSAGE: {
608        Message* sub_message;
609        if (field->is_repeated()) {
610          sub_message = message_reflection->AddMessage(
611              message, field, input->GetExtensionFactory());
612        } else {
613          sub_message = message_reflection->MutableMessage(
614              message, field, input->GetExtensionFactory());
615        }
616
617        if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
618        break;
619      }
620    }
621  }
622
623  return true;
624}
625
626bool WireFormat::ParseAndMergeMessageSetItem(
627    io::CodedInputStream* input,
628    Message* message) {
629  const Reflection* message_reflection = message->GetReflection();
630
631  // This method parses a group which should contain two fields:
632  //   required int32 type_id = 2;
633  //   required data message = 3;
634
635  // Once we see a type_id, we'll construct a fake tag for this extension
636  // which is the tag it would have had under the proto2 extensions wire
637  // format.
638  uint32 fake_tag = 0;
639
640  // Once we see a type_id, we'll look up the FieldDescriptor for the
641  // extension.
642  const FieldDescriptor* field = NULL;
643
644  // If we see message data before the type_id, we'll append it to this so
645  // we can parse it later.  This will probably never happen in practice,
646  // as no MessageSet encoder I know of writes the message before the type ID.
647  // But, it's technically valid so we should allow it.
648  // TODO(kenton):  Use a Cord instead?  Do I care?
649  string message_data;
650
651  while (true) {
652    uint32 tag = input->ReadTag();
653    if (tag == 0) return false;
654
655    switch (tag) {
656      case WireFormatLite::kMessageSetTypeIdTag: {
657        uint32 type_id;
658        if (!input->ReadVarint32(&type_id)) return false;
659        fake_tag = WireFormatLite::MakeTag(
660            type_id, WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
661        field = message_reflection->FindKnownExtensionByNumber(type_id);
662
663        if (!message_data.empty()) {
664          // We saw some message data before the type_id.  Have to parse it
665          // now.
666          io::ArrayInputStream raw_input(message_data.data(),
667                                         message_data.size());
668          io::CodedInputStream sub_input(&raw_input);
669          if (!ParseAndMergeField(fake_tag, field, message,
670                                  &sub_input)) {
671            return false;
672          }
673          message_data.clear();
674        }
675
676        break;
677      }
678
679      case WireFormatLite::kMessageSetMessageTag: {
680        if (fake_tag == 0) {
681          // We haven't seen a type_id yet.  Append this data to message_data.
682          string temp;
683          uint32 length;
684          if (!input->ReadVarint32(&length)) return false;
685          if (!input->ReadString(&temp, length)) return false;
686          message_data.append(temp);
687        } else {
688          // Already saw type_id, so we can parse this directly.
689          if (!ParseAndMergeField(fake_tag, field, message, input)) {
690            return false;
691          }
692        }
693
694        break;
695      }
696
697      case WireFormatLite::kMessageSetItemEndTag: {
698        return true;
699      }
700
701      default: {
702        if (!SkipField(input, tag, NULL)) return false;
703      }
704    }
705  }
706}
707
708// ===================================================================
709
710void WireFormat::SerializeWithCachedSizes(
711    const Message& message,
712    int size, io::CodedOutputStream* output) {
713  const Descriptor* descriptor = message.GetDescriptor();
714  const Reflection* message_reflection = message.GetReflection();
715  int expected_endpoint = output->ByteCount() + size;
716
717  vector<const FieldDescriptor*> fields;
718  message_reflection->ListFields(message, &fields);
719  for (int i = 0; i < fields.size(); i++) {
720    SerializeFieldWithCachedSizes(fields[i], message, output);
721  }
722
723  if (descriptor->options().message_set_wire_format()) {
724    SerializeUnknownMessageSetItems(
725        message_reflection->GetUnknownFields(message), output);
726  } else {
727    SerializeUnknownFields(
728        message_reflection->GetUnknownFields(message), output);
729  }
730
731  GOOGLE_CHECK_EQ(output->ByteCount(), expected_endpoint)
732    << ": Protocol message serialized to a size different from what was "
733       "originally expected.  Perhaps it was modified by another thread "
734       "during serialization?";
735}
736
737void WireFormat::SerializeFieldWithCachedSizes(
738    const FieldDescriptor* field,
739    const Message& message,
740    io::CodedOutputStream* output) {
741  const Reflection* message_reflection = message.GetReflection();
742
743  if (field->is_extension() &&
744      field->containing_type()->options().message_set_wire_format() &&
745      field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
746      !field->is_repeated()) {
747    SerializeMessageSetItemWithCachedSizes(field, message, output);
748    return;
749  }
750
751  int count = 0;
752
753  if (field->is_repeated()) {
754    count = message_reflection->FieldSize(message, field);
755  } else if (message_reflection->HasField(message, field)) {
756    count = 1;
757  }
758
759  const bool is_packed = field->options().packed();
760  if (is_packed && count > 0) {
761    WireFormatLite::WriteTag(field->number(),
762        WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output);
763    const int data_size = FieldDataOnlyByteSize(field, message);
764    output->WriteVarint32(data_size);
765  }
766
767  for (int j = 0; j < count; j++) {
768    switch (field->type()) {
769#define HANDLE_PRIMITIVE_TYPE(TYPE, CPPTYPE, TYPE_METHOD, CPPTYPE_METHOD)      \
770      case FieldDescriptor::TYPE_##TYPE: {                                     \
771        const CPPTYPE value = field->is_repeated() ?                           \
772                              message_reflection->GetRepeated##CPPTYPE_METHOD( \
773                                message, field, j) :                           \
774                              message_reflection->Get##CPPTYPE_METHOD(         \
775                                message, field);                               \
776        if (is_packed) {                                                       \
777          WireFormatLite::Write##TYPE_METHOD##NoTag(value, output);            \
778        } else {                                                               \
779          WireFormatLite::Write##TYPE_METHOD(field->number(), value, output);  \
780        }                                                                      \
781        break;                                                                 \
782      }
783
784      HANDLE_PRIMITIVE_TYPE( INT32,  int32,  Int32,  Int32)
785      HANDLE_PRIMITIVE_TYPE( INT64,  int64,  Int64,  Int64)
786      HANDLE_PRIMITIVE_TYPE(SINT32,  int32, SInt32,  Int32)
787      HANDLE_PRIMITIVE_TYPE(SINT64,  int64, SInt64,  Int64)
788      HANDLE_PRIMITIVE_TYPE(UINT32, uint32, UInt32, UInt32)
789      HANDLE_PRIMITIVE_TYPE(UINT64, uint64, UInt64, UInt64)
790
791      HANDLE_PRIMITIVE_TYPE( FIXED32, uint32,  Fixed32, UInt32)
792      HANDLE_PRIMITIVE_TYPE( FIXED64, uint64,  Fixed64, UInt64)
793      HANDLE_PRIMITIVE_TYPE(SFIXED32,  int32, SFixed32,  Int32)
794      HANDLE_PRIMITIVE_TYPE(SFIXED64,  int64, SFixed64,  Int64)
795
796      HANDLE_PRIMITIVE_TYPE(FLOAT , float , Float , Float )
797      HANDLE_PRIMITIVE_TYPE(DOUBLE, double, Double, Double)
798
799      HANDLE_PRIMITIVE_TYPE(BOOL, bool, Bool, Bool)
800#undef HANDLE_PRIMITIVE_TYPE
801
802#define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                       \
803      case FieldDescriptor::TYPE_##TYPE:                                     \
804        WireFormatLite::Write##TYPE_METHOD(                                  \
805              field->number(),                                               \
806              field->is_repeated() ?                                         \
807                message_reflection->GetRepeated##CPPTYPE_METHOD(             \
808                  message, field, j) :                                       \
809                message_reflection->Get##CPPTYPE_METHOD(message, field),     \
810              output);                                                       \
811        break;
812
813      HANDLE_TYPE(GROUP  , Group  , Message)
814      HANDLE_TYPE(MESSAGE, Message, Message)
815#undef HANDLE_TYPE
816
817      case FieldDescriptor::TYPE_ENUM: {
818        const EnumValueDescriptor* value = field->is_repeated() ?
819          message_reflection->GetRepeatedEnum(message, field, j) :
820          message_reflection->GetEnum(message, field);
821        if (is_packed) {
822          WireFormatLite::WriteEnumNoTag(value->number(), output);
823        } else {
824          WireFormatLite::WriteEnum(field->number(), value->number(), output);
825        }
826        break;
827      }
828
829      // Handle strings separately so that we can get string references
830      // instead of copying.
831      case FieldDescriptor::TYPE_STRING: {
832        string scratch;
833        const string& value = field->is_repeated() ?
834          message_reflection->GetRepeatedStringReference(
835            message, field, j, &scratch) :
836          message_reflection->GetStringReference(message, field, &scratch);
837        VerifyUTF8String(value.data(), value.length(), SERIALIZE);
838        WireFormatLite::WriteString(field->number(), value, output);
839        break;
840      }
841
842      case FieldDescriptor::TYPE_BYTES: {
843        string scratch;
844        const string& value = field->is_repeated() ?
845          message_reflection->GetRepeatedStringReference(
846            message, field, j, &scratch) :
847          message_reflection->GetStringReference(message, field, &scratch);
848        WireFormatLite::WriteBytes(field->number(), value, output);
849        break;
850      }
851    }
852  }
853}
854
855void WireFormat::SerializeMessageSetItemWithCachedSizes(
856    const FieldDescriptor* field,
857    const Message& message,
858    io::CodedOutputStream* output) {
859  const Reflection* message_reflection = message.GetReflection();
860
861  // Start group.
862  output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
863
864  // Write type ID.
865  output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
866  output->WriteVarint32(field->number());
867
868  // Write message.
869  output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
870
871  const Message& sub_message = message_reflection->GetMessage(message, field);
872  output->WriteVarint32(sub_message.GetCachedSize());
873  sub_message.SerializeWithCachedSizes(output);
874
875  // End group.
876  output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
877}
878
879// ===================================================================
880
881int WireFormat::ByteSize(const Message& message) {
882  const Descriptor* descriptor = message.GetDescriptor();
883  const Reflection* message_reflection = message.GetReflection();
884
885  int our_size = 0;
886
887  vector<const FieldDescriptor*> fields;
888  message_reflection->ListFields(message, &fields);
889  for (int i = 0; i < fields.size(); i++) {
890    our_size += FieldByteSize(fields[i], message);
891  }
892
893  if (descriptor->options().message_set_wire_format()) {
894    our_size += ComputeUnknownMessageSetItemsSize(
895      message_reflection->GetUnknownFields(message));
896  } else {
897    our_size += ComputeUnknownFieldsSize(
898      message_reflection->GetUnknownFields(message));
899  }
900
901  return our_size;
902}
903
904int WireFormat::FieldByteSize(
905    const FieldDescriptor* field,
906    const Message& message) {
907  const Reflection* message_reflection = message.GetReflection();
908
909  if (field->is_extension() &&
910      field->containing_type()->options().message_set_wire_format() &&
911      field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
912      !field->is_repeated()) {
913    return MessageSetItemByteSize(field, message);
914  }
915
916  int count = 0;
917  if (field->is_repeated()) {
918    count = message_reflection->FieldSize(message, field);
919  } else if (message_reflection->HasField(message, field)) {
920    count = 1;
921  }
922
923  const int data_size = FieldDataOnlyByteSize(field, message);
924  int our_size = data_size;
925  if (field->options().packed()) {
926    if (data_size > 0) {
927      // Packed fields get serialized like a string, not their native type.
928      // Technically this doesn't really matter; the size only changes if it's
929      // a GROUP
930      our_size += TagSize(field->number(), FieldDescriptor::TYPE_STRING);
931      our_size += io::CodedOutputStream::VarintSize32(data_size);
932    }
933  } else {
934    our_size += count * TagSize(field->number(), field->type());
935  }
936  return our_size;
937}
938
939int WireFormat::FieldDataOnlyByteSize(
940    const FieldDescriptor* field,
941    const Message& message) {
942  const Reflection* message_reflection = message.GetReflection();
943
944  int count = 0;
945  if (field->is_repeated()) {
946    count = message_reflection->FieldSize(message, field);
947  } else if (message_reflection->HasField(message, field)) {
948    count = 1;
949  }
950
951  int data_size = 0;
952  switch (field->type()) {
953#define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                     \
954    case FieldDescriptor::TYPE_##TYPE:                                     \
955      if (field->is_repeated()) {                                          \
956        for (int j = 0; j < count; j++) {                                  \
957          data_size += WireFormatLite::TYPE_METHOD##Size(                  \
958            message_reflection->GetRepeated##CPPTYPE_METHOD(               \
959              message, field, j));                                         \
960        }                                                                  \
961      } else {                                                             \
962        data_size += WireFormatLite::TYPE_METHOD##Size(                    \
963          message_reflection->Get##CPPTYPE_METHOD(message, field));        \
964      }                                                                    \
965      break;
966
967#define HANDLE_FIXED_TYPE(TYPE, TYPE_METHOD)                               \
968    case FieldDescriptor::TYPE_##TYPE:                                     \
969      data_size += count * WireFormatLite::k##TYPE_METHOD##Size;           \
970      break;
971
972    HANDLE_TYPE( INT32,  Int32,  Int32)
973    HANDLE_TYPE( INT64,  Int64,  Int64)
974    HANDLE_TYPE(SINT32, SInt32,  Int32)
975    HANDLE_TYPE(SINT64, SInt64,  Int64)
976    HANDLE_TYPE(UINT32, UInt32, UInt32)
977    HANDLE_TYPE(UINT64, UInt64, UInt64)
978
979    HANDLE_FIXED_TYPE( FIXED32,  Fixed32)
980    HANDLE_FIXED_TYPE( FIXED64,  Fixed64)
981    HANDLE_FIXED_TYPE(SFIXED32, SFixed32)
982    HANDLE_FIXED_TYPE(SFIXED64, SFixed64)
983
984    HANDLE_FIXED_TYPE(FLOAT , Float )
985    HANDLE_FIXED_TYPE(DOUBLE, Double)
986
987    HANDLE_FIXED_TYPE(BOOL, Bool)
988
989    HANDLE_TYPE(GROUP  , Group  , Message)
990    HANDLE_TYPE(MESSAGE, Message, Message)
991#undef HANDLE_TYPE
992#undef HANDLE_FIXED_TYPE
993
994    case FieldDescriptor::TYPE_ENUM: {
995      if (field->is_repeated()) {
996        for (int j = 0; j < count; j++) {
997          data_size += WireFormatLite::EnumSize(
998            message_reflection->GetRepeatedEnum(message, field, j)->number());
999        }
1000      } else {
1001        data_size += WireFormatLite::EnumSize(
1002          message_reflection->GetEnum(message, field)->number());
1003      }
1004      break;
1005    }
1006
1007    // Handle strings separately so that we can get string references
1008    // instead of copying.
1009    case FieldDescriptor::TYPE_STRING:
1010    case FieldDescriptor::TYPE_BYTES: {
1011      for (int j = 0; j < count; j++) {
1012        string scratch;
1013        const string& value = field->is_repeated() ?
1014          message_reflection->GetRepeatedStringReference(
1015            message, field, j, &scratch) :
1016          message_reflection->GetStringReference(message, field, &scratch);
1017        data_size += WireFormatLite::StringSize(value);
1018      }
1019      break;
1020    }
1021  }
1022  return data_size;
1023}
1024
1025int WireFormat::MessageSetItemByteSize(
1026    const FieldDescriptor* field,
1027    const Message& message) {
1028  const Reflection* message_reflection = message.GetReflection();
1029
1030  int our_size = WireFormatLite::kMessageSetItemTagsSize;
1031
1032  // type_id
1033  our_size += io::CodedOutputStream::VarintSize32(field->number());
1034
1035  // message
1036  const Message& sub_message = message_reflection->GetMessage(message, field);
1037  int message_size = sub_message.ByteSize();
1038
1039  our_size += io::CodedOutputStream::VarintSize32(message_size);
1040  our_size += message_size;
1041
1042  return our_size;
1043}
1044
1045void WireFormat::VerifyUTF8StringFallback(const char* data,
1046                                          int size,
1047                                          Operation op) {
1048  if (!IsStructurallyValidUTF8(data, size)) {
1049    const char* operation_str = NULL;
1050    switch (op) {
1051      case PARSE:
1052        operation_str = "parsing";
1053        break;
1054      case SERIALIZE:
1055        operation_str = "serializing";
1056        break;
1057      // no default case: have the compiler warn if a case is not covered.
1058    }
1059    GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
1060               << operation_str
1061               << " protocol buffer. Strings must contain only UTF-8; "
1062                  "use the 'bytes' type for raw bytes.";
1063  }
1064}
1065
1066
1067}  // namespace internal
1068}  // namespace protobuf
1069}  // namespace google
1070