1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <stack>
36#include <string>
37#include <vector>
38
39#include <google/protobuf/wire_format.h>
40
41#include <google/protobuf/stubs/common.h>
42#include <google/protobuf/stubs/stringprintf.h>
43#include <google/protobuf/descriptor.h>
44#include <google/protobuf/wire_format_lite_inl.h>
45#include <google/protobuf/descriptor.pb.h>
46#include <google/protobuf/io/coded_stream.h>
47#include <google/protobuf/io/zero_copy_stream.h>
48#include <google/protobuf/io/zero_copy_stream_impl.h>
49#include <google/protobuf/unknown_field_set.h>
50
51
52
53namespace google {
54namespace protobuf {
55namespace internal {
56
57namespace {
58
59// This function turns out to be convenient when using some macros later.
60inline int GetEnumNumber(const EnumValueDescriptor* descriptor) {
61  return descriptor->number();
62}
63
64}  // anonymous namespace
65
66// ===================================================================
67
68bool UnknownFieldSetFieldSkipper::SkipField(
69    io::CodedInputStream* input, uint32 tag) {
70  return WireFormat::SkipField(input, tag, unknown_fields_);
71}
72
73bool UnknownFieldSetFieldSkipper::SkipMessage(io::CodedInputStream* input) {
74  return WireFormat::SkipMessage(input, unknown_fields_);
75}
76
77void UnknownFieldSetFieldSkipper::SkipUnknownEnum(
78    int field_number, int value) {
79  unknown_fields_->AddVarint(field_number, value);
80}
81
82bool WireFormat::SkipField(io::CodedInputStream* input, uint32 tag,
83                           UnknownFieldSet* unknown_fields) {
84  int number = WireFormatLite::GetTagFieldNumber(tag);
85
86  switch (WireFormatLite::GetTagWireType(tag)) {
87    case WireFormatLite::WIRETYPE_VARINT: {
88      uint64 value;
89      if (!input->ReadVarint64(&value)) return false;
90      if (unknown_fields != NULL) unknown_fields->AddVarint(number, value);
91      return true;
92    }
93    case WireFormatLite::WIRETYPE_FIXED64: {
94      uint64 value;
95      if (!input->ReadLittleEndian64(&value)) return false;
96      if (unknown_fields != NULL) unknown_fields->AddFixed64(number, value);
97      return true;
98    }
99    case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
100      uint32 length;
101      if (!input->ReadVarint32(&length)) return false;
102      if (unknown_fields == NULL) {
103        if (!input->Skip(length)) return false;
104      } else {
105        if (!input->ReadString(unknown_fields->AddLengthDelimited(number),
106                               length)) {
107          return false;
108        }
109      }
110      return true;
111    }
112    case WireFormatLite::WIRETYPE_START_GROUP: {
113      if (!input->IncrementRecursionDepth()) return false;
114      if (!SkipMessage(input, (unknown_fields == NULL) ?
115                              NULL : unknown_fields->AddGroup(number))) {
116        return false;
117      }
118      input->DecrementRecursionDepth();
119      // Check that the ending tag matched the starting tag.
120      if (!input->LastTagWas(WireFormatLite::MakeTag(
121          WireFormatLite::GetTagFieldNumber(tag),
122          WireFormatLite::WIRETYPE_END_GROUP))) {
123        return false;
124      }
125      return true;
126    }
127    case WireFormatLite::WIRETYPE_END_GROUP: {
128      return false;
129    }
130    case WireFormatLite::WIRETYPE_FIXED32: {
131      uint32 value;
132      if (!input->ReadLittleEndian32(&value)) return false;
133      if (unknown_fields != NULL) unknown_fields->AddFixed32(number, value);
134      return true;
135    }
136    default: {
137      return false;
138    }
139  }
140}
141
142bool WireFormat::SkipMessage(io::CodedInputStream* input,
143                             UnknownFieldSet* unknown_fields) {
144  while(true) {
145    uint32 tag = input->ReadTag();
146    if (tag == 0) {
147      // End of input.  This is a valid place to end, so return true.
148      return true;
149    }
150
151    WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
152
153    if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
154      // Must be the end of the message.
155      return true;
156    }
157
158    if (!SkipField(input, tag, unknown_fields)) return false;
159  }
160}
161
162void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
163                                        io::CodedOutputStream* output) {
164  for (int i = 0; i < unknown_fields.field_count(); i++) {
165    const UnknownField& field = unknown_fields.field(i);
166    switch (field.type()) {
167      case UnknownField::TYPE_VARINT:
168        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
169            WireFormatLite::WIRETYPE_VARINT));
170        output->WriteVarint64(field.varint());
171        break;
172      case UnknownField::TYPE_FIXED32:
173        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
174            WireFormatLite::WIRETYPE_FIXED32));
175        output->WriteLittleEndian32(field.fixed32());
176        break;
177      case UnknownField::TYPE_FIXED64:
178        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
179            WireFormatLite::WIRETYPE_FIXED64));
180        output->WriteLittleEndian64(field.fixed64());
181        break;
182      case UnknownField::TYPE_LENGTH_DELIMITED:
183        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
184            WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
185        output->WriteVarint32(field.length_delimited().size());
186        output->WriteRawMaybeAliased(field.length_delimited().data(),
187                                     field.length_delimited().size());
188        break;
189      case UnknownField::TYPE_GROUP:
190        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
191            WireFormatLite::WIRETYPE_START_GROUP));
192        SerializeUnknownFields(field.group(), output);
193        output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
194            WireFormatLite::WIRETYPE_END_GROUP));
195        break;
196    }
197  }
198}
199
200uint8* WireFormat::SerializeUnknownFieldsToArray(
201    const UnknownFieldSet& unknown_fields,
202    uint8* target) {
203  for (int i = 0; i < unknown_fields.field_count(); i++) {
204    const UnknownField& field = unknown_fields.field(i);
205
206    switch (field.type()) {
207      case UnknownField::TYPE_VARINT:
208        target = WireFormatLite::WriteInt64ToArray(
209            field.number(), field.varint(), target);
210        break;
211      case UnknownField::TYPE_FIXED32:
212        target = WireFormatLite::WriteFixed32ToArray(
213            field.number(), field.fixed32(), target);
214        break;
215      case UnknownField::TYPE_FIXED64:
216        target = WireFormatLite::WriteFixed64ToArray(
217            field.number(), field.fixed64(), target);
218        break;
219      case UnknownField::TYPE_LENGTH_DELIMITED:
220        target = WireFormatLite::WriteBytesToArray(
221            field.number(), field.length_delimited(), target);
222        break;
223      case UnknownField::TYPE_GROUP:
224        target = WireFormatLite::WriteTagToArray(
225            field.number(), WireFormatLite::WIRETYPE_START_GROUP, target);
226        target = SerializeUnknownFieldsToArray(field.group(), target);
227        target = WireFormatLite::WriteTagToArray(
228            field.number(), WireFormatLite::WIRETYPE_END_GROUP, target);
229        break;
230    }
231  }
232  return target;
233}
234
235void WireFormat::SerializeUnknownMessageSetItems(
236    const UnknownFieldSet& unknown_fields,
237    io::CodedOutputStream* output) {
238  for (int i = 0; i < unknown_fields.field_count(); i++) {
239    const UnknownField& field = unknown_fields.field(i);
240    // The only unknown fields that are allowed to exist in a MessageSet are
241    // messages, which are length-delimited.
242    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
243      // Start group.
244      output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
245
246      // Write type ID.
247      output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
248      output->WriteVarint32(field.number());
249
250      // Write message.
251      output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
252      field.SerializeLengthDelimitedNoTag(output);
253
254      // End group.
255      output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
256    }
257  }
258}
259
260uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
261    const UnknownFieldSet& unknown_fields,
262    uint8* target) {
263  for (int i = 0; i < unknown_fields.field_count(); i++) {
264    const UnknownField& field = unknown_fields.field(i);
265
266    // The only unknown fields that are allowed to exist in a MessageSet are
267    // messages, which are length-delimited.
268    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
269      // Start group.
270      target = io::CodedOutputStream::WriteTagToArray(
271          WireFormatLite::kMessageSetItemStartTag, target);
272
273      // Write type ID.
274      target = io::CodedOutputStream::WriteTagToArray(
275          WireFormatLite::kMessageSetTypeIdTag, target);
276      target = io::CodedOutputStream::WriteVarint32ToArray(
277          field.number(), target);
278
279      // Write message.
280      target = io::CodedOutputStream::WriteTagToArray(
281          WireFormatLite::kMessageSetMessageTag, target);
282      target = field.SerializeLengthDelimitedNoTagToArray(target);
283
284      // End group.
285      target = io::CodedOutputStream::WriteTagToArray(
286          WireFormatLite::kMessageSetItemEndTag, target);
287    }
288  }
289
290  return target;
291}
292
293int WireFormat::ComputeUnknownFieldsSize(
294    const UnknownFieldSet& unknown_fields) {
295  int size = 0;
296  for (int i = 0; i < unknown_fields.field_count(); i++) {
297    const UnknownField& field = unknown_fields.field(i);
298
299    switch (field.type()) {
300      case UnknownField::TYPE_VARINT:
301        size += io::CodedOutputStream::VarintSize32(
302            WireFormatLite::MakeTag(field.number(),
303            WireFormatLite::WIRETYPE_VARINT));
304        size += io::CodedOutputStream::VarintSize64(field.varint());
305        break;
306      case UnknownField::TYPE_FIXED32:
307        size += io::CodedOutputStream::VarintSize32(
308            WireFormatLite::MakeTag(field.number(),
309            WireFormatLite::WIRETYPE_FIXED32));
310        size += sizeof(int32);
311        break;
312      case UnknownField::TYPE_FIXED64:
313        size += io::CodedOutputStream::VarintSize32(
314            WireFormatLite::MakeTag(field.number(),
315            WireFormatLite::WIRETYPE_FIXED64));
316        size += sizeof(int64);
317        break;
318      case UnknownField::TYPE_LENGTH_DELIMITED:
319        size += io::CodedOutputStream::VarintSize32(
320            WireFormatLite::MakeTag(field.number(),
321            WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
322        size += io::CodedOutputStream::VarintSize32(
323            field.length_delimited().size());
324        size += field.length_delimited().size();
325        break;
326      case UnknownField::TYPE_GROUP:
327        size += io::CodedOutputStream::VarintSize32(
328            WireFormatLite::MakeTag(field.number(),
329            WireFormatLite::WIRETYPE_START_GROUP));
330        size += ComputeUnknownFieldsSize(field.group());
331        size += io::CodedOutputStream::VarintSize32(
332            WireFormatLite::MakeTag(field.number(),
333            WireFormatLite::WIRETYPE_END_GROUP));
334        break;
335    }
336  }
337
338  return size;
339}
340
341int WireFormat::ComputeUnknownMessageSetItemsSize(
342    const UnknownFieldSet& unknown_fields) {
343  int size = 0;
344  for (int i = 0; i < unknown_fields.field_count(); i++) {
345    const UnknownField& field = unknown_fields.field(i);
346
347    // The only unknown fields that are allowed to exist in a MessageSet are
348    // messages, which are length-delimited.
349    if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
350      size += WireFormatLite::kMessageSetItemTagsSize;
351      size += io::CodedOutputStream::VarintSize32(field.number());
352
353      int field_size = field.GetLengthDelimitedSize();
354      size += io::CodedOutputStream::VarintSize32(field_size);
355      size += field_size;
356    }
357  }
358
359  return size;
360}
361
362// ===================================================================
363
364bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
365                                      Message* message) {
366  const Descriptor* descriptor = message->GetDescriptor();
367  const Reflection* message_reflection = message->GetReflection();
368
369  while(true) {
370    uint32 tag = input->ReadTag();
371    if (tag == 0) {
372      // End of input.  This is a valid place to end, so return true.
373      return true;
374    }
375
376    if (WireFormatLite::GetTagWireType(tag) ==
377        WireFormatLite::WIRETYPE_END_GROUP) {
378      // Must be the end of the message.
379      return true;
380    }
381
382    const FieldDescriptor* field = NULL;
383
384    if (descriptor != NULL) {
385      int field_number = WireFormatLite::GetTagFieldNumber(tag);
386      field = descriptor->FindFieldByNumber(field_number);
387
388      // If that failed, check if the field is an extension.
389      if (field == NULL && descriptor->IsExtensionNumber(field_number)) {
390        if (input->GetExtensionPool() == NULL) {
391          field = message_reflection->FindKnownExtensionByNumber(field_number);
392        } else {
393          field = input->GetExtensionPool()
394                       ->FindExtensionByNumber(descriptor, field_number);
395        }
396      }
397
398      // If that failed, but we're a MessageSet, and this is the tag for a
399      // MessageSet item, then parse that.
400      if (field == NULL &&
401          descriptor->options().message_set_wire_format() &&
402          tag == WireFormatLite::kMessageSetItemStartTag) {
403        if (!ParseAndMergeMessageSetItem(input, message)) {
404          return false;
405        }
406        continue;  // Skip ParseAndMergeField(); already taken care of.
407      }
408    }
409
410    if (!ParseAndMergeField(tag, field, message, input)) {
411      return false;
412    }
413  }
414}
415
416bool WireFormat::SkipMessageSetField(io::CodedInputStream* input,
417                                     uint32 field_number,
418                                     UnknownFieldSet* unknown_fields) {
419  uint32 length;
420  if (!input->ReadVarint32(&length)) return false;
421  return input->ReadString(
422      unknown_fields->AddLengthDelimited(field_number), length);
423}
424
425bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
426                                              const FieldDescriptor* field,
427                                              Message* message,
428                                              io::CodedInputStream* input) {
429  const Reflection* message_reflection = message->GetReflection();
430  if (field == NULL) {
431    // We store unknown MessageSet extensions as groups.
432    return SkipMessageSetField(
433        input, field_number, message_reflection->MutableUnknownFields(message));
434  } else if (field->is_repeated() ||
435             field->type() != FieldDescriptor::TYPE_MESSAGE) {
436    // This shouldn't happen as we only allow optional message extensions to
437    // MessageSet.
438    GOOGLE_LOG(ERROR) << "Extensions of MessageSets must be optional messages.";
439    return false;
440  } else {
441    Message* sub_message = message_reflection->MutableMessage(
442        message, field, input->GetExtensionFactory());
443    return WireFormatLite::ReadMessage(input, sub_message);
444  }
445}
446
447bool WireFormat::ParseAndMergeField(
448    uint32 tag,
449    const FieldDescriptor* field,        // May be NULL for unknown
450    Message* message,
451    io::CodedInputStream* input) {
452  const Reflection* message_reflection = message->GetReflection();
453
454  enum { UNKNOWN, NORMAL_FORMAT, PACKED_FORMAT } value_format;
455
456  if (field == NULL) {
457    value_format = UNKNOWN;
458  } else if (WireFormatLite::GetTagWireType(tag) ==
459             WireTypeForFieldType(field->type())) {
460    value_format = NORMAL_FORMAT;
461  } else if (field->is_packable() &&
462             WireFormatLite::GetTagWireType(tag) ==
463             WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
464    value_format = PACKED_FORMAT;
465  } else {
466    // We don't recognize this field. Either the field number is unknown
467    // or the wire type doesn't match. Put it in our unknown field set.
468    value_format = UNKNOWN;
469  }
470
471  if (value_format == UNKNOWN) {
472    return SkipField(input, tag,
473                     message_reflection->MutableUnknownFields(message));
474  } else if (value_format == PACKED_FORMAT) {
475    uint32 length;
476    if (!input->ReadVarint32(&length)) return false;
477    io::CodedInputStream::Limit limit = input->PushLimit(length);
478
479    switch (field->type()) {
480#define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                      \
481      case FieldDescriptor::TYPE_##TYPE: {                                     \
482        while (input->BytesUntilLimit() > 0) {                                 \
483          CPPTYPE value;                                                       \
484          if (!WireFormatLite::ReadPrimitive<                                  \
485                CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))          \
486            return false;                                                      \
487          message_reflection->Add##CPPTYPE_METHOD(message, field, value);      \
488        }                                                                      \
489        break;                                                                 \
490      }
491
492      HANDLE_PACKED_TYPE( INT32,  int32,  Int32)
493      HANDLE_PACKED_TYPE( INT64,  int64,  Int64)
494      HANDLE_PACKED_TYPE(SINT32,  int32,  Int32)
495      HANDLE_PACKED_TYPE(SINT64,  int64,  Int64)
496      HANDLE_PACKED_TYPE(UINT32, uint32, UInt32)
497      HANDLE_PACKED_TYPE(UINT64, uint64, UInt64)
498
499      HANDLE_PACKED_TYPE( FIXED32, uint32, UInt32)
500      HANDLE_PACKED_TYPE( FIXED64, uint64, UInt64)
501      HANDLE_PACKED_TYPE(SFIXED32,  int32,  Int32)
502      HANDLE_PACKED_TYPE(SFIXED64,  int64,  Int64)
503
504      HANDLE_PACKED_TYPE(FLOAT , float , Float )
505      HANDLE_PACKED_TYPE(DOUBLE, double, Double)
506
507      HANDLE_PACKED_TYPE(BOOL, bool, Bool)
508#undef HANDLE_PACKED_TYPE
509
510      case FieldDescriptor::TYPE_ENUM: {
511        while (input->BytesUntilLimit() > 0) {
512          int value;
513          if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
514                  input, &value)) return false;
515          const EnumValueDescriptor* enum_value =
516              field->enum_type()->FindValueByNumber(value);
517          if (enum_value != NULL) {
518            message_reflection->AddEnum(message, field, enum_value);
519          }
520        }
521
522        break;
523      }
524
525      case FieldDescriptor::TYPE_STRING:
526      case FieldDescriptor::TYPE_GROUP:
527      case FieldDescriptor::TYPE_MESSAGE:
528      case FieldDescriptor::TYPE_BYTES:
529        // Can't have packed fields of these types: these should be caught by
530        // the protocol compiler.
531        return false;
532        break;
533    }
534
535    input->PopLimit(limit);
536  } else {
537    // Non-packed value (value_format == NORMAL_FORMAT)
538    switch (field->type()) {
539#define HANDLE_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                            \
540      case FieldDescriptor::TYPE_##TYPE: {                                    \
541        CPPTYPE value;                                                        \
542        if (!WireFormatLite::ReadPrimitive<                                   \
543                CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))         \
544          return false;                                                       \
545        if (field->is_repeated()) {                                           \
546          message_reflection->Add##CPPTYPE_METHOD(message, field, value);     \
547        } else {                                                              \
548          message_reflection->Set##CPPTYPE_METHOD(message, field, value);     \
549        }                                                                     \
550        break;                                                                \
551      }
552
553      HANDLE_TYPE( INT32,  int32,  Int32)
554      HANDLE_TYPE( INT64,  int64,  Int64)
555      HANDLE_TYPE(SINT32,  int32,  Int32)
556      HANDLE_TYPE(SINT64,  int64,  Int64)
557      HANDLE_TYPE(UINT32, uint32, UInt32)
558      HANDLE_TYPE(UINT64, uint64, UInt64)
559
560      HANDLE_TYPE( FIXED32, uint32, UInt32)
561      HANDLE_TYPE( FIXED64, uint64, UInt64)
562      HANDLE_TYPE(SFIXED32,  int32,  Int32)
563      HANDLE_TYPE(SFIXED64,  int64,  Int64)
564
565      HANDLE_TYPE(FLOAT , float , Float )
566      HANDLE_TYPE(DOUBLE, double, Double)
567
568      HANDLE_TYPE(BOOL, bool, Bool)
569#undef HANDLE_TYPE
570
571      case FieldDescriptor::TYPE_ENUM: {
572        int value;
573        if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
574                input, &value)) return false;
575        const EnumValueDescriptor* enum_value =
576          field->enum_type()->FindValueByNumber(value);
577        if (enum_value != NULL) {
578          if (field->is_repeated()) {
579            message_reflection->AddEnum(message, field, enum_value);
580          } else {
581            message_reflection->SetEnum(message, field, enum_value);
582          }
583        } else {
584          // The enum value is not one of the known values.  Add it to the
585          // UnknownFieldSet.
586          int64 sign_extended_value = static_cast<int64>(value);
587          message_reflection->MutableUnknownFields(message)
588                            ->AddVarint(WireFormatLite::GetTagFieldNumber(tag),
589                                        sign_extended_value);
590        }
591        break;
592      }
593
594      // Handle strings separately so that we can optimize the ctype=CORD case.
595      case FieldDescriptor::TYPE_STRING: {
596        string value;
597        if (!WireFormatLite::ReadString(input, &value)) return false;
598        VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
599                                   field->name().c_str());
600        if (field->is_repeated()) {
601          message_reflection->AddString(message, field, value);
602        } else {
603          message_reflection->SetString(message, field, value);
604        }
605        break;
606      }
607
608      case FieldDescriptor::TYPE_BYTES: {
609        string value;
610        if (!WireFormatLite::ReadBytes(input, &value)) return false;
611        if (field->is_repeated()) {
612          message_reflection->AddString(message, field, value);
613        } else {
614          message_reflection->SetString(message, field, value);
615        }
616        break;
617      }
618
619      case FieldDescriptor::TYPE_GROUP: {
620        Message* sub_message;
621        if (field->is_repeated()) {
622          sub_message = message_reflection->AddMessage(
623              message, field, input->GetExtensionFactory());
624        } else {
625          sub_message = message_reflection->MutableMessage(
626              message, field, input->GetExtensionFactory());
627        }
628
629        if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
630                                       input, sub_message))
631          return false;
632        break;
633      }
634
635      case FieldDescriptor::TYPE_MESSAGE: {
636        Message* sub_message;
637        if (field->is_repeated()) {
638          sub_message = message_reflection->AddMessage(
639              message, field, input->GetExtensionFactory());
640        } else {
641          sub_message = message_reflection->MutableMessage(
642              message, field, input->GetExtensionFactory());
643        }
644
645        if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
646        break;
647      }
648    }
649  }
650
651  return true;
652}
653
654bool WireFormat::ParseAndMergeMessageSetItem(
655    io::CodedInputStream* input,
656    Message* message) {
657  const Reflection* message_reflection = message->GetReflection();
658
659  // This method parses a group which should contain two fields:
660  //   required int32 type_id = 2;
661  //   required data message = 3;
662
663  uint32 last_type_id = 0;
664
665  // Once we see a type_id, we'll look up the FieldDescriptor for the
666  // extension.
667  const FieldDescriptor* field = NULL;
668
669  // If we see message data before the type_id, we'll append it to this so
670  // we can parse it later.
671  string message_data;
672
673  while (true) {
674    uint32 tag = input->ReadTag();
675    if (tag == 0) return false;
676
677    switch (tag) {
678      case WireFormatLite::kMessageSetTypeIdTag: {
679        uint32 type_id;
680        if (!input->ReadVarint32(&type_id)) return false;
681        last_type_id = type_id;
682        field = message_reflection->FindKnownExtensionByNumber(type_id);
683
684        if (!message_data.empty()) {
685          // We saw some message data before the type_id.  Have to parse it
686          // now.
687          io::ArrayInputStream raw_input(message_data.data(),
688                                         message_data.size());
689          io::CodedInputStream sub_input(&raw_input);
690          if (!ParseAndMergeMessageSetField(last_type_id, field, message,
691                                            &sub_input)) {
692            return false;
693          }
694          message_data.clear();
695        }
696
697        break;
698      }
699
700      case WireFormatLite::kMessageSetMessageTag: {
701        if (last_type_id == 0) {
702          // We haven't seen a type_id yet.  Append this data to message_data.
703          string temp;
704          uint32 length;
705          if (!input->ReadVarint32(&length)) return false;
706          if (!input->ReadString(&temp, length)) return false;
707          io::StringOutputStream output_stream(&message_data);
708          io::CodedOutputStream coded_output(&output_stream);
709          coded_output.WriteVarint32(length);
710          coded_output.WriteString(temp);
711        } else {
712          // Already saw type_id, so we can parse this directly.
713          if (!ParseAndMergeMessageSetField(last_type_id, field, message,
714                                            input)) {
715            return false;
716          }
717        }
718
719        break;
720      }
721
722      case WireFormatLite::kMessageSetItemEndTag: {
723        return true;
724      }
725
726      default: {
727        if (!SkipField(input, tag, NULL)) return false;
728      }
729    }
730  }
731}
732
733// ===================================================================
734
735void WireFormat::SerializeWithCachedSizes(
736    const Message& message,
737    int size, io::CodedOutputStream* output) {
738  const Descriptor* descriptor = message.GetDescriptor();
739  const Reflection* message_reflection = message.GetReflection();
740  int expected_endpoint = output->ByteCount() + size;
741
742  vector<const FieldDescriptor*> fields;
743  message_reflection->ListFields(message, &fields);
744  for (int i = 0; i < fields.size(); i++) {
745    SerializeFieldWithCachedSizes(fields[i], message, output);
746  }
747
748  if (descriptor->options().message_set_wire_format()) {
749    SerializeUnknownMessageSetItems(
750        message_reflection->GetUnknownFields(message), output);
751  } else {
752    SerializeUnknownFields(
753        message_reflection->GetUnknownFields(message), output);
754  }
755
756  GOOGLE_CHECK_EQ(output->ByteCount(), expected_endpoint)
757    << ": Protocol message serialized to a size different from what was "
758       "originally expected.  Perhaps it was modified by another thread "
759       "during serialization?";
760}
761
762void WireFormat::SerializeFieldWithCachedSizes(
763    const FieldDescriptor* field,
764    const Message& message,
765    io::CodedOutputStream* output) {
766  const Reflection* message_reflection = message.GetReflection();
767
768  if (field->is_extension() &&
769      field->containing_type()->options().message_set_wire_format() &&
770      field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
771      !field->is_repeated()) {
772    SerializeMessageSetItemWithCachedSizes(field, message, output);
773    return;
774  }
775
776  int count = 0;
777
778  if (field->is_repeated()) {
779    count = message_reflection->FieldSize(message, field);
780  } else if (message_reflection->HasField(message, field)) {
781    count = 1;
782  }
783
784  const bool is_packed = field->options().packed();
785  if (is_packed && count > 0) {
786    WireFormatLite::WriteTag(field->number(),
787        WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output);
788    const int data_size = FieldDataOnlyByteSize(field, message);
789    output->WriteVarint32(data_size);
790  }
791
792  for (int j = 0; j < count; j++) {
793    switch (field->type()) {
794#define HANDLE_PRIMITIVE_TYPE(TYPE, CPPTYPE, TYPE_METHOD, CPPTYPE_METHOD)      \
795      case FieldDescriptor::TYPE_##TYPE: {                                     \
796        const CPPTYPE value = field->is_repeated() ?                           \
797                              message_reflection->GetRepeated##CPPTYPE_METHOD( \
798                                message, field, j) :                           \
799                              message_reflection->Get##CPPTYPE_METHOD(         \
800                                message, field);                               \
801        if (is_packed) {                                                       \
802          WireFormatLite::Write##TYPE_METHOD##NoTag(value, output);            \
803        } else {                                                               \
804          WireFormatLite::Write##TYPE_METHOD(field->number(), value, output);  \
805        }                                                                      \
806        break;                                                                 \
807      }
808
809      HANDLE_PRIMITIVE_TYPE( INT32,  int32,  Int32,  Int32)
810      HANDLE_PRIMITIVE_TYPE( INT64,  int64,  Int64,  Int64)
811      HANDLE_PRIMITIVE_TYPE(SINT32,  int32, SInt32,  Int32)
812      HANDLE_PRIMITIVE_TYPE(SINT64,  int64, SInt64,  Int64)
813      HANDLE_PRIMITIVE_TYPE(UINT32, uint32, UInt32, UInt32)
814      HANDLE_PRIMITIVE_TYPE(UINT64, uint64, UInt64, UInt64)
815
816      HANDLE_PRIMITIVE_TYPE( FIXED32, uint32,  Fixed32, UInt32)
817      HANDLE_PRIMITIVE_TYPE( FIXED64, uint64,  Fixed64, UInt64)
818      HANDLE_PRIMITIVE_TYPE(SFIXED32,  int32, SFixed32,  Int32)
819      HANDLE_PRIMITIVE_TYPE(SFIXED64,  int64, SFixed64,  Int64)
820
821      HANDLE_PRIMITIVE_TYPE(FLOAT , float , Float , Float )
822      HANDLE_PRIMITIVE_TYPE(DOUBLE, double, Double, Double)
823
824      HANDLE_PRIMITIVE_TYPE(BOOL, bool, Bool, Bool)
825#undef HANDLE_PRIMITIVE_TYPE
826
827#define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                       \
828      case FieldDescriptor::TYPE_##TYPE:                                     \
829        WireFormatLite::Write##TYPE_METHOD(                                  \
830              field->number(),                                               \
831              field->is_repeated() ?                                         \
832                message_reflection->GetRepeated##CPPTYPE_METHOD(             \
833                  message, field, j) :                                       \
834                message_reflection->Get##CPPTYPE_METHOD(message, field),     \
835              output);                                                       \
836        break;
837
838      HANDLE_TYPE(GROUP  , Group  , Message)
839      HANDLE_TYPE(MESSAGE, Message, Message)
840#undef HANDLE_TYPE
841
842      case FieldDescriptor::TYPE_ENUM: {
843        const EnumValueDescriptor* value = field->is_repeated() ?
844          message_reflection->GetRepeatedEnum(message, field, j) :
845          message_reflection->GetEnum(message, field);
846        if (is_packed) {
847          WireFormatLite::WriteEnumNoTag(value->number(), output);
848        } else {
849          WireFormatLite::WriteEnum(field->number(), value->number(), output);
850        }
851        break;
852      }
853
854      // Handle strings separately so that we can get string references
855      // instead of copying.
856      case FieldDescriptor::TYPE_STRING: {
857        string scratch;
858        const string& value = field->is_repeated() ?
859          message_reflection->GetRepeatedStringReference(
860            message, field, j, &scratch) :
861          message_reflection->GetStringReference(message, field, &scratch);
862        VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
863                                   field->name().c_str());
864        WireFormatLite::WriteString(field->number(), value, output);
865        break;
866      }
867
868      case FieldDescriptor::TYPE_BYTES: {
869        string scratch;
870        const string& value = field->is_repeated() ?
871          message_reflection->GetRepeatedStringReference(
872            message, field, j, &scratch) :
873          message_reflection->GetStringReference(message, field, &scratch);
874        WireFormatLite::WriteBytes(field->number(), value, output);
875        break;
876      }
877    }
878  }
879}
880
881void WireFormat::SerializeMessageSetItemWithCachedSizes(
882    const FieldDescriptor* field,
883    const Message& message,
884    io::CodedOutputStream* output) {
885  const Reflection* message_reflection = message.GetReflection();
886
887  // Start group.
888  output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
889
890  // Write type ID.
891  output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
892  output->WriteVarint32(field->number());
893
894  // Write message.
895  output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
896
897  const Message& sub_message = message_reflection->GetMessage(message, field);
898  output->WriteVarint32(sub_message.GetCachedSize());
899  sub_message.SerializeWithCachedSizes(output);
900
901  // End group.
902  output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
903}
904
905// ===================================================================
906
907int WireFormat::ByteSize(const Message& message) {
908  const Descriptor* descriptor = message.GetDescriptor();
909  const Reflection* message_reflection = message.GetReflection();
910
911  int our_size = 0;
912
913  vector<const FieldDescriptor*> fields;
914  message_reflection->ListFields(message, &fields);
915  for (int i = 0; i < fields.size(); i++) {
916    our_size += FieldByteSize(fields[i], message);
917  }
918
919  if (descriptor->options().message_set_wire_format()) {
920    our_size += ComputeUnknownMessageSetItemsSize(
921      message_reflection->GetUnknownFields(message));
922  } else {
923    our_size += ComputeUnknownFieldsSize(
924      message_reflection->GetUnknownFields(message));
925  }
926
927  return our_size;
928}
929
930int WireFormat::FieldByteSize(
931    const FieldDescriptor* field,
932    const Message& message) {
933  const Reflection* message_reflection = message.GetReflection();
934
935  if (field->is_extension() &&
936      field->containing_type()->options().message_set_wire_format() &&
937      field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
938      !field->is_repeated()) {
939    return MessageSetItemByteSize(field, message);
940  }
941
942  int count = 0;
943  if (field->is_repeated()) {
944    count = message_reflection->FieldSize(message, field);
945  } else if (message_reflection->HasField(message, field)) {
946    count = 1;
947  }
948
949  const int data_size = FieldDataOnlyByteSize(field, message);
950  int our_size = data_size;
951  if (field->options().packed()) {
952    if (data_size > 0) {
953      // Packed fields get serialized like a string, not their native type.
954      // Technically this doesn't really matter; the size only changes if it's
955      // a GROUP
956      our_size += TagSize(field->number(), FieldDescriptor::TYPE_STRING);
957      our_size += io::CodedOutputStream::VarintSize32(data_size);
958    }
959  } else {
960    our_size += count * TagSize(field->number(), field->type());
961  }
962  return our_size;
963}
964
965int WireFormat::FieldDataOnlyByteSize(
966    const FieldDescriptor* field,
967    const Message& message) {
968  const Reflection* message_reflection = message.GetReflection();
969
970  int count = 0;
971  if (field->is_repeated()) {
972    count = message_reflection->FieldSize(message, field);
973  } else if (message_reflection->HasField(message, field)) {
974    count = 1;
975  }
976
977  int data_size = 0;
978  switch (field->type()) {
979#define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                     \
980    case FieldDescriptor::TYPE_##TYPE:                                     \
981      if (field->is_repeated()) {                                          \
982        for (int j = 0; j < count; j++) {                                  \
983          data_size += WireFormatLite::TYPE_METHOD##Size(                  \
984            message_reflection->GetRepeated##CPPTYPE_METHOD(               \
985              message, field, j));                                         \
986        }                                                                  \
987      } else {                                                             \
988        data_size += WireFormatLite::TYPE_METHOD##Size(                    \
989          message_reflection->Get##CPPTYPE_METHOD(message, field));        \
990      }                                                                    \
991      break;
992
993#define HANDLE_FIXED_TYPE(TYPE, TYPE_METHOD)                               \
994    case FieldDescriptor::TYPE_##TYPE:                                     \
995      data_size += count * WireFormatLite::k##TYPE_METHOD##Size;           \
996      break;
997
998    HANDLE_TYPE( INT32,  Int32,  Int32)
999    HANDLE_TYPE( INT64,  Int64,  Int64)
1000    HANDLE_TYPE(SINT32, SInt32,  Int32)
1001    HANDLE_TYPE(SINT64, SInt64,  Int64)
1002    HANDLE_TYPE(UINT32, UInt32, UInt32)
1003    HANDLE_TYPE(UINT64, UInt64, UInt64)
1004
1005    HANDLE_FIXED_TYPE( FIXED32,  Fixed32)
1006    HANDLE_FIXED_TYPE( FIXED64,  Fixed64)
1007    HANDLE_FIXED_TYPE(SFIXED32, SFixed32)
1008    HANDLE_FIXED_TYPE(SFIXED64, SFixed64)
1009
1010    HANDLE_FIXED_TYPE(FLOAT , Float )
1011    HANDLE_FIXED_TYPE(DOUBLE, Double)
1012
1013    HANDLE_FIXED_TYPE(BOOL, Bool)
1014
1015    HANDLE_TYPE(GROUP  , Group  , Message)
1016    HANDLE_TYPE(MESSAGE, Message, Message)
1017#undef HANDLE_TYPE
1018#undef HANDLE_FIXED_TYPE
1019
1020    case FieldDescriptor::TYPE_ENUM: {
1021      if (field->is_repeated()) {
1022        for (int j = 0; j < count; j++) {
1023          data_size += WireFormatLite::EnumSize(
1024            message_reflection->GetRepeatedEnum(message, field, j)->number());
1025        }
1026      } else {
1027        data_size += WireFormatLite::EnumSize(
1028          message_reflection->GetEnum(message, field)->number());
1029      }
1030      break;
1031    }
1032
1033    // Handle strings separately so that we can get string references
1034    // instead of copying.
1035    case FieldDescriptor::TYPE_STRING:
1036    case FieldDescriptor::TYPE_BYTES: {
1037      for (int j = 0; j < count; j++) {
1038        string scratch;
1039        const string& value = field->is_repeated() ?
1040          message_reflection->GetRepeatedStringReference(
1041            message, field, j, &scratch) :
1042          message_reflection->GetStringReference(message, field, &scratch);
1043        data_size += WireFormatLite::StringSize(value);
1044      }
1045      break;
1046    }
1047  }
1048  return data_size;
1049}
1050
1051int WireFormat::MessageSetItemByteSize(
1052    const FieldDescriptor* field,
1053    const Message& message) {
1054  const Reflection* message_reflection = message.GetReflection();
1055
1056  int our_size = WireFormatLite::kMessageSetItemTagsSize;
1057
1058  // type_id
1059  our_size += io::CodedOutputStream::VarintSize32(field->number());
1060
1061  // message
1062  const Message& sub_message = message_reflection->GetMessage(message, field);
1063  int message_size = sub_message.ByteSize();
1064
1065  our_size += io::CodedOutputStream::VarintSize32(message_size);
1066  our_size += message_size;
1067
1068  return our_size;
1069}
1070
1071void WireFormat::VerifyUTF8StringFallback(const char* data,
1072                                          int size,
1073                                          Operation op,
1074                                          const char* field_name) {
1075  if (!IsStructurallyValidUTF8(data, size)) {
1076    const char* operation_str = NULL;
1077    switch (op) {
1078      case PARSE:
1079        operation_str = "parsing";
1080        break;
1081      case SERIALIZE:
1082        operation_str = "serializing";
1083        break;
1084      // no default case: have the compiler warn if a case is not covered.
1085    }
1086    string quoted_field_name = "";
1087    if (field_name != NULL) {
1088      quoted_field_name = StringPrintf(" '%s'", field_name);
1089    }
1090    // no space below to avoid double space when the field name is missing.
1091    GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
1092               << "UTF-8 data when " << operation_str << " a protocol "
1093               << "buffer. Use the 'bytes' type if you intend to send raw "
1094               << "bytes. ";
1095  }
1096}
1097
1098
1099}  // namespace internal
1100}  // namespace protobuf
1101}  // namespace google
1102