dynamic_message.cc revision ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16
1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// http://code.google.com/p/protobuf/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34//
35// DynamicMessage is implemented by constructing a data structure which
36// has roughly the same memory layout as a generated message would have.
37// Then, we use GeneratedMessageReflection to implement our reflection
38// interface.  All the other operations we need to implement (e.g.
39// parsing, copying, etc.) are already implemented in terms of
40// Reflection, so the rest is easy.
41//
42// The up side of this strategy is that it's very efficient.  We don't
43// need to use hash_maps or generic representations of fields.  The
44// down side is that this is a low-level memory management hack which
45// can be tricky to get right.
46//
47// As mentioned in the header, we only expose a DynamicMessageFactory
48// publicly, not the DynamicMessage class itself.  This is because
49// GeneratedMessageReflection wants to have a pointer to a "default"
50// copy of the class, with all fields initialized to their default
51// values.  We only want to construct one of these per message type,
52// so DynamicMessageFactory stores a cache of default messages for
53// each type it sees (each unique Descriptor pointer).  The code
54// refers to the "default" copy of the class as the "prototype".
55//
56// Note on memory allocation:  This module often calls "operator new()"
57// to allocate untyped memory, rather than calling something like
58// "new uint8[]".  This is because "operator new()" means "Give me some
59// space which I can use as I please." while "new uint8[]" means "Give
60// me an array of 8-bit integers.".  In practice, the latter may return
61// a pointer that is not aligned correctly for general use.  I believe
62// Item 8 of "More Effective C++" discusses this in more detail, though
63// I don't have the book on me right now so I'm not sure.
64
65#include <algorithm>
66#include <google/protobuf/stubs/hash.h>
67
68#include <google/protobuf/stubs/common.h>
69
70#include <google/protobuf/dynamic_message.h>
71#include <google/protobuf/descriptor.h>
72#include <google/protobuf/descriptor.pb.h>
73#include <google/protobuf/generated_message_util.h>
74#include <google/protobuf/generated_message_reflection.h>
75#include <google/protobuf/reflection_ops.h>
76#include <google/protobuf/repeated_field.h>
77#include <google/protobuf/extension_set.h>
78#include <google/protobuf/wire_format.h>
79
80namespace google {
81namespace protobuf {
82
83using internal::WireFormat;
84using internal::ExtensionSet;
85using internal::GeneratedMessageReflection;
86
87
88// ===================================================================
89// Some helper tables and functions...
90
91namespace {
92
93// Compute the byte size of the in-memory representation of the field.
94int FieldSpaceUsed(const FieldDescriptor* field) {
95  typedef FieldDescriptor FD;  // avoid line wrapping
96  if (field->label() == FD::LABEL_REPEATED) {
97    switch (field->cpp_type()) {
98      case FD::CPPTYPE_INT32  : return sizeof(RepeatedField<int32   >);
99      case FD::CPPTYPE_INT64  : return sizeof(RepeatedField<int64   >);
100      case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32  >);
101      case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64  >);
102      case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double  >);
103      case FD::CPPTYPE_FLOAT  : return sizeof(RepeatedField<float   >);
104      case FD::CPPTYPE_BOOL   : return sizeof(RepeatedField<bool    >);
105      case FD::CPPTYPE_ENUM   : return sizeof(RepeatedField<int     >);
106      case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>);
107
108      case FD::CPPTYPE_STRING:
109        switch (field->options().ctype()) {
110          default:  // TODO(kenton):  Support other string reps.
111          case FieldOptions::STRING:
112            return sizeof(RepeatedPtrField<string>);
113        }
114        break;
115    }
116  } else {
117    switch (field->cpp_type()) {
118      case FD::CPPTYPE_INT32  : return sizeof(int32   );
119      case FD::CPPTYPE_INT64  : return sizeof(int64   );
120      case FD::CPPTYPE_UINT32 : return sizeof(uint32  );
121      case FD::CPPTYPE_UINT64 : return sizeof(uint64  );
122      case FD::CPPTYPE_DOUBLE : return sizeof(double  );
123      case FD::CPPTYPE_FLOAT  : return sizeof(float   );
124      case FD::CPPTYPE_BOOL   : return sizeof(bool    );
125      case FD::CPPTYPE_ENUM   : return sizeof(int     );
126
127      case FD::CPPTYPE_MESSAGE:
128        return sizeof(Message*);
129
130      case FD::CPPTYPE_STRING:
131        switch (field->options().ctype()) {
132          default:  // TODO(kenton):  Support other string reps.
133          case FieldOptions::STRING:
134            return sizeof(string*);
135        }
136        break;
137    }
138  }
139
140  GOOGLE_LOG(DFATAL) << "Can't get here.";
141  return 0;
142}
143
// Integer division of |i| by |j| that rounds the quotient up instead of
// truncating it.
inline int DivideRoundingUp(int i, int j) {
  const int bias = j - 1;
  return (i + bias) / j;
}
147
148static const int kSafeAlignment = sizeof(uint64);
149
// Rounds |offset| up to the nearest multiple of |alignment|.  An offset that
// is already a multiple is returned unchanged.
inline int AlignTo(int offset, int alignment) {
  const int units = (offset + (alignment - 1)) / alignment;
  return units * alignment;
}
153
154// Rounds the given byte offset up to the next offset aligned such that any
155// type may be stored at it.
156inline int AlignOffset(int offset) {
157  return AlignTo(offset, kSafeAlignment);
158}
159
// Size of type T expressed in bits, computed as 8 bits per byte of sizeof(T).
#define bitsizeof(T) (8 * sizeof(T))
161
162}  // namespace
163
164// ===================================================================
165
166class DynamicMessage : public Message {
167 public:
168  struct TypeInfo {
169    int size;
170    int has_bits_offset;
171    int unknown_fields_offset;
172    int extensions_offset;
173
174    // Not owned by the TypeInfo.
175    DynamicMessageFactory* factory;  // The factory that created this object.
176    const DescriptorPool* pool;      // The factory's DescriptorPool.
177    const Descriptor* type;          // Type of this DynamicMessage.
178
179    // Warning:  The order in which the following pointers are defined is
180    //   important (the prototype must be deleted *before* the offsets).
181    scoped_array<int> offsets;
182    scoped_ptr<const GeneratedMessageReflection> reflection;
183    // Don't use a scoped_ptr to hold the prototype: the destructor for
184    // DynamicMessage needs to know whether it is the prototype, and does so by
185    // looking back at this field. This would assume details about the
186    // implementation of scoped_ptr.
187    const DynamicMessage* prototype;
188
189    TypeInfo() : prototype(NULL) {}
190
191    ~TypeInfo() {
192      delete prototype;
193    }
194  };
195
196  DynamicMessage(const TypeInfo* type_info);
197  ~DynamicMessage();
198
199  // Called on the prototype after construction to initialize message fields.
200  void CrossLinkPrototypes();
201
202  // implements Message ----------------------------------------------
203
204  Message* New() const;
205
206  int GetCachedSize() const;
207  void SetCachedSize(int size) const;
208
209  Metadata GetMetadata() const;
210
211  // We actually allocate more memory than sizeof(*this) when this
212  // class's memory is allocated via the global operator new. Thus, we need to
213  // manually call the global operator delete. Calling the destructor is taken
214  // care of for us.
215  static void operator delete(void* ptr) {
216    ::operator delete(ptr);
217  }
218
219 private:
220  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage);
221
222  inline bool is_prototype() const {
223    return type_info_->prototype == this ||
224           // If type_info_->prototype is NULL, then we must be constructing
225           // the prototype now, which means we must be the prototype.
226           type_info_->prototype == NULL;
227  }
228
229  inline void* OffsetToPointer(int offset) {
230    return reinterpret_cast<uint8*>(this) + offset;
231  }
232  inline const void* OffsetToPointer(int offset) const {
233    return reinterpret_cast<const uint8*>(this) + offset;
234  }
235
236  const TypeInfo* type_info_;
237
238  // TODO(kenton):  Make this an atomic<int> when C++ supports it.
239  mutable int cached_byte_size_;
240};
241
242DynamicMessage::DynamicMessage(const TypeInfo* type_info)
243  : type_info_(type_info),
244    cached_byte_size_(0) {
245  // We need to call constructors for various fields manually and set
246  // default values where appropriate.  We use placement new to call
247  // constructors.  If you haven't heard of placement new, I suggest Googling
248  // it now.  We use placement new even for primitive types that don't have
249  // constructors for consistency.  (In theory, placement new should be used
250  // any time you are trying to convert untyped memory to typed memory, though
251  // in practice that's not strictly necessary for types that don't have a
252  // constructor.)
253
254  const Descriptor* descriptor = type_info_->type;
255
256  new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet;
257
258  if (type_info_->extensions_offset != -1) {
259    new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet;
260  }
261
262  for (int i = 0; i < descriptor->field_count(); i++) {
263    const FieldDescriptor* field = descriptor->field(i);
264    void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
265    switch (field->cpp_type()) {
266#define HANDLE_TYPE(CPPTYPE, TYPE)                                           \
267      case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
268        if (!field->is_repeated()) {                                         \
269          new(field_ptr) TYPE(field->default_value_##TYPE());                \
270        } else {                                                             \
271          new(field_ptr) RepeatedField<TYPE>();                              \
272        }                                                                    \
273        break;
274
275      HANDLE_TYPE(INT32 , int32 );
276      HANDLE_TYPE(INT64 , int64 );
277      HANDLE_TYPE(UINT32, uint32);
278      HANDLE_TYPE(UINT64, uint64);
279      HANDLE_TYPE(DOUBLE, double);
280      HANDLE_TYPE(FLOAT , float );
281      HANDLE_TYPE(BOOL  , bool  );
282#undef HANDLE_TYPE
283
284      case FieldDescriptor::CPPTYPE_ENUM:
285        if (!field->is_repeated()) {
286          new(field_ptr) int(field->default_value_enum()->number());
287        } else {
288          new(field_ptr) RepeatedField<int>();
289        }
290        break;
291
292      case FieldDescriptor::CPPTYPE_STRING:
293        switch (field->options().ctype()) {
294          default:  // TODO(kenton):  Support other string reps.
295          case FieldOptions::STRING:
296            if (!field->is_repeated()) {
297              if (is_prototype()) {
298                new(field_ptr) const string*(&field->default_value_string());
299              } else {
300                string* default_value =
301                  *reinterpret_cast<string* const*>(
302                    type_info_->prototype->OffsetToPointer(
303                      type_info_->offsets[i]));
304                new(field_ptr) string*(default_value);
305              }
306            } else {
307              new(field_ptr) RepeatedPtrField<string>();
308            }
309            break;
310        }
311        break;
312
313      case FieldDescriptor::CPPTYPE_MESSAGE: {
314        if (!field->is_repeated()) {
315          new(field_ptr) Message*(NULL);
316        } else {
317          new(field_ptr) RepeatedPtrField<Message>();
318        }
319        break;
320      }
321    }
322  }
323}
324
325DynamicMessage::~DynamicMessage() {
326  const Descriptor* descriptor = type_info_->type;
327
328  reinterpret_cast<UnknownFieldSet*>(
329    OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet();
330
331  if (type_info_->extensions_offset != -1) {
332    reinterpret_cast<ExtensionSet*>(
333      OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet();
334  }
335
336  // We need to manually run the destructors for repeated fields and strings,
337  // just as we ran their constructors in the the DynamicMessage constructor.
338  // Additionally, if any singular embedded messages have been allocated, we
339  // need to delete them, UNLESS we are the prototype message of this type,
340  // in which case any embedded messages are other prototypes and shouldn't
341  // be touched.
342  for (int i = 0; i < descriptor->field_count(); i++) {
343    const FieldDescriptor* field = descriptor->field(i);
344    void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
345
346    if (field->is_repeated()) {
347      switch (field->cpp_type()) {
348#define HANDLE_TYPE(UPPERCASE, LOWERCASE)                                     \
349        case FieldDescriptor::CPPTYPE_##UPPERCASE :                           \
350          reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr)              \
351              ->~RepeatedField<LOWERCASE>();                                  \
352          break
353
354        HANDLE_TYPE( INT32,  int32);
355        HANDLE_TYPE( INT64,  int64);
356        HANDLE_TYPE(UINT32, uint32);
357        HANDLE_TYPE(UINT64, uint64);
358        HANDLE_TYPE(DOUBLE, double);
359        HANDLE_TYPE( FLOAT,  float);
360        HANDLE_TYPE(  BOOL,   bool);
361        HANDLE_TYPE(  ENUM,    int);
362#undef HANDLE_TYPE
363
364        case FieldDescriptor::CPPTYPE_STRING:
365          switch (field->options().ctype()) {
366            default:  // TODO(kenton):  Support other string reps.
367            case FieldOptions::STRING:
368              reinterpret_cast<RepeatedPtrField<string>*>(field_ptr)
369                  ->~RepeatedPtrField<string>();
370              break;
371          }
372          break;
373
374        case FieldDescriptor::CPPTYPE_MESSAGE:
375          reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr)
376              ->~RepeatedPtrField<Message>();
377          break;
378      }
379
380    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
381      switch (field->options().ctype()) {
382        default:  // TODO(kenton):  Support other string reps.
383        case FieldOptions::STRING: {
384          string* ptr = *reinterpret_cast<string**>(field_ptr);
385          if (ptr != &field->default_value_string()) {
386            delete ptr;
387          }
388          break;
389        }
390      }
391    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
392      if (!is_prototype()) {
393        Message* message = *reinterpret_cast<Message**>(field_ptr);
394        if (message != NULL) {
395          delete message;
396        }
397      }
398    }
399  }
400}
401
402void DynamicMessage::CrossLinkPrototypes() {
403  // This should only be called on the prototype message.
404  GOOGLE_CHECK(is_prototype());
405
406  DynamicMessageFactory* factory = type_info_->factory;
407  const Descriptor* descriptor = type_info_->type;
408
409  // Cross-link default messages.
410  for (int i = 0; i < descriptor->field_count(); i++) {
411    const FieldDescriptor* field = descriptor->field(i);
412    void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
413
414    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
415        !field->is_repeated()) {
416      // For fields with message types, we need to cross-link with the
417      // prototype for the field's type.
418      // For singular fields, the field is just a pointer which should
419      // point to the prototype.
420      *reinterpret_cast<const Message**>(field_ptr) =
421        factory->GetPrototypeNoLock(field->message_type());
422    }
423  }
424}
425
426Message* DynamicMessage::New() const {
427  void* new_base = operator new(type_info_->size);
428  memset(new_base, 0, type_info_->size);
429  return new(new_base) DynamicMessage(type_info_);
430}
431
432int DynamicMessage::GetCachedSize() const {
433  return cached_byte_size_;
434}
435
436void DynamicMessage::SetCachedSize(int size) const {
437  // This is theoretically not thread-compatible, but in practice it works
438  // because if multiple threads write this simultaneously, they will be
439  // writing the exact same value.
440  cached_byte_size_ = size;
441}
442
443Metadata DynamicMessage::GetMetadata() const {
444  Metadata metadata;
445  metadata.descriptor = type_info_->type;
446  metadata.reflection = type_info_->reflection.get();
447  return metadata;
448}
449
450// ===================================================================
451
452struct DynamicMessageFactory::PrototypeMap {
453  typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map;
454  Map map_;
455};
456
457DynamicMessageFactory::DynamicMessageFactory()
458  : pool_(NULL), delegate_to_generated_factory_(false),
459    prototypes_(new PrototypeMap) {
460}
461
462DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool)
463  : pool_(pool), delegate_to_generated_factory_(false),
464    prototypes_(new PrototypeMap) {
465}
466
467DynamicMessageFactory::~DynamicMessageFactory() {
468  for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin();
469       iter != prototypes_->map_.end(); ++iter) {
470    delete iter->second;
471  }
472}
473
474const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
475  MutexLock lock(&prototypes_mutex_);
476  return GetPrototypeNoLock(type);
477}
478
479const Message* DynamicMessageFactory::GetPrototypeNoLock(
480    const Descriptor* type) {
481  if (delegate_to_generated_factory_ &&
482      type->file()->pool() == DescriptorPool::generated_pool()) {
483    return MessageFactory::generated_factory()->GetPrototype(type);
484  }
485
486  const DynamicMessage::TypeInfo** target = &prototypes_->map_[type];
487  if (*target != NULL) {
488    // Already exists.
489    return (*target)->prototype;
490  }
491
492  DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo;
493  *target = type_info;
494
495  type_info->type = type;
496  type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_;
497  type_info->factory = this;
498
499  // We need to construct all the structures passed to
500  // GeneratedMessageReflection's constructor.  This includes:
501  // - A block of memory that contains space for all the message's fields.
502  // - An array of integers indicating the byte offset of each field within
503  //   this block.
504  // - A big bitfield containing a bit for each field indicating whether
505  //   or not that field is set.
506
507  // Compute size and offsets.
508  int* offsets = new int[type->field_count()];
509  type_info->offsets.reset(offsets);
510
511  // Decide all field offsets by packing in order.
512  // We place the DynamicMessage object itself at the beginning of the allocated
513  // space.
514  int size = sizeof(DynamicMessage);
515  size = AlignOffset(size);
516
517  // Next the has_bits, which is an array of uint32s.
518  type_info->has_bits_offset = size;
519  int has_bits_array_size =
520    DivideRoundingUp(type->field_count(), bitsizeof(uint32));
521  size += has_bits_array_size * sizeof(uint32);
522  size = AlignOffset(size);
523
524  // The ExtensionSet, if any.
525  if (type->extension_range_count() > 0) {
526    type_info->extensions_offset = size;
527    size += sizeof(ExtensionSet);
528    size = AlignOffset(size);
529  } else {
530    // No extensions.
531    type_info->extensions_offset = -1;
532  }
533
534  // All the fields.
535  for (int i = 0; i < type->field_count(); i++) {
536    // Make sure field is aligned to avoid bus errors.
537    int field_size = FieldSpaceUsed(type->field(i));
538    size = AlignTo(size, min(kSafeAlignment, field_size));
539    offsets[i] = size;
540    size += field_size;
541  }
542
543  // Add the UnknownFieldSet to the end.
544  size = AlignOffset(size);
545  type_info->unknown_fields_offset = size;
546  size += sizeof(UnknownFieldSet);
547
548  // Align the final size to make sure no clever allocators think that
549  // alignment is not necessary.
550  size = AlignOffset(size);
551  type_info->size = size;
552
553  // Allocate the prototype.
554  void* base = operator new(size);
555  memset(base, 0, size);
556  DynamicMessage* prototype = new(base) DynamicMessage(type_info);
557  type_info->prototype = prototype;
558
559  // Construct the reflection object.
560  type_info->reflection.reset(
561    new GeneratedMessageReflection(
562      type_info->type,
563      type_info->prototype,
564      type_info->offsets.get(),
565      type_info->has_bits_offset,
566      type_info->unknown_fields_offset,
567      type_info->extensions_offset,
568      type_info->pool,
569      this,
570      type_info->size));
571
572  // Cross link prototypes.
573  prototype->CrossLinkPrototypes();
574
575  return prototype;
576}
577
578}  // namespace protobuf
579}  // namespace google
580