1// Protocol Buffers - Google's data interchange format 2// Copyright 2008 Google Inc. All rights reserved. 3// http://code.google.com/p/protobuf/ 4// 5// Redistribution and use in source and binary forms, with or without 6// modification, are permitted provided that the following conditions are 7// met: 8// 9// * Redistributions of source code must retain the above copyright 10// notice, this list of conditions and the following disclaimer. 11// * Redistributions in binary form must reproduce the above 12// copyright notice, this list of conditions and the following disclaimer 13// in the documentation and/or other materials provided with the 14// distribution. 15// * Neither the name of Google Inc. nor the names of its 16// contributors may be used to endorse or promote products derived from 17// this software without specific prior written permission. 18// 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31// Author: kenton@google.com (Kenton Varda) 32// Based on original Protocol Buffers design by 33// Sanjay Ghemawat, Jeff Dean, and others. 34// 35// DynamicMessage is implemented by constructing a data structure which 36// has roughly the same memory layout as a generated message would have. 
// Then, we use GeneratedMessageReflection to implement our reflection
// interface.  All the other operations we need to implement (e.g.
// parsing, copying, etc.) are already implemented in terms of
// Reflection, so the rest is easy.
//
// The up side of this strategy is that it's very efficient.  We don't
// need to use hash_maps or generic representations of fields.  The
// down side is that this is a low-level memory management hack which
// can be tricky to get right.
//
// As mentioned in the header, we only expose a DynamicMessageFactory
// publicly, not the DynamicMessage class itself.  This is because
// GeneratedMessageReflection wants to have a pointer to a "default"
// copy of the class, with all fields initialized to their default
// values.  We only want to construct one of these per message type,
// so DynamicMessageFactory stores a cache of default messages for
// each type it sees (each unique Descriptor pointer).  The code
// refers to the "default" copy of the class as the "prototype".
//
// Note on memory allocation:  This module often calls "operator new()"
// to allocate untyped memory, rather than calling something like
// "new uint8[]".  This is because "operator new()" means "Give me some
// space which I can use as I please." while "new uint8[]" means "Give
// me an array of 8-bit integers.".  In practice, the latter may return
// a pointer that is not aligned correctly for general use.  I believe
// Item 8 of "More Effective C++" discusses this in more detail, though
// I don't have the book on me right now so I'm not sure.
64 65#include <algorithm> 66#include <google/protobuf/stubs/hash.h> 67 68#include <google/protobuf/stubs/common.h> 69 70#include <google/protobuf/dynamic_message.h> 71#include <google/protobuf/descriptor.h> 72#include <google/protobuf/descriptor.pb.h> 73#include <google/protobuf/generated_message_util.h> 74#include <google/protobuf/generated_message_reflection.h> 75#include <google/protobuf/reflection_ops.h> 76#include <google/protobuf/repeated_field.h> 77#include <google/protobuf/extension_set.h> 78#include <google/protobuf/wire_format.h> 79 80namespace google { 81namespace protobuf { 82 83using internal::WireFormat; 84using internal::ExtensionSet; 85using internal::GeneratedMessageReflection; 86 87 88// =================================================================== 89// Some helper tables and functions... 90 91namespace { 92 93// Compute the byte size of the in-memory representation of the field. 94int FieldSpaceUsed(const FieldDescriptor* field) { 95 typedef FieldDescriptor FD; // avoid line wrapping 96 if (field->label() == FD::LABEL_REPEATED) { 97 switch (field->cpp_type()) { 98 case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >); 99 case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >); 100 case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >); 101 case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >); 102 case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >); 103 case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >); 104 case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >); 105 case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >); 106 case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>); 107 108 case FD::CPPTYPE_STRING: 109 switch (field->options().ctype()) { 110 default: // TODO(kenton): Support other string reps. 
111 case FieldOptions::STRING: 112 return sizeof(RepeatedPtrField<string>); 113 } 114 break; 115 } 116 } else { 117 switch (field->cpp_type()) { 118 case FD::CPPTYPE_INT32 : return sizeof(int32 ); 119 case FD::CPPTYPE_INT64 : return sizeof(int64 ); 120 case FD::CPPTYPE_UINT32 : return sizeof(uint32 ); 121 case FD::CPPTYPE_UINT64 : return sizeof(uint64 ); 122 case FD::CPPTYPE_DOUBLE : return sizeof(double ); 123 case FD::CPPTYPE_FLOAT : return sizeof(float ); 124 case FD::CPPTYPE_BOOL : return sizeof(bool ); 125 case FD::CPPTYPE_ENUM : return sizeof(int ); 126 127 case FD::CPPTYPE_MESSAGE: 128 return sizeof(Message*); 129 130 case FD::CPPTYPE_STRING: 131 switch (field->options().ctype()) { 132 default: // TODO(kenton): Support other string reps. 133 case FieldOptions::STRING: 134 return sizeof(string*); 135 } 136 break; 137 } 138 } 139 140 GOOGLE_LOG(DFATAL) << "Can't get here."; 141 return 0; 142} 143 144inline int DivideRoundingUp(int i, int j) { 145 return (i + (j - 1)) / j; 146} 147 148static const int kSafeAlignment = sizeof(uint64); 149 150inline int AlignTo(int offset, int alignment) { 151 return DivideRoundingUp(offset, alignment) * alignment; 152} 153 154// Rounds the given byte offset up to the next offset aligned such that any 155// type may be stored at it. 156inline int AlignOffset(int offset) { 157 return AlignTo(offset, kSafeAlignment); 158} 159 160#define bitsizeof(T) (sizeof(T) * 8) 161 162} // namespace 163 164// =================================================================== 165 166class DynamicMessage : public Message { 167 public: 168 struct TypeInfo { 169 int size; 170 int has_bits_offset; 171 int unknown_fields_offset; 172 int extensions_offset; 173 174 // Not owned by the TypeInfo. 175 DynamicMessageFactory* factory; // The factory that created this object. 176 const DescriptorPool* pool; // The factory's DescriptorPool. 177 const Descriptor* type; // Type of this DynamicMessage. 
178 179 // Warning: The order in which the following pointers are defined is 180 // important (the prototype must be deleted *before* the offsets). 181 scoped_array<int> offsets; 182 scoped_ptr<const GeneratedMessageReflection> reflection; 183 // Don't use a scoped_ptr to hold the prototype: the destructor for 184 // DynamicMessage needs to know whether it is the prototype, and does so by 185 // looking back at this field. This would assume details about the 186 // implementation of scoped_ptr. 187 const DynamicMessage* prototype; 188 189 TypeInfo() : prototype(NULL) {} 190 191 ~TypeInfo() { 192 delete prototype; 193 } 194 }; 195 196 DynamicMessage(const TypeInfo* type_info); 197 ~DynamicMessage(); 198 199 // Called on the prototype after construction to initialize message fields. 200 void CrossLinkPrototypes(); 201 202 // implements Message ---------------------------------------------- 203 204 Message* New() const; 205 206 int GetCachedSize() const; 207 void SetCachedSize(int size) const; 208 209 Metadata GetMetadata() const; 210 211 private: 212 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage); 213 214 inline bool is_prototype() const { 215 return type_info_->prototype == this || 216 // If type_info_->prototype is NULL, then we must be constructing 217 // the prototype now, which means we must be the prototype. 218 type_info_->prototype == NULL; 219 } 220 221 inline void* OffsetToPointer(int offset) { 222 return reinterpret_cast<uint8*>(this) + offset; 223 } 224 inline const void* OffsetToPointer(int offset) const { 225 return reinterpret_cast<const uint8*>(this) + offset; 226 } 227 228 const TypeInfo* type_info_; 229 230 // TODO(kenton): Make this an atomic<int> when C++ supports it. 231 mutable int cached_byte_size_; 232}; 233 234DynamicMessage::DynamicMessage(const TypeInfo* type_info) 235 : type_info_(type_info), 236 cached_byte_size_(0) { 237 // We need to call constructors for various fields manually and set 238 // default values where appropriate. 
We use placement new to call 239 // constructors. If you haven't heard of placement new, I suggest Googling 240 // it now. We use placement new even for primitive types that don't have 241 // constructors for consistency. (In theory, placement new should be used 242 // any time you are trying to convert untyped memory to typed memory, though 243 // in practice that's not strictly necessary for types that don't have a 244 // constructor.) 245 246 const Descriptor* descriptor = type_info_->type; 247 248 new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet; 249 250 if (type_info_->extensions_offset != -1) { 251 new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet; 252 } 253 254 for (int i = 0; i < descriptor->field_count(); i++) { 255 const FieldDescriptor* field = descriptor->field(i); 256 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 257 switch (field->cpp_type()) { 258#define HANDLE_TYPE(CPPTYPE, TYPE) \ 259 case FieldDescriptor::CPPTYPE_##CPPTYPE: \ 260 if (!field->is_repeated()) { \ 261 new(field_ptr) TYPE(field->default_value_##TYPE()); \ 262 } else { \ 263 new(field_ptr) RepeatedField<TYPE>(); \ 264 } \ 265 break; 266 267 HANDLE_TYPE(INT32 , int32 ); 268 HANDLE_TYPE(INT64 , int64 ); 269 HANDLE_TYPE(UINT32, uint32); 270 HANDLE_TYPE(UINT64, uint64); 271 HANDLE_TYPE(DOUBLE, double); 272 HANDLE_TYPE(FLOAT , float ); 273 HANDLE_TYPE(BOOL , bool ); 274#undef HANDLE_TYPE 275 276 case FieldDescriptor::CPPTYPE_ENUM: 277 if (!field->is_repeated()) { 278 new(field_ptr) int(field->default_value_enum()->number()); 279 } else { 280 new(field_ptr) RepeatedField<int>(); 281 } 282 break; 283 284 case FieldDescriptor::CPPTYPE_STRING: 285 switch (field->options().ctype()) { 286 default: // TODO(kenton): Support other string reps. 
287 case FieldOptions::STRING: 288 if (!field->is_repeated()) { 289 if (is_prototype()) { 290 new(field_ptr) const string*(&field->default_value_string()); 291 } else { 292 string* default_value = 293 *reinterpret_cast<string* const*>( 294 type_info_->prototype->OffsetToPointer( 295 type_info_->offsets[i])); 296 new(field_ptr) string*(default_value); 297 } 298 } else { 299 new(field_ptr) RepeatedPtrField<string>(); 300 } 301 break; 302 } 303 break; 304 305 case FieldDescriptor::CPPTYPE_MESSAGE: { 306 if (!field->is_repeated()) { 307 new(field_ptr) Message*(NULL); 308 } else { 309 new(field_ptr) RepeatedPtrField<Message>(); 310 } 311 break; 312 } 313 } 314 } 315} 316 317DynamicMessage::~DynamicMessage() { 318 const Descriptor* descriptor = type_info_->type; 319 320 reinterpret_cast<UnknownFieldSet*>( 321 OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet(); 322 323 if (type_info_->extensions_offset != -1) { 324 reinterpret_cast<ExtensionSet*>( 325 OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet(); 326 } 327 328 // We need to manually run the destructors for repeated fields and strings, 329 // just as we ran their constructors in the the DynamicMessage constructor. 330 // Additionally, if any singular embedded messages have been allocated, we 331 // need to delete them, UNLESS we are the prototype message of this type, 332 // in which case any embedded messages are other prototypes and shouldn't 333 // be touched. 
334 for (int i = 0; i < descriptor->field_count(); i++) { 335 const FieldDescriptor* field = descriptor->field(i); 336 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 337 338 if (field->is_repeated()) { 339 switch (field->cpp_type()) { 340#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ 341 case FieldDescriptor::CPPTYPE_##UPPERCASE : \ 342 reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr) \ 343 ->~RepeatedField<LOWERCASE>(); \ 344 break 345 346 HANDLE_TYPE( INT32, int32); 347 HANDLE_TYPE( INT64, int64); 348 HANDLE_TYPE(UINT32, uint32); 349 HANDLE_TYPE(UINT64, uint64); 350 HANDLE_TYPE(DOUBLE, double); 351 HANDLE_TYPE( FLOAT, float); 352 HANDLE_TYPE( BOOL, bool); 353 HANDLE_TYPE( ENUM, int); 354#undef HANDLE_TYPE 355 356 case FieldDescriptor::CPPTYPE_STRING: 357 switch (field->options().ctype()) { 358 default: // TODO(kenton): Support other string reps. 359 case FieldOptions::STRING: 360 reinterpret_cast<RepeatedPtrField<string>*>(field_ptr) 361 ->~RepeatedPtrField<string>(); 362 break; 363 } 364 break; 365 366 case FieldDescriptor::CPPTYPE_MESSAGE: 367 reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr) 368 ->~RepeatedPtrField<Message>(); 369 break; 370 } 371 372 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { 373 switch (field->options().ctype()) { 374 default: // TODO(kenton): Support other string reps. 375 case FieldOptions::STRING: { 376 string* ptr = *reinterpret_cast<string**>(field_ptr); 377 if (ptr != &field->default_value_string()) { 378 delete ptr; 379 } 380 break; 381 } 382 } 383 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 384 if (!is_prototype()) { 385 Message* message = *reinterpret_cast<Message**>(field_ptr); 386 if (message != NULL) { 387 delete message; 388 } 389 } 390 } 391 } 392} 393 394void DynamicMessage::CrossLinkPrototypes() { 395 // This should only be called on the prototype message. 
396 GOOGLE_CHECK(is_prototype()); 397 398 DynamicMessageFactory* factory = type_info_->factory; 399 const Descriptor* descriptor = type_info_->type; 400 401 // Cross-link default messages. 402 for (int i = 0; i < descriptor->field_count(); i++) { 403 const FieldDescriptor* field = descriptor->field(i); 404 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 405 406 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && 407 !field->is_repeated()) { 408 // For fields with message types, we need to cross-link with the 409 // prototype for the field's type. 410 // For singular fields, the field is just a pointer which should 411 // point to the prototype. 412 *reinterpret_cast<const Message**>(field_ptr) = 413 factory->GetPrototypeNoLock(field->message_type()); 414 } 415 } 416} 417 418Message* DynamicMessage::New() const { 419 void* new_base = operator new(type_info_->size); 420 memset(new_base, 0, type_info_->size); 421 return new(new_base) DynamicMessage(type_info_); 422} 423 424int DynamicMessage::GetCachedSize() const { 425 return cached_byte_size_; 426} 427 428void DynamicMessage::SetCachedSize(int size) const { 429 // This is theoretically not thread-compatible, but in practice it works 430 // because if multiple threads write this simultaneously, they will be 431 // writing the exact same value. 
432 cached_byte_size_ = size; 433} 434 435Metadata DynamicMessage::GetMetadata() const { 436 Metadata metadata; 437 metadata.descriptor = type_info_->type; 438 metadata.reflection = type_info_->reflection.get(); 439 return metadata; 440} 441 442// =================================================================== 443 444struct DynamicMessageFactory::PrototypeMap { 445 typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map; 446 Map map_; 447}; 448 449DynamicMessageFactory::DynamicMessageFactory() 450 : pool_(NULL), delegate_to_generated_factory_(false), 451 prototypes_(new PrototypeMap) { 452} 453 454DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool) 455 : pool_(pool), delegate_to_generated_factory_(false), 456 prototypes_(new PrototypeMap) { 457} 458 459DynamicMessageFactory::~DynamicMessageFactory() { 460 for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin(); 461 iter != prototypes_->map_.end(); ++iter) { 462 delete iter->second; 463 } 464} 465 466const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) { 467 MutexLock lock(&prototypes_mutex_); 468 return GetPrototypeNoLock(type); 469} 470 471const Message* DynamicMessageFactory::GetPrototypeNoLock( 472 const Descriptor* type) { 473 if (delegate_to_generated_factory_ && 474 type->file()->pool() == DescriptorPool::generated_pool()) { 475 return MessageFactory::generated_factory()->GetPrototype(type); 476 } 477 478 const DynamicMessage::TypeInfo** target = &prototypes_->map_[type]; 479 if (*target != NULL) { 480 // Already exists. 481 return (*target)->prototype; 482 } 483 484 DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo; 485 *target = type_info; 486 487 type_info->type = type; 488 type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_; 489 type_info->factory = this; 490 491 // We need to construct all the structures passed to 492 // GeneratedMessageReflection's constructor. 
This includes: 493 // - A block of memory that contains space for all the message's fields. 494 // - An array of integers indicating the byte offset of each field within 495 // this block. 496 // - A big bitfield containing a bit for each field indicating whether 497 // or not that field is set. 498 499 // Compute size and offsets. 500 int* offsets = new int[type->field_count()]; 501 type_info->offsets.reset(offsets); 502 503 // Decide all field offsets by packing in order. 504 // We place the DynamicMessage object itself at the beginning of the allocated 505 // space. 506 int size = sizeof(DynamicMessage); 507 size = AlignOffset(size); 508 509 // Next the has_bits, which is an array of uint32s. 510 type_info->has_bits_offset = size; 511 int has_bits_array_size = 512 DivideRoundingUp(type->field_count(), bitsizeof(uint32)); 513 size += has_bits_array_size * sizeof(uint32); 514 size = AlignOffset(size); 515 516 // The ExtensionSet, if any. 517 if (type->extension_range_count() > 0) { 518 type_info->extensions_offset = size; 519 size += sizeof(ExtensionSet); 520 size = AlignOffset(size); 521 } else { 522 // No extensions. 523 type_info->extensions_offset = -1; 524 } 525 526 // All the fields. 527 for (int i = 0; i < type->field_count(); i++) { 528 // Make sure field is aligned to avoid bus errors. 529 int field_size = FieldSpaceUsed(type->field(i)); 530 size = AlignTo(size, min(kSafeAlignment, field_size)); 531 offsets[i] = size; 532 size += field_size; 533 } 534 535 // Add the UnknownFieldSet to the end. 536 size = AlignOffset(size); 537 type_info->unknown_fields_offset = size; 538 size += sizeof(UnknownFieldSet); 539 540 // Align the final size to make sure no clever allocators think that 541 // alignment is not necessary. 542 size = AlignOffset(size); 543 type_info->size = size; 544 545 // Allocate the prototype. 
546 void* base = operator new(size); 547 memset(base, 0, size); 548 DynamicMessage* prototype = new(base) DynamicMessage(type_info); 549 type_info->prototype = prototype; 550 551 // Construct the reflection object. 552 type_info->reflection.reset( 553 new GeneratedMessageReflection( 554 type_info->type, 555 type_info->prototype, 556 type_info->offsets.get(), 557 type_info->has_bits_offset, 558 type_info->unknown_fields_offset, 559 type_info->extensions_offset, 560 type_info->pool, 561 this, 562 type_info->size)); 563 564 // Cross link prototypes. 565 prototype->CrossLinkPrototypes(); 566 567 return prototype; 568} 569 570} // namespace protobuf 571} // namespace google 572