// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// DynamicMessage is implemented by constructing a data structure which
// has roughly the same memory layout as a generated message would have.
// Then, we use GeneratedMessageReflection to implement our reflection
// interface.  All the other operations we need to implement (e.g.
// parsing, copying, etc.) are already implemented in terms of
// Reflection, so the rest is easy.
//
// The up side of this strategy is that it's very efficient.  We don't
// need to use hash_maps or generic representations of fields.  The
// down side is that this is a low-level memory management hack which
// can be tricky to get right.
//
// As mentioned in the header, we only expose a DynamicMessageFactory
// publicly, not the DynamicMessage class itself.  This is because
// GeneratedMessageReflection wants to have a pointer to a "default"
// copy of the class, with all fields initialized to their default
// values.  We only want to construct one of these per message type,
// so DynamicMessageFactory stores a cache of default messages for
// each type it sees (each unique Descriptor pointer).  The code
// refers to the "default" copy of the class as the "prototype".
//
// Note on memory allocation:  This module often calls "operator new()"
// to allocate untyped memory, rather than calling something like
// "new uint8[]".  This is because "operator new()" means "Give me some
// space which I can use as I please." while "new uint8[]" means "Give
// me an array of 8-bit integers.".  In practice, the latter may return
// a pointer that is not aligned correctly for general use.  I believe
// Item 8 of "More Effective C++" discusses this in more detail, though
// I don't have the book on me right now so I'm not sure.
64 65#include <algorithm> 66#include <google/protobuf/stubs/hash.h> 67 68#include <google/protobuf/stubs/common.h> 69 70#include <google/protobuf/dynamic_message.h> 71#include <google/protobuf/descriptor.h> 72#include <google/protobuf/descriptor.pb.h> 73#include <google/protobuf/generated_message_util.h> 74#include <google/protobuf/generated_message_reflection.h> 75#include <google/protobuf/reflection_ops.h> 76#include <google/protobuf/repeated_field.h> 77#include <google/protobuf/extension_set.h> 78#include <google/protobuf/wire_format.h> 79 80namespace google { 81namespace protobuf { 82 83using internal::WireFormat; 84using internal::ExtensionSet; 85using internal::GeneratedMessageReflection; 86 87 88// =================================================================== 89// Some helper tables and functions... 90 91namespace { 92 93// Compute the byte size of the in-memory representation of the field. 94int FieldSpaceUsed(const FieldDescriptor* field) { 95 typedef FieldDescriptor FD; // avoid line wrapping 96 if (field->label() == FD::LABEL_REPEATED) { 97 switch (field->cpp_type()) { 98 case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >); 99 case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >); 100 case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >); 101 case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >); 102 case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >); 103 case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >); 104 case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >); 105 case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >); 106 case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>); 107 108 case FD::CPPTYPE_STRING: 109 switch (field->options().ctype()) { 110 default: // TODO(kenton): Support other string reps. 
111 case FieldOptions::STRING: 112 return sizeof(RepeatedPtrField<string>); 113 } 114 break; 115 } 116 } else { 117 switch (field->cpp_type()) { 118 case FD::CPPTYPE_INT32 : return sizeof(int32 ); 119 case FD::CPPTYPE_INT64 : return sizeof(int64 ); 120 case FD::CPPTYPE_UINT32 : return sizeof(uint32 ); 121 case FD::CPPTYPE_UINT64 : return sizeof(uint64 ); 122 case FD::CPPTYPE_DOUBLE : return sizeof(double ); 123 case FD::CPPTYPE_FLOAT : return sizeof(float ); 124 case FD::CPPTYPE_BOOL : return sizeof(bool ); 125 case FD::CPPTYPE_ENUM : return sizeof(int ); 126 case FD::CPPTYPE_MESSAGE: return sizeof(Message*); 127 128 case FD::CPPTYPE_STRING: 129 switch (field->options().ctype()) { 130 default: // TODO(kenton): Support other string reps. 131 case FieldOptions::STRING: 132 return sizeof(string*); 133 } 134 break; 135 } 136 } 137 138 GOOGLE_LOG(DFATAL) << "Can't get here."; 139 return 0; 140} 141 142inline int DivideRoundingUp(int i, int j) { 143 return (i + (j - 1)) / j; 144} 145 146static const int kSafeAlignment = sizeof(uint64); 147 148inline int AlignTo(int offset, int alignment) { 149 return DivideRoundingUp(offset, alignment) * alignment; 150} 151 152// Rounds the given byte offset up to the next offset aligned such that any 153// type may be stored at it. 154inline int AlignOffset(int offset) { 155 return AlignTo(offset, kSafeAlignment); 156} 157 158#define bitsizeof(T) (sizeof(T) * 8) 159 160} // namespace 161 162// =================================================================== 163 164class DynamicMessage : public Message { 165 public: 166 struct TypeInfo { 167 int size; 168 int has_bits_offset; 169 int unknown_fields_offset; 170 int extensions_offset; 171 172 // Not owned by the TypeInfo. 173 DynamicMessageFactory* factory; // The factory that created this object. 174 const DescriptorPool* pool; // The factory's DescriptorPool. 175 const Descriptor* type; // Type of this DynamicMessage. 
176 177 // Warning: The order in which the following pointers are defined is 178 // important (the prototype must be deleted *before* the offsets). 179 scoped_array<int> offsets; 180 scoped_ptr<const GeneratedMessageReflection> reflection; 181 scoped_ptr<const DynamicMessage> prototype; 182 }; 183 184 DynamicMessage(const TypeInfo* type_info); 185 ~DynamicMessage(); 186 187 // Called on the prototype after construction to initialize message fields. 188 void CrossLinkPrototypes(); 189 190 // implements Message ---------------------------------------------- 191 192 Message* New() const; 193 194 int GetCachedSize() const; 195 void SetCachedSize(int size) const; 196 197 Metadata GetMetadata() const; 198 199 private: 200 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage); 201 202 inline bool is_prototype() const { 203 return type_info_->prototype == this || 204 // If type_info_->prototype is NULL, then we must be constructing 205 // the prototype now, which means we must be the prototype. 206 type_info_->prototype == NULL; 207 } 208 209 inline void* OffsetToPointer(int offset) { 210 return reinterpret_cast<uint8*>(this) + offset; 211 } 212 inline const void* OffsetToPointer(int offset) const { 213 return reinterpret_cast<const uint8*>(this) + offset; 214 } 215 216 const TypeInfo* type_info_; 217 218 // TODO(kenton): Make this an atomic<int> when C++ supports it. 219 mutable int cached_byte_size_; 220}; 221 222DynamicMessage::DynamicMessage(const TypeInfo* type_info) 223 : type_info_(type_info), 224 cached_byte_size_(0) { 225 // We need to call constructors for various fields manually and set 226 // default values where appropriate. We use placement new to call 227 // constructors. If you haven't heard of placement new, I suggest Googling 228 // it now. We use placement new even for primitive types that don't have 229 // constructors for consistency. 
(In theory, placement new should be used 230 // any time you are trying to convert untyped memory to typed memory, though 231 // in practice that's not strictly necessary for types that don't have a 232 // constructor.) 233 234 const Descriptor* descriptor = type_info_->type; 235 236 new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet; 237 238 if (type_info_->extensions_offset != -1) { 239 new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet; 240 } 241 242 for (int i = 0; i < descriptor->field_count(); i++) { 243 const FieldDescriptor* field = descriptor->field(i); 244 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 245 switch (field->cpp_type()) { 246#define HANDLE_TYPE(CPPTYPE, TYPE) \ 247 case FieldDescriptor::CPPTYPE_##CPPTYPE: \ 248 if (!field->is_repeated()) { \ 249 new(field_ptr) TYPE(field->default_value_##TYPE()); \ 250 } else { \ 251 new(field_ptr) RepeatedField<TYPE>(); \ 252 } \ 253 break; 254 255 HANDLE_TYPE(INT32 , int32 ); 256 HANDLE_TYPE(INT64 , int64 ); 257 HANDLE_TYPE(UINT32, uint32); 258 HANDLE_TYPE(UINT64, uint64); 259 HANDLE_TYPE(DOUBLE, double); 260 HANDLE_TYPE(FLOAT , float ); 261 HANDLE_TYPE(BOOL , bool ); 262#undef HANDLE_TYPE 263 264 case FieldDescriptor::CPPTYPE_ENUM: 265 if (!field->is_repeated()) { 266 new(field_ptr) int(field->default_value_enum()->number()); 267 } else { 268 new(field_ptr) RepeatedField<int>(); 269 } 270 break; 271 272 case FieldDescriptor::CPPTYPE_STRING: 273 switch (field->options().ctype()) { 274 default: // TODO(kenton): Support other string reps. 
275 case FieldOptions::STRING: 276 if (!field->is_repeated()) { 277 if (is_prototype()) { 278 new(field_ptr) const string*(&field->default_value_string()); 279 } else { 280 string* default_value = 281 *reinterpret_cast<string* const*>( 282 type_info_->prototype->OffsetToPointer( 283 type_info_->offsets[i])); 284 new(field_ptr) string*(default_value); 285 } 286 } else { 287 new(field_ptr) RepeatedPtrField<string>(); 288 } 289 break; 290 } 291 break; 292 293 case FieldDescriptor::CPPTYPE_MESSAGE: { 294 if (!field->is_repeated()) { 295 new(field_ptr) Message*(NULL); 296 } else { 297 new(field_ptr) RepeatedPtrField<Message>(); 298 } 299 break; 300 } 301 } 302 } 303} 304 305DynamicMessage::~DynamicMessage() { 306 const Descriptor* descriptor = type_info_->type; 307 308 reinterpret_cast<UnknownFieldSet*>( 309 OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet(); 310 311 if (type_info_->extensions_offset != -1) { 312 reinterpret_cast<ExtensionSet*>( 313 OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet(); 314 } 315 316 // We need to manually run the destructors for repeated fields and strings, 317 // just as we ran their constructors in the the DynamicMessage constructor. 318 // Additionally, if any singular embedded messages have been allocated, we 319 // need to delete them, UNLESS we are the prototype message of this type, 320 // in which case any embedded messages are other prototypes and shouldn't 321 // be touched. 
322 for (int i = 0; i < descriptor->field_count(); i++) { 323 const FieldDescriptor* field = descriptor->field(i); 324 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 325 326 if (field->is_repeated()) { 327 switch (field->cpp_type()) { 328#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ 329 case FieldDescriptor::CPPTYPE_##UPPERCASE : \ 330 reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr) \ 331 ->~RepeatedField<LOWERCASE>(); \ 332 break 333 334 HANDLE_TYPE( INT32, int32); 335 HANDLE_TYPE( INT64, int64); 336 HANDLE_TYPE(UINT32, uint32); 337 HANDLE_TYPE(UINT64, uint64); 338 HANDLE_TYPE(DOUBLE, double); 339 HANDLE_TYPE( FLOAT, float); 340 HANDLE_TYPE( BOOL, bool); 341 HANDLE_TYPE( ENUM, int); 342#undef HANDLE_TYPE 343 344 case FieldDescriptor::CPPTYPE_STRING: 345 switch (field->options().ctype()) { 346 default: // TODO(kenton): Support other string reps. 347 case FieldOptions::STRING: 348 reinterpret_cast<RepeatedPtrField<string>*>(field_ptr) 349 ->~RepeatedPtrField<string>(); 350 break; 351 } 352 break; 353 354 case FieldDescriptor::CPPTYPE_MESSAGE: 355 reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr) 356 ->~RepeatedPtrField<Message>(); 357 break; 358 } 359 360 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { 361 switch (field->options().ctype()) { 362 default: // TODO(kenton): Support other string reps. 363 case FieldOptions::STRING: { 364 string* ptr = *reinterpret_cast<string**>(field_ptr); 365 if (ptr != &field->default_value_string()) { 366 delete ptr; 367 } 368 break; 369 } 370 } 371 } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) && 372 !is_prototype()) { 373 Message* message = *reinterpret_cast<Message**>(field_ptr); 374 if (message != NULL) { 375 delete message; 376 } 377 } 378 } 379} 380 381void DynamicMessage::CrossLinkPrototypes() { 382 // This should only be called on the prototype message. 
383 GOOGLE_CHECK(is_prototype()); 384 385 DynamicMessageFactory* factory = type_info_->factory; 386 const Descriptor* descriptor = type_info_->type; 387 388 // Cross-link default messages. 389 for (int i = 0; i < descriptor->field_count(); i++) { 390 const FieldDescriptor* field = descriptor->field(i); 391 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 392 393 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && 394 !field->is_repeated()) { 395 // For fields with message types, we need to cross-link with the 396 // prototype for the field's type. 397 // For singular fields, the field is just a pointer which should 398 // point to the prototype. 399 *reinterpret_cast<const Message**>(field_ptr) = 400 factory->GetPrototypeNoLock(field->message_type()); 401 } 402 } 403} 404 405Message* DynamicMessage::New() const { 406 void* new_base = reinterpret_cast<uint8*>(operator new(type_info_->size)); 407 memset(new_base, 0, type_info_->size); 408 return new(new_base) DynamicMessage(type_info_); 409} 410 411int DynamicMessage::GetCachedSize() const { 412 return cached_byte_size_; 413} 414 415void DynamicMessage::SetCachedSize(int size) const { 416 // This is theoretically not thread-compatible, but in practice it works 417 // because if multiple threads write this simultaneously, they will be 418 // writing the exact same value. 
419 cached_byte_size_ = size; 420} 421 422Metadata DynamicMessage::GetMetadata() const { 423 Metadata metadata; 424 metadata.descriptor = type_info_->type; 425 metadata.reflection = type_info_->reflection.get(); 426 return metadata; 427} 428 429// =================================================================== 430 431struct DynamicMessageFactory::PrototypeMap { 432 typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map; 433 Map map_; 434}; 435 436DynamicMessageFactory::DynamicMessageFactory() 437 : pool_(NULL), delegate_to_generated_factory_(false), 438 prototypes_(new PrototypeMap) { 439} 440 441DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool) 442 : pool_(pool), delegate_to_generated_factory_(false), 443 prototypes_(new PrototypeMap) { 444} 445 446DynamicMessageFactory::~DynamicMessageFactory() { 447 for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin(); 448 iter != prototypes_->map_.end(); ++iter) { 449 delete iter->second; 450 } 451} 452 453const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) { 454 MutexLock lock(&prototypes_mutex_); 455 return GetPrototypeNoLock(type); 456} 457 458const Message* DynamicMessageFactory::GetPrototypeNoLock( 459 const Descriptor* type) { 460 if (delegate_to_generated_factory_ && 461 type->file()->pool() == DescriptorPool::generated_pool()) { 462 return MessageFactory::generated_factory()->GetPrototype(type); 463 } 464 465 const DynamicMessage::TypeInfo** target = &prototypes_->map_[type]; 466 if (*target != NULL) { 467 // Already exists. 468 return (*target)->prototype.get(); 469 } 470 471 DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo; 472 *target = type_info; 473 474 type_info->type = type; 475 type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_; 476 type_info->factory = this; 477 478 // We need to construct all the structures passed to 479 // GeneratedMessageReflection's constructor. 
This includes: 480 // - A block of memory that contains space for all the message's fields. 481 // - An array of integers indicating the byte offset of each field within 482 // this block. 483 // - A big bitfield containing a bit for each field indicating whether 484 // or not that field is set. 485 486 // Compute size and offsets. 487 int* offsets = new int[type->field_count()]; 488 type_info->offsets.reset(offsets); 489 490 // Decide all field offsets by packing in order. 491 // We place the DynamicMessage object itself at the beginning of the allocated 492 // space. 493 int size = sizeof(DynamicMessage); 494 size = AlignOffset(size); 495 496 // Next the has_bits, which is an array of uint32s. 497 type_info->has_bits_offset = size; 498 int has_bits_array_size = 499 DivideRoundingUp(type->field_count(), bitsizeof(uint32)); 500 size += has_bits_array_size * sizeof(uint32); 501 size = AlignOffset(size); 502 503 // The ExtensionSet, if any. 504 if (type->extension_range_count() > 0) { 505 type_info->extensions_offset = size; 506 size += sizeof(ExtensionSet); 507 size = AlignOffset(size); 508 } else { 509 // No extensions. 510 type_info->extensions_offset = -1; 511 } 512 513 // All the fields. 514 for (int i = 0; i < type->field_count(); i++) { 515 // Make sure field is aligned to avoid bus errors. 516 int field_size = FieldSpaceUsed(type->field(i)); 517 size = AlignTo(size, min(kSafeAlignment, field_size)); 518 offsets[i] = size; 519 size += field_size; 520 } 521 522 // Add the UnknownFieldSet to the end. 523 size = AlignOffset(size); 524 type_info->unknown_fields_offset = size; 525 size += sizeof(UnknownFieldSet); 526 527 // Align the final size to make sure no clever allocators think that 528 // alignment is not necessary. 529 size = AlignOffset(size); 530 type_info->size = size; 531 532 // Allocate the prototype. 
533 void* base = operator new(size); 534 memset(base, 0, size); 535 DynamicMessage* prototype = new(base) DynamicMessage(type_info); 536 type_info->prototype.reset(prototype); 537 538 // Construct the reflection object. 539 type_info->reflection.reset( 540 new GeneratedMessageReflection( 541 type_info->type, 542 type_info->prototype.get(), 543 type_info->offsets.get(), 544 type_info->has_bits_offset, 545 type_info->unknown_fields_offset, 546 type_info->extensions_offset, 547 type_info->pool, 548 this, 549 type_info->size)); 550 551 // Cross link prototypes. 552 prototype->CrossLinkPrototypes(); 553 554 return prototype; 555} 556 557} // namespace protobuf 558} // namespace google 559