// dynamic_message.cc revision ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// DynamicMessage is implemented by constructing a data structure which
// has roughly the same memory layout as a generated message would have.
// Then, we use GeneratedMessageReflection to implement our reflection
// interface.  All the other operations we need to implement (e.g.
// parsing, copying, etc.) are already implemented in terms of
// Reflection, so the rest is easy.
//
// The up side of this strategy is that it's very efficient.  We don't
// need to use hash_maps or generic representations of fields.  The
// down side is that this is a low-level memory management hack which
// can be tricky to get right.
//
// As mentioned in the header, we only expose a DynamicMessageFactory
// publicly, not the DynamicMessage class itself.  This is because
// GeneratedMessageReflection wants to have a pointer to a "default"
// copy of the class, with all fields initialized to their default
// values.  We only want to construct one of these per message type,
// so DynamicMessageFactory stores a cache of default messages for
// each type it sees (each unique Descriptor pointer).  The code
// refers to the "default" copy of the class as the "prototype".
//
// Note on memory allocation:  This module often calls "operator new()"
// to allocate untyped memory, rather than calling something like
// "new uint8[]".  This is because "operator new()" means "Give me some
// space which I can use as I please." while "new uint8[]" means "Give
// me an array of 8-bit integers.".  In practice, the latter may return
// a pointer that is not aligned correctly for general use.  I believe
// Item 8 of "More Effective C++" discusses this in more detail, though
// I don't have the book on me right now so I'm not sure.
64 65#include <algorithm> 66#include <google/protobuf/stubs/hash.h> 67 68#include <google/protobuf/stubs/common.h> 69 70#include <google/protobuf/dynamic_message.h> 71#include <google/protobuf/descriptor.h> 72#include <google/protobuf/descriptor.pb.h> 73#include <google/protobuf/generated_message_util.h> 74#include <google/protobuf/generated_message_reflection.h> 75#include <google/protobuf/reflection_ops.h> 76#include <google/protobuf/repeated_field.h> 77#include <google/protobuf/extension_set.h> 78#include <google/protobuf/wire_format.h> 79 80namespace google { 81namespace protobuf { 82 83using internal::WireFormat; 84using internal::ExtensionSet; 85using internal::GeneratedMessageReflection; 86 87 88// =================================================================== 89// Some helper tables and functions... 90 91namespace { 92 93// Compute the byte size of the in-memory representation of the field. 94int FieldSpaceUsed(const FieldDescriptor* field) { 95 typedef FieldDescriptor FD; // avoid line wrapping 96 if (field->label() == FD::LABEL_REPEATED) { 97 switch (field->cpp_type()) { 98 case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >); 99 case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >); 100 case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >); 101 case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >); 102 case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >); 103 case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >); 104 case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >); 105 case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >); 106 case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>); 107 108 case FD::CPPTYPE_STRING: 109 switch (field->options().ctype()) { 110 default: // TODO(kenton): Support other string reps. 
111 case FieldOptions::STRING: 112 return sizeof(RepeatedPtrField<string>); 113 } 114 break; 115 } 116 } else { 117 switch (field->cpp_type()) { 118 case FD::CPPTYPE_INT32 : return sizeof(int32 ); 119 case FD::CPPTYPE_INT64 : return sizeof(int64 ); 120 case FD::CPPTYPE_UINT32 : return sizeof(uint32 ); 121 case FD::CPPTYPE_UINT64 : return sizeof(uint64 ); 122 case FD::CPPTYPE_DOUBLE : return sizeof(double ); 123 case FD::CPPTYPE_FLOAT : return sizeof(float ); 124 case FD::CPPTYPE_BOOL : return sizeof(bool ); 125 case FD::CPPTYPE_ENUM : return sizeof(int ); 126 127 case FD::CPPTYPE_MESSAGE: 128 return sizeof(Message*); 129 130 case FD::CPPTYPE_STRING: 131 switch (field->options().ctype()) { 132 default: // TODO(kenton): Support other string reps. 133 case FieldOptions::STRING: 134 return sizeof(string*); 135 } 136 break; 137 } 138 } 139 140 GOOGLE_LOG(DFATAL) << "Can't get here."; 141 return 0; 142} 143 144inline int DivideRoundingUp(int i, int j) { 145 return (i + (j - 1)) / j; 146} 147 148static const int kSafeAlignment = sizeof(uint64); 149 150inline int AlignTo(int offset, int alignment) { 151 return DivideRoundingUp(offset, alignment) * alignment; 152} 153 154// Rounds the given byte offset up to the next offset aligned such that any 155// type may be stored at it. 156inline int AlignOffset(int offset) { 157 return AlignTo(offset, kSafeAlignment); 158} 159 160#define bitsizeof(T) (sizeof(T) * 8) 161 162} // namespace 163 164// =================================================================== 165 166class DynamicMessage : public Message { 167 public: 168 struct TypeInfo { 169 int size; 170 int has_bits_offset; 171 int unknown_fields_offset; 172 int extensions_offset; 173 174 // Not owned by the TypeInfo. 175 DynamicMessageFactory* factory; // The factory that created this object. 176 const DescriptorPool* pool; // The factory's DescriptorPool. 177 const Descriptor* type; // Type of this DynamicMessage. 
178 179 // Warning: The order in which the following pointers are defined is 180 // important (the prototype must be deleted *before* the offsets). 181 scoped_array<int> offsets; 182 scoped_ptr<const GeneratedMessageReflection> reflection; 183 // Don't use a scoped_ptr to hold the prototype: the destructor for 184 // DynamicMessage needs to know whether it is the prototype, and does so by 185 // looking back at this field. This would assume details about the 186 // implementation of scoped_ptr. 187 const DynamicMessage* prototype; 188 189 TypeInfo() : prototype(NULL) {} 190 191 ~TypeInfo() { 192 delete prototype; 193 } 194 }; 195 196 DynamicMessage(const TypeInfo* type_info); 197 ~DynamicMessage(); 198 199 // Called on the prototype after construction to initialize message fields. 200 void CrossLinkPrototypes(); 201 202 // implements Message ---------------------------------------------- 203 204 Message* New() const; 205 206 int GetCachedSize() const; 207 void SetCachedSize(int size) const; 208 209 Metadata GetMetadata() const; 210 211 // We actually allocate more memory than sizeof(*this) when this 212 // class's memory is allocated via the global operator new. Thus, we need to 213 // manually call the global operator delete. Calling the destructor is taken 214 // care of for us. 215 static void operator delete(void* ptr) { 216 ::operator delete(ptr); 217 } 218 219 private: 220 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage); 221 222 inline bool is_prototype() const { 223 return type_info_->prototype == this || 224 // If type_info_->prototype is NULL, then we must be constructing 225 // the prototype now, which means we must be the prototype. 
226 type_info_->prototype == NULL; 227 } 228 229 inline void* OffsetToPointer(int offset) { 230 return reinterpret_cast<uint8*>(this) + offset; 231 } 232 inline const void* OffsetToPointer(int offset) const { 233 return reinterpret_cast<const uint8*>(this) + offset; 234 } 235 236 const TypeInfo* type_info_; 237 238 // TODO(kenton): Make this an atomic<int> when C++ supports it. 239 mutable int cached_byte_size_; 240}; 241 242DynamicMessage::DynamicMessage(const TypeInfo* type_info) 243 : type_info_(type_info), 244 cached_byte_size_(0) { 245 // We need to call constructors for various fields manually and set 246 // default values where appropriate. We use placement new to call 247 // constructors. If you haven't heard of placement new, I suggest Googling 248 // it now. We use placement new even for primitive types that don't have 249 // constructors for consistency. (In theory, placement new should be used 250 // any time you are trying to convert untyped memory to typed memory, though 251 // in practice that's not strictly necessary for types that don't have a 252 // constructor.) 
253 254 const Descriptor* descriptor = type_info_->type; 255 256 new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet; 257 258 if (type_info_->extensions_offset != -1) { 259 new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet; 260 } 261 262 for (int i = 0; i < descriptor->field_count(); i++) { 263 const FieldDescriptor* field = descriptor->field(i); 264 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 265 switch (field->cpp_type()) { 266#define HANDLE_TYPE(CPPTYPE, TYPE) \ 267 case FieldDescriptor::CPPTYPE_##CPPTYPE: \ 268 if (!field->is_repeated()) { \ 269 new(field_ptr) TYPE(field->default_value_##TYPE()); \ 270 } else { \ 271 new(field_ptr) RepeatedField<TYPE>(); \ 272 } \ 273 break; 274 275 HANDLE_TYPE(INT32 , int32 ); 276 HANDLE_TYPE(INT64 , int64 ); 277 HANDLE_TYPE(UINT32, uint32); 278 HANDLE_TYPE(UINT64, uint64); 279 HANDLE_TYPE(DOUBLE, double); 280 HANDLE_TYPE(FLOAT , float ); 281 HANDLE_TYPE(BOOL , bool ); 282#undef HANDLE_TYPE 283 284 case FieldDescriptor::CPPTYPE_ENUM: 285 if (!field->is_repeated()) { 286 new(field_ptr) int(field->default_value_enum()->number()); 287 } else { 288 new(field_ptr) RepeatedField<int>(); 289 } 290 break; 291 292 case FieldDescriptor::CPPTYPE_STRING: 293 switch (field->options().ctype()) { 294 default: // TODO(kenton): Support other string reps. 
295 case FieldOptions::STRING: 296 if (!field->is_repeated()) { 297 if (is_prototype()) { 298 new(field_ptr) const string*(&field->default_value_string()); 299 } else { 300 string* default_value = 301 *reinterpret_cast<string* const*>( 302 type_info_->prototype->OffsetToPointer( 303 type_info_->offsets[i])); 304 new(field_ptr) string*(default_value); 305 } 306 } else { 307 new(field_ptr) RepeatedPtrField<string>(); 308 } 309 break; 310 } 311 break; 312 313 case FieldDescriptor::CPPTYPE_MESSAGE: { 314 if (!field->is_repeated()) { 315 new(field_ptr) Message*(NULL); 316 } else { 317 new(field_ptr) RepeatedPtrField<Message>(); 318 } 319 break; 320 } 321 } 322 } 323} 324 325DynamicMessage::~DynamicMessage() { 326 const Descriptor* descriptor = type_info_->type; 327 328 reinterpret_cast<UnknownFieldSet*>( 329 OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet(); 330 331 if (type_info_->extensions_offset != -1) { 332 reinterpret_cast<ExtensionSet*>( 333 OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet(); 334 } 335 336 // We need to manually run the destructors for repeated fields and strings, 337 // just as we ran their constructors in the the DynamicMessage constructor. 338 // Additionally, if any singular embedded messages have been allocated, we 339 // need to delete them, UNLESS we are the prototype message of this type, 340 // in which case any embedded messages are other prototypes and shouldn't 341 // be touched. 
342 for (int i = 0; i < descriptor->field_count(); i++) { 343 const FieldDescriptor* field = descriptor->field(i); 344 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 345 346 if (field->is_repeated()) { 347 switch (field->cpp_type()) { 348#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ 349 case FieldDescriptor::CPPTYPE_##UPPERCASE : \ 350 reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr) \ 351 ->~RepeatedField<LOWERCASE>(); \ 352 break 353 354 HANDLE_TYPE( INT32, int32); 355 HANDLE_TYPE( INT64, int64); 356 HANDLE_TYPE(UINT32, uint32); 357 HANDLE_TYPE(UINT64, uint64); 358 HANDLE_TYPE(DOUBLE, double); 359 HANDLE_TYPE( FLOAT, float); 360 HANDLE_TYPE( BOOL, bool); 361 HANDLE_TYPE( ENUM, int); 362#undef HANDLE_TYPE 363 364 case FieldDescriptor::CPPTYPE_STRING: 365 switch (field->options().ctype()) { 366 default: // TODO(kenton): Support other string reps. 367 case FieldOptions::STRING: 368 reinterpret_cast<RepeatedPtrField<string>*>(field_ptr) 369 ->~RepeatedPtrField<string>(); 370 break; 371 } 372 break; 373 374 case FieldDescriptor::CPPTYPE_MESSAGE: 375 reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr) 376 ->~RepeatedPtrField<Message>(); 377 break; 378 } 379 380 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { 381 switch (field->options().ctype()) { 382 default: // TODO(kenton): Support other string reps. 383 case FieldOptions::STRING: { 384 string* ptr = *reinterpret_cast<string**>(field_ptr); 385 if (ptr != &field->default_value_string()) { 386 delete ptr; 387 } 388 break; 389 } 390 } 391 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 392 if (!is_prototype()) { 393 Message* message = *reinterpret_cast<Message**>(field_ptr); 394 if (message != NULL) { 395 delete message; 396 } 397 } 398 } 399 } 400} 401 402void DynamicMessage::CrossLinkPrototypes() { 403 // This should only be called on the prototype message. 
404 GOOGLE_CHECK(is_prototype()); 405 406 DynamicMessageFactory* factory = type_info_->factory; 407 const Descriptor* descriptor = type_info_->type; 408 409 // Cross-link default messages. 410 for (int i = 0; i < descriptor->field_count(); i++) { 411 const FieldDescriptor* field = descriptor->field(i); 412 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 413 414 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && 415 !field->is_repeated()) { 416 // For fields with message types, we need to cross-link with the 417 // prototype for the field's type. 418 // For singular fields, the field is just a pointer which should 419 // point to the prototype. 420 *reinterpret_cast<const Message**>(field_ptr) = 421 factory->GetPrototypeNoLock(field->message_type()); 422 } 423 } 424} 425 426Message* DynamicMessage::New() const { 427 void* new_base = operator new(type_info_->size); 428 memset(new_base, 0, type_info_->size); 429 return new(new_base) DynamicMessage(type_info_); 430} 431 432int DynamicMessage::GetCachedSize() const { 433 return cached_byte_size_; 434} 435 436void DynamicMessage::SetCachedSize(int size) const { 437 // This is theoretically not thread-compatible, but in practice it works 438 // because if multiple threads write this simultaneously, they will be 439 // writing the exact same value. 
440 cached_byte_size_ = size; 441} 442 443Metadata DynamicMessage::GetMetadata() const { 444 Metadata metadata; 445 metadata.descriptor = type_info_->type; 446 metadata.reflection = type_info_->reflection.get(); 447 return metadata; 448} 449 450// =================================================================== 451 452struct DynamicMessageFactory::PrototypeMap { 453 typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map; 454 Map map_; 455}; 456 457DynamicMessageFactory::DynamicMessageFactory() 458 : pool_(NULL), delegate_to_generated_factory_(false), 459 prototypes_(new PrototypeMap) { 460} 461 462DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool) 463 : pool_(pool), delegate_to_generated_factory_(false), 464 prototypes_(new PrototypeMap) { 465} 466 467DynamicMessageFactory::~DynamicMessageFactory() { 468 for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin(); 469 iter != prototypes_->map_.end(); ++iter) { 470 delete iter->second; 471 } 472} 473 474const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) { 475 MutexLock lock(&prototypes_mutex_); 476 return GetPrototypeNoLock(type); 477} 478 479const Message* DynamicMessageFactory::GetPrototypeNoLock( 480 const Descriptor* type) { 481 if (delegate_to_generated_factory_ && 482 type->file()->pool() == DescriptorPool::generated_pool()) { 483 return MessageFactory::generated_factory()->GetPrototype(type); 484 } 485 486 const DynamicMessage::TypeInfo** target = &prototypes_->map_[type]; 487 if (*target != NULL) { 488 // Already exists. 489 return (*target)->prototype; 490 } 491 492 DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo; 493 *target = type_info; 494 495 type_info->type = type; 496 type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_; 497 type_info->factory = this; 498 499 // We need to construct all the structures passed to 500 // GeneratedMessageReflection's constructor. 
This includes: 501 // - A block of memory that contains space for all the message's fields. 502 // - An array of integers indicating the byte offset of each field within 503 // this block. 504 // - A big bitfield containing a bit for each field indicating whether 505 // or not that field is set. 506 507 // Compute size and offsets. 508 int* offsets = new int[type->field_count()]; 509 type_info->offsets.reset(offsets); 510 511 // Decide all field offsets by packing in order. 512 // We place the DynamicMessage object itself at the beginning of the allocated 513 // space. 514 int size = sizeof(DynamicMessage); 515 size = AlignOffset(size); 516 517 // Next the has_bits, which is an array of uint32s. 518 type_info->has_bits_offset = size; 519 int has_bits_array_size = 520 DivideRoundingUp(type->field_count(), bitsizeof(uint32)); 521 size += has_bits_array_size * sizeof(uint32); 522 size = AlignOffset(size); 523 524 // The ExtensionSet, if any. 525 if (type->extension_range_count() > 0) { 526 type_info->extensions_offset = size; 527 size += sizeof(ExtensionSet); 528 size = AlignOffset(size); 529 } else { 530 // No extensions. 531 type_info->extensions_offset = -1; 532 } 533 534 // All the fields. 535 for (int i = 0; i < type->field_count(); i++) { 536 // Make sure field is aligned to avoid bus errors. 537 int field_size = FieldSpaceUsed(type->field(i)); 538 size = AlignTo(size, min(kSafeAlignment, field_size)); 539 offsets[i] = size; 540 size += field_size; 541 } 542 543 // Add the UnknownFieldSet to the end. 544 size = AlignOffset(size); 545 type_info->unknown_fields_offset = size; 546 size += sizeof(UnknownFieldSet); 547 548 // Align the final size to make sure no clever allocators think that 549 // alignment is not necessary. 550 size = AlignOffset(size); 551 type_info->size = size; 552 553 // Allocate the prototype. 
554 void* base = operator new(size); 555 memset(base, 0, size); 556 DynamicMessage* prototype = new(base) DynamicMessage(type_info); 557 type_info->prototype = prototype; 558 559 // Construct the reflection object. 560 type_info->reflection.reset( 561 new GeneratedMessageReflection( 562 type_info->type, 563 type_info->prototype, 564 type_info->offsets.get(), 565 type_info->has_bits_offset, 566 type_info->unknown_fields_offset, 567 type_info->extensions_offset, 568 type_info->pool, 569 this, 570 type_info->size)); 571 572 // Cross link prototypes. 573 prototype->CrossLinkPrototypes(); 574 575 return prototype; 576} 577 578} // namespace protobuf 579} // namespace google 580