1// Copyright 2012 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28#ifndef V8_SERIALIZE_H_ 29#define V8_SERIALIZE_H_ 30 31#include "hashmap.h" 32 33namespace v8 { 34namespace internal { 35 36// A TypeCode is used to distinguish different kinds of external reference. 37// It is a single bit to make testing for types easy. 38enum TypeCode { 39 UNCLASSIFIED, // One-of-a-kind references. 40 BUILTIN, 41 RUNTIME_FUNCTION, 42 IC_UTILITY, 43 DEBUG_ADDRESS, 44 STATS_COUNTER, 45 TOP_ADDRESS, 46 C_BUILTIN, 47 EXTENSION, 48 ACCESSOR, 49 RUNTIME_ENTRY, 50 STUB_CACHE_TABLE, 51 LAZY_DEOPTIMIZATION 52}; 53 54const int kTypeCodeCount = LAZY_DEOPTIMIZATION + 1; 55const int kFirstTypeCode = UNCLASSIFIED; 56 57const int kReferenceIdBits = 16; 58const int kReferenceIdMask = (1 << kReferenceIdBits) - 1; 59const int kReferenceTypeShift = kReferenceIdBits; 60const int kDebugRegisterBits = 4; 61const int kDebugIdShift = kDebugRegisterBits; 62 63const int kDeoptTableSerializeEntryCount = 8; 64 65// ExternalReferenceTable is a helper class that defines the relationship 66// between external references and their encodings. It is used to build 67// hashmaps in ExternalReferenceEncoder and ExternalReferenceDecoder. 68class ExternalReferenceTable { 69 public: 70 static ExternalReferenceTable* instance(Isolate* isolate); 71 72 ~ExternalReferenceTable() { } 73 74 int size() const { return refs_.length(); } 75 76 Address address(int i) { return refs_[i].address; } 77 78 uint32_t code(int i) { return refs_[i].code; } 79 80 const char* name(int i) { return refs_[i].name; } 81 82 int max_id(int code) { return max_id_[code]; } 83 84 private: 85 explicit ExternalReferenceTable(Isolate* isolate) : refs_(64) { 86 PopulateTable(isolate); 87 } 88 89 struct ExternalReferenceEntry { 90 Address address; 91 uint32_t code; 92 const char* name; 93 }; 94 95 void PopulateTable(Isolate* isolate); 96 97 // For a few types of references, we can get their address from their id. 98 void AddFromId(TypeCode type, 99 uint16_t id, 100 const char* name, 101 Isolate* isolate); 102 103 // For other types of references, the caller will figure out the address. 104 void Add(Address address, TypeCode type, uint16_t id, const char* name); 105 106 List<ExternalReferenceEntry> refs_; 107 int max_id_[kTypeCodeCount]; 108}; 109 110 111class ExternalReferenceEncoder { 112 public: 113 ExternalReferenceEncoder(); 114 115 uint32_t Encode(Address key) const; 116 117 const char* NameOfAddress(Address key) const; 118 119 private: 120 HashMap encodings_; 121 static uint32_t Hash(Address key) { 122 return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2); 123 } 124 125 int IndexOf(Address key) const; 126 127 static bool Match(void* key1, void* key2) { return key1 == key2; } 128 129 void Put(Address key, int index); 130 131 Isolate* isolate_; 132}; 133 134 135class ExternalReferenceDecoder { 136 public: 137 ExternalReferenceDecoder(); 138 ~ExternalReferenceDecoder(); 139 140 Address Decode(uint32_t key) const { 141 if (key == 0) return NULL; 142 return *Lookup(key); 143 } 144 145 private: 146 Address** encodings_; 147 148 Address* Lookup(uint32_t key) const { 149 int type = key >> kReferenceTypeShift; 150 ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount); 151 int id = key & kReferenceIdMask; 152 return &encodings_[type][id]; 153 } 154 155 void Put(uint32_t key, Address value) { 156 *Lookup(key) = value; 157 } 158 159 Isolate* isolate_; 160}; 161 162 163class SnapshotByteSource { 164 public: 165 SnapshotByteSource(const byte* array, int length) 166 : data_(array), length_(length), position_(0) { } 167 168 bool HasMore() { return position_ < length_; } 169 170 int Get() { 171 ASSERT(position_ < length_); 172 return data_[position_++]; 173 } 174 175 int32_t GetUnalignedInt() { 176#if defined(V8_HOST_CAN_READ_UNALIGNED) && __BYTE_ORDER == __LITTLE_ENDIAN 177 int32_t answer; 178 ASSERT(position_ + sizeof(answer) <= length_ + 0u); 179 answer = *reinterpret_cast<const int32_t*>(data_ + position_); 180#else 181 int32_t answer = data_[position_]; 182 answer |= data_[position_ + 1] << 8; 183 answer |= data_[position_ + 2] << 16; 184 answer |= data_[position_ + 3] << 24; 185#endif 186 return answer; 187 } 188 189 void Advance(int by) { position_ += by; } 190 191 inline void CopyRaw(byte* to, int number_of_bytes); 192 193 inline int GetInt(); 194 195 bool AtEOF(); 196 197 int position() { return position_; } 198 199 private: 200 const byte* data_; 201 int length_; 202 int position_; 203}; 204 205 206// The Serializer/Deserializer class is a common superclass for Serializer and 207// Deserializer which is used to store common constants and methods used by 208// both. 209class SerializerDeserializer: public ObjectVisitor { 210 public: 211 static void Iterate(ObjectVisitor* visitor); 212 213 static int nop() { return kNop; } 214 215 protected: 216 // Where the pointed-to object can be found: 217 enum Where { 218 kNewObject = 0, // Object is next in snapshot. 219 // 1-6 One per space. 220 kRootArray = 0x9, // Object is found in root array. 221 kPartialSnapshotCache = 0xa, // Object is in the cache. 222 kExternalReference = 0xb, // Pointer to an external reference. 223 kSkip = 0xc, // Skip n bytes. 224 kNop = 0xd, // Does nothing, used to pad. 225 // 0xe-0xf Free. 226 kBackref = 0x10, // Object is described relative to end. 227 // 0x11-0x16 One per space. 228 kBackrefWithSkip = 0x18, // Object is described relative to end. 229 // 0x19-0x1e One per space. 230 // 0x20-0x3f Used by misc. tags below. 231 kPointedToMask = 0x3f 232 }; 233 234 // How to code the pointer to the object. 235 enum HowToCode { 236 kPlain = 0, // Straight pointer. 237 // What this means depends on the architecture: 238 kFromCode = 0x40, // A pointer inlined in code. 239 kHowToCodeMask = 0x40 240 }; 241 242 // For kRootArrayConstants 243 enum WithSkip { 244 kNoSkipDistance = 0, 245 kHasSkipDistance = 0x40, 246 kWithSkipMask = 0x40 247 }; 248 249 // Where to point within the object. 250 enum WhereToPoint { 251 kStartOfObject = 0, 252 kInnerPointer = 0x80, // First insn in code object or payload of cell. 253 kWhereToPointMask = 0x80 254 }; 255 256 // Misc. 257 // Raw data to be copied from the snapshot. This byte code does not advance 258 // the current pointer, which is used for code objects, where we write the 259 // entire code in one memcpy, then fix up stuff with kSkip and other byte 260 // codes that overwrite data. 261 static const int kRawData = 0x20; 262 // Some common raw lengths: 0x21-0x3f. These autoadvance the current pointer. 263 // A tag emitted at strategic points in the snapshot to delineate sections. 264 // If the deserializer does not find these at the expected moments then it 265 // is an indication that the snapshot and the VM do not fit together. 266 // Examine the build process for architecture, version or configuration 267 // mismatches. 268 static const int kSynchronize = 0x70; 269 // Used for the source code of the natives, which is in the executable, but 270 // is referred to from external strings in the snapshot. 271 static const int kNativesStringResource = 0x71; 272 static const int kRepeat = 0x72; 273 static const int kConstantRepeat = 0x73; 274 // 0x73-0x7f Repeat last word (subtract 0x72 to get the count). 275 static const int kMaxRepeats = 0x7f - 0x72; 276 static int CodeForRepeats(int repeats) { 277 ASSERT(repeats >= 1 && repeats <= kMaxRepeats); 278 return 0x72 + repeats; 279 } 280 static int RepeatsForCode(int byte_code) { 281 ASSERT(byte_code >= kConstantRepeat && byte_code <= 0x7f); 282 return byte_code - 0x72; 283 } 284 static const int kRootArrayConstants = 0xa0; 285 // 0xa0-0xbf Things from the first 32 elements of the root array. 286 static const int kRootArrayNumberOfConstantEncodings = 0x20; 287 static int RootArrayConstantFromByteCode(int byte_code) { 288 return byte_code & 0x1f; 289 } 290 291 static const int kNumberOfSpaces = LO_SPACE; 292 static const int kAnyOldSpace = -1; 293 294 // A bitmask for getting the space out of an instruction. 295 static const int kSpaceMask = 7; 296}; 297 298 299int SnapshotByteSource::GetInt() { 300 // This way of variable-length encoding integers does not suffer from branch 301 // mispredictions. 302 uint32_t answer = GetUnalignedInt(); 303 int bytes = answer & 3; 304 Advance(bytes); 305 uint32_t mask = 0xffffffffu; 306 mask >>= 32 - (bytes << 3); 307 answer &= mask; 308 answer >>= 2; 309 return answer; 310} 311 312 313void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) { 314 OS::MemCopy(to, data_ + position_, number_of_bytes); 315 position_ += number_of_bytes; 316} 317 318 319// A Deserializer reads a snapshot and reconstructs the Object graph it defines. 320class Deserializer: public SerializerDeserializer { 321 public: 322 // Create a deserializer from a snapshot byte source. 323 explicit Deserializer(SnapshotByteSource* source); 324 325 virtual ~Deserializer(); 326 327 // Deserialize the snapshot into an empty heap. 328 void Deserialize(); 329 330 // Deserialize a single object and the objects reachable from it. 331 void DeserializePartial(Object** root); 332 333 void set_reservation(int space_number, int reservation) { 334 ASSERT(space_number >= 0); 335 ASSERT(space_number <= LAST_SPACE); 336 reservations_[space_number] = reservation; 337 } 338 339 private: 340 virtual void VisitPointers(Object** start, Object** end); 341 342 virtual void VisitExternalReferences(Address* start, Address* end) { 343 UNREACHABLE(); 344 } 345 346 virtual void VisitRuntimeEntry(RelocInfo* rinfo) { 347 UNREACHABLE(); 348 } 349 350 // Allocation sites are present in the snapshot, and must be linked into 351 // a list at deserialization time. 352 void RelinkAllocationSite(AllocationSite* site); 353 354 // Fills in some heap data in an area from start to end (non-inclusive). The 355 // space id is used for the write barrier. The object_address is the address 356 // of the object we are writing into, or NULL if we are not writing into an 357 // object, i.e. if we are writing a series of tagged values that are not on 358 // the heap. 359 void ReadChunk( 360 Object** start, Object** end, int space, Address object_address); 361 void ReadObject(int space_number, Object** write_back); 362 363 // This routine both allocates a new object, and also keeps 364 // track of where objects have been allocated so that we can 365 // fix back references when deserializing. 366 Address Allocate(int space_index, int size) { 367 Address address = high_water_[space_index]; 368 high_water_[space_index] = address + size; 369 return address; 370 } 371 372 // This returns the address of an object that has been described in the 373 // snapshot as being offset bytes back in a particular space. 374 HeapObject* GetAddressFromEnd(int space) { 375 int offset = source_->GetInt(); 376 offset <<= kObjectAlignmentBits; 377 return HeapObject::FromAddress(high_water_[space] - offset); 378 } 379 380 381 // Cached current isolate. 382 Isolate* isolate_; 383 384 SnapshotByteSource* source_; 385 // This is the address of the next object that will be allocated in each 386 // space. It is used to calculate the addresses of back-references. 387 Address high_water_[LAST_SPACE + 1]; 388 389 int reservations_[LAST_SPACE + 1]; 390 static const intptr_t kUninitializedReservation = -1; 391 392 ExternalReferenceDecoder* external_reference_decoder_; 393 394 DISALLOW_COPY_AND_ASSIGN(Deserializer); 395}; 396 397 398class SnapshotByteSink { 399 public: 400 virtual ~SnapshotByteSink() { } 401 virtual void Put(int byte, const char* description) = 0; 402 virtual void PutSection(int byte, const char* description) { 403 Put(byte, description); 404 } 405 void PutInt(uintptr_t integer, const char* description); 406 virtual int Position() = 0; 407}; 408 409 410// Mapping objects to their location after deserialization. 411// This is used during building, but not at runtime by V8. 412class SerializationAddressMapper { 413 public: 414 SerializationAddressMapper() 415 : no_allocation_(), 416 serialization_map_(new HashMap(&SerializationMatchFun)) { } 417 418 ~SerializationAddressMapper() { 419 delete serialization_map_; 420 } 421 422 bool IsMapped(HeapObject* obj) { 423 return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL; 424 } 425 426 int MappedTo(HeapObject* obj) { 427 ASSERT(IsMapped(obj)); 428 return static_cast<int>(reinterpret_cast<intptr_t>( 429 serialization_map_->Lookup(Key(obj), Hash(obj), false)->value)); 430 } 431 432 void AddMapping(HeapObject* obj, int to) { 433 ASSERT(!IsMapped(obj)); 434 HashMap::Entry* entry = 435 serialization_map_->Lookup(Key(obj), Hash(obj), true); 436 entry->value = Value(to); 437 } 438 439 private: 440 static bool SerializationMatchFun(void* key1, void* key2) { 441 return key1 == key2; 442 } 443 444 static uint32_t Hash(HeapObject* obj) { 445 return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address())); 446 } 447 448 static void* Key(HeapObject* obj) { 449 return reinterpret_cast<void*>(obj->address()); 450 } 451 452 static void* Value(int v) { 453 return reinterpret_cast<void*>(v); 454 } 455 456 DisallowHeapAllocation no_allocation_; 457 HashMap* serialization_map_; 458 DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper); 459}; 460 461 462class CodeAddressMap; 463 464// There can be only one serializer per V8 process. 465class Serializer : public SerializerDeserializer { 466 public: 467 explicit Serializer(SnapshotByteSink* sink); 468 ~Serializer(); 469 void VisitPointers(Object** start, Object** end); 470 // You can call this after serialization to find out how much space was used 471 // in each space. 472 int CurrentAllocationAddress(int space) { 473 ASSERT(space < kNumberOfSpaces); 474 return fullness_[space]; 475 } 476 477 static void Enable(); 478 static void Disable(); 479 480 // Call this when you have made use of the fact that there is no serialization 481 // going on. 482 static void TooLateToEnableNow() { too_late_to_enable_now_ = true; } 483 static bool enabled() { return serialization_enabled_; } 484 SerializationAddressMapper* address_mapper() { return &address_mapper_; } 485 void PutRoot(int index, 486 HeapObject* object, 487 HowToCode how, 488 WhereToPoint where, 489 int skip); 490 491 protected: 492 static const int kInvalidRootIndex = -1; 493 494 int RootIndex(HeapObject* heap_object, HowToCode from); 495 virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) = 0; 496 intptr_t root_index_wave_front() { return root_index_wave_front_; } 497 void set_root_index_wave_front(intptr_t value) { 498 ASSERT(value >= root_index_wave_front_); 499 root_index_wave_front_ = value; 500 } 501 502 class ObjectSerializer : public ObjectVisitor { 503 public: 504 ObjectSerializer(Serializer* serializer, 505 Object* o, 506 SnapshotByteSink* sink, 507 HowToCode how_to_code, 508 WhereToPoint where_to_point) 509 : serializer_(serializer), 510 object_(HeapObject::cast(o)), 511 sink_(sink), 512 reference_representation_(how_to_code + where_to_point), 513 bytes_processed_so_far_(0), 514 code_object_(o->IsCode()), 515 code_has_been_output_(false) { } 516 void Serialize(); 517 void VisitPointers(Object** start, Object** end); 518 void VisitEmbeddedPointer(RelocInfo* target); 519 void VisitExternalReferences(Address* start, Address* end); 520 void VisitExternalReference(RelocInfo* rinfo); 521 void VisitCodeTarget(RelocInfo* target); 522 void VisitCodeEntry(Address entry_address); 523 void VisitCell(RelocInfo* rinfo); 524 void VisitRuntimeEntry(RelocInfo* reloc); 525 // Used for seralizing the external strings that hold the natives source. 526 void VisitExternalAsciiString( 527 v8::String::ExternalAsciiStringResource** resource); 528 // We can't serialize a heap with external two byte strings. 529 void VisitExternalTwoByteString( 530 v8::String::ExternalStringResource** resource) { 531 UNREACHABLE(); 532 } 533 534 private: 535 enum ReturnSkip { kCanReturnSkipInsteadOfSkipping, kIgnoringReturn }; 536 // This function outputs or skips the raw data between the last pointer and 537 // up to the current position. It optionally can just return the number of 538 // bytes to skip instead of performing a skip instruction, in case the skip 539 // can be merged into the next instruction. 540 int OutputRawData(Address up_to, ReturnSkip return_skip = kIgnoringReturn); 541 542 Serializer* serializer_; 543 HeapObject* object_; 544 SnapshotByteSink* sink_; 545 int reference_representation_; 546 int bytes_processed_so_far_; 547 bool code_object_; 548 bool code_has_been_output_; 549 }; 550 551 virtual void SerializeObject(Object* o, 552 HowToCode how_to_code, 553 WhereToPoint where_to_point, 554 int skip) = 0; 555 void SerializeReferenceToPreviousObject( 556 int space, 557 int address, 558 HowToCode how_to_code, 559 WhereToPoint where_to_point, 560 int skip); 561 void InitializeAllocators(); 562 // This will return the space for an object. 563 static int SpaceOfObject(HeapObject* object); 564 int Allocate(int space, int size); 565 int EncodeExternalReference(Address addr) { 566 return external_reference_encoder_->Encode(addr); 567 } 568 569 int SpaceAreaSize(int space); 570 571 Isolate* isolate_; 572 // Keep track of the fullness of each space in order to generate 573 // relative addresses for back references. 574 int fullness_[LAST_SPACE + 1]; 575 SnapshotByteSink* sink_; 576 int current_root_index_; 577 ExternalReferenceEncoder* external_reference_encoder_; 578 static bool serialization_enabled_; 579 // Did we already make use of the fact that serialization was not enabled? 580 static bool too_late_to_enable_now_; 581 SerializationAddressMapper address_mapper_; 582 intptr_t root_index_wave_front_; 583 void Pad(); 584 585 friend class ObjectSerializer; 586 friend class Deserializer; 587 588 private: 589 static CodeAddressMap* code_address_map_; 590 DISALLOW_COPY_AND_ASSIGN(Serializer); 591}; 592 593 594class PartialSerializer : public Serializer { 595 public: 596 PartialSerializer(Serializer* startup_snapshot_serializer, 597 SnapshotByteSink* sink) 598 : Serializer(sink), 599 startup_serializer_(startup_snapshot_serializer) { 600 set_root_index_wave_front(Heap::kStrongRootListLength); 601 } 602 603 // Serialize the objects reachable from a single object pointer. 604 virtual void Serialize(Object** o); 605 virtual void SerializeObject(Object* o, 606 HowToCode how_to_code, 607 WhereToPoint where_to_point, 608 int skip); 609 610 protected: 611 virtual int PartialSnapshotCacheIndex(HeapObject* o); 612 virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) { 613 // Scripts should be referred only through shared function infos. We can't 614 // allow them to be part of the partial snapshot because they contain a 615 // unique ID, and deserializing several partial snapshots containing script 616 // would cause dupes. 617 ASSERT(!o->IsScript()); 618 return o->IsName() || o->IsSharedFunctionInfo() || 619 o->IsHeapNumber() || o->IsCode() || 620 o->IsScopeInfo() || 621 o->map() == HEAP->fixed_cow_array_map(); 622 } 623 624 private: 625 Serializer* startup_serializer_; 626 DISALLOW_COPY_AND_ASSIGN(PartialSerializer); 627}; 628 629 630class StartupSerializer : public Serializer { 631 public: 632 explicit StartupSerializer(SnapshotByteSink* sink) : Serializer(sink) { 633 // Clear the cache of objects used by the partial snapshot. After the 634 // strong roots have been serialized we can create a partial snapshot 635 // which will repopulate the cache with objects needed by that partial 636 // snapshot. 637 Isolate::Current()->set_serialize_partial_snapshot_cache_length(0); 638 } 639 // Serialize the current state of the heap. The order is: 640 // 1) Strong references. 641 // 2) Partial snapshot cache. 642 // 3) Weak references (e.g. the string table). 643 virtual void SerializeStrongReferences(); 644 virtual void SerializeObject(Object* o, 645 HowToCode how_to_code, 646 WhereToPoint where_to_point, 647 int skip); 648 void SerializeWeakReferences(); 649 void Serialize() { 650 SerializeStrongReferences(); 651 SerializeWeakReferences(); 652 Pad(); 653 } 654 655 private: 656 virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) { 657 return false; 658 } 659}; 660 661 662} } // namespace v8::internal 663 664#endif // V8_SERIALIZE_H_ 665