1// Copyright 2012 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_SERIALIZE_H_
29#define V8_SERIALIZE_H_
30
31#include "hashmap.h"
32
33namespace v8 {
34namespace internal {
35
// A TypeCode is used to distinguish different kinds of external reference.
// The enumerators are consecutive small integers starting at 0 (they are not
// individual bit flags).  ExternalReferenceDecoder::Lookup recovers the type
// code from an encoded key by shifting the key right by kReferenceTypeShift.
enum TypeCode {
  UNCLASSIFIED,        // One-of-a-kind references.
  BUILTIN,
  RUNTIME_FUNCTION,
  IC_UTILITY,
  DEBUG_ADDRESS,
  STATS_COUNTER,
  TOP_ADDRESS,
  C_BUILTIN,
  EXTENSION,
  ACCESSOR,
  RUNTIME_ENTRY,
  STUB_CACHE_TABLE,
  LAZY_DEOPTIMIZATION  // Last enumerator; kTypeCodeCount is derived from it.
};
53
// Number of TypeCode values and the smallest one.  Together they form the
// valid range that ExternalReferenceDecoder::Lookup asserts on.
const int kTypeCodeCount = LAZY_DEOPTIMIZATION + 1;
const int kFirstTypeCode = UNCLASSIFIED;

// Layout of an encoded external reference key as read by
// ExternalReferenceDecoder::Lookup: the low kReferenceIdBits bits hold the
// id, the bits from kReferenceTypeShift upwards hold the TypeCode.
const int kReferenceIdBits = 16;
const int kReferenceIdMask = (1 << kReferenceIdBits) - 1;
const int kReferenceTypeShift = kReferenceIdBits;
// NOTE(review): not used in this header; presumably these describe how the
// id of a DEBUG_ADDRESS reference is packed -- confirm against serialize.cc.
const int kDebugRegisterBits = 4;
const int kDebugIdShift = kDebugRegisterBits;

// NOTE(review): not used in this header; presumably the number of
// LAZY_DEOPTIMIZATION entries that get an external reference -- confirm.
const int kDeoptTableSerializeEntryCount = 8;
64
65// ExternalReferenceTable is a helper class that defines the relationship
66// between external references and their encodings. It is used to build
67// hashmaps in ExternalReferenceEncoder and ExternalReferenceDecoder.
68class ExternalReferenceTable {
69 public:
70  static ExternalReferenceTable* instance(Isolate* isolate);
71
72  ~ExternalReferenceTable() { }
73
74  int size() const { return refs_.length(); }
75
76  Address address(int i) { return refs_[i].address; }
77
78  uint32_t code(int i) { return refs_[i].code; }
79
80  const char* name(int i) { return refs_[i].name; }
81
82  int max_id(int code) { return max_id_[code]; }
83
84 private:
85  explicit ExternalReferenceTable(Isolate* isolate) : refs_(64) {
86      PopulateTable(isolate);
87  }
88
89  struct ExternalReferenceEntry {
90    Address address;
91    uint32_t code;
92    const char* name;
93  };
94
95  void PopulateTable(Isolate* isolate);
96
97  // For a few types of references, we can get their address from their id.
98  void AddFromId(TypeCode type,
99                 uint16_t id,
100                 const char* name,
101                 Isolate* isolate);
102
103  // For other types of references, the caller will figure out the address.
104  void Add(Address address, TypeCode type, uint16_t id, const char* name);
105
106  List<ExternalReferenceEntry> refs_;
107  int max_id_[kTypeCodeCount];
108};
109
110
111class ExternalReferenceEncoder {
112 public:
113  ExternalReferenceEncoder();
114
115  uint32_t Encode(Address key) const;
116
117  const char* NameOfAddress(Address key) const;
118
119 private:
120  HashMap encodings_;
121  static uint32_t Hash(Address key) {
122    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2);
123  }
124
125  int IndexOf(Address key) const;
126
127  static bool Match(void* key1, void* key2) { return key1 == key2; }
128
129  void Put(Address key, int index);
130
131  Isolate* isolate_;
132};
133
134
135class ExternalReferenceDecoder {
136 public:
137  ExternalReferenceDecoder();
138  ~ExternalReferenceDecoder();
139
140  Address Decode(uint32_t key) const {
141    if (key == 0) return NULL;
142    return *Lookup(key);
143  }
144
145 private:
146  Address** encodings_;
147
148  Address* Lookup(uint32_t key) const {
149    int type = key >> kReferenceTypeShift;
150    ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount);
151    int id = key & kReferenceIdMask;
152    return &encodings_[type][id];
153  }
154
155  void Put(uint32_t key, Address value) {
156    *Lookup(key) = value;
157  }
158
159  Isolate* isolate_;
160};
161
162
163class SnapshotByteSource {
164 public:
165  SnapshotByteSource(const byte* array, int length)
166    : data_(array), length_(length), position_(0) { }
167
168  bool HasMore() { return position_ < length_; }
169
170  int Get() {
171    ASSERT(position_ < length_);
172    return data_[position_++];
173  }
174
175  int32_t GetUnalignedInt() {
176#if defined(V8_HOST_CAN_READ_UNALIGNED) &&  __BYTE_ORDER == __LITTLE_ENDIAN
177    int32_t answer;
178    ASSERT(position_ + sizeof(answer) <= length_ + 0u);
179    answer = *reinterpret_cast<const int32_t*>(data_ + position_);
180#else
181    int32_t answer = data_[position_];
182    answer |= data_[position_ + 1] << 8;
183    answer |= data_[position_ + 2] << 16;
184    answer |= data_[position_ + 3] << 24;
185#endif
186    return answer;
187  }
188
189  void Advance(int by) { position_ += by; }
190
191  inline void CopyRaw(byte* to, int number_of_bytes);
192
193  inline int GetInt();
194
195  bool AtEOF();
196
197  int position() { return position_; }
198
199 private:
200  const byte* data_;
201  int length_;
202  int position_;
203};
204
205
206// The Serializer/Deserializer class is a common superclass for Serializer and
207// Deserializer which is used to store common constants and methods used by
208// both.
209class SerializerDeserializer: public ObjectVisitor {
210 public:
211  static void Iterate(ObjectVisitor* visitor);
212
213  static int nop() { return kNop; }
214
215 protected:
216  // Where the pointed-to object can be found:
217  enum Where {
218    kNewObject = 0,                 // Object is next in snapshot.
219    // 1-6                             One per space.
220    kRootArray = 0x9,               // Object is found in root array.
221    kPartialSnapshotCache = 0xa,    // Object is in the cache.
222    kExternalReference = 0xb,       // Pointer to an external reference.
223    kSkip = 0xc,                    // Skip n bytes.
224    kNop = 0xd,                     // Does nothing, used to pad.
225    // 0xe-0xf                         Free.
226    kBackref = 0x10,                // Object is described relative to end.
227    // 0x11-0x16                       One per space.
228    kBackrefWithSkip = 0x18,        // Object is described relative to end.
229    // 0x19-0x1e                       One per space.
230    // 0x20-0x3f                       Used by misc. tags below.
231    kPointedToMask = 0x3f
232  };
233
234  // How to code the pointer to the object.
235  enum HowToCode {
236    kPlain = 0,                          // Straight pointer.
237    // What this means depends on the architecture:
238    kFromCode = 0x40,                    // A pointer inlined in code.
239    kHowToCodeMask = 0x40
240  };
241
242  // For kRootArrayConstants
243  enum WithSkip {
244    kNoSkipDistance = 0,
245    kHasSkipDistance = 0x40,
246    kWithSkipMask = 0x40
247  };
248
249  // Where to point within the object.
250  enum WhereToPoint {
251    kStartOfObject = 0,
252    kInnerPointer = 0x80,  // First insn in code object or payload of cell.
253    kWhereToPointMask = 0x80
254  };
255
256  // Misc.
257  // Raw data to be copied from the snapshot.  This byte code does not advance
258  // the current pointer, which is used for code objects, where we write the
259  // entire code in one memcpy, then fix up stuff with kSkip and other byte
260  // codes that overwrite data.
261  static const int kRawData = 0x20;
262  // Some common raw lengths: 0x21-0x3f.  These autoadvance the current pointer.
263  // A tag emitted at strategic points in the snapshot to delineate sections.
264  // If the deserializer does not find these at the expected moments then it
265  // is an indication that the snapshot and the VM do not fit together.
266  // Examine the build process for architecture, version or configuration
267  // mismatches.
268  static const int kSynchronize = 0x70;
269  // Used for the source code of the natives, which is in the executable, but
270  // is referred to from external strings in the snapshot.
271  static const int kNativesStringResource = 0x71;
272  static const int kRepeat = 0x72;
273  static const int kConstantRepeat = 0x73;
274  // 0x73-0x7f            Repeat last word (subtract 0x72 to get the count).
275  static const int kMaxRepeats = 0x7f - 0x72;
276  static int CodeForRepeats(int repeats) {
277    ASSERT(repeats >= 1 && repeats <= kMaxRepeats);
278    return 0x72 + repeats;
279  }
280  static int RepeatsForCode(int byte_code) {
281    ASSERT(byte_code >= kConstantRepeat && byte_code <= 0x7f);
282    return byte_code - 0x72;
283  }
284  static const int kRootArrayConstants = 0xa0;
285  // 0xa0-0xbf            Things from the first 32 elements of the root array.
286  static const int kRootArrayNumberOfConstantEncodings = 0x20;
287  static int RootArrayConstantFromByteCode(int byte_code) {
288    return byte_code & 0x1f;
289  }
290
291  static const int kNumberOfSpaces = LO_SPACE;
292  static const int kAnyOldSpace = -1;
293
294  // A bitmask for getting the space out of an instruction.
295  static const int kSpaceMask = 7;
296};
297
298
299int SnapshotByteSource::GetInt() {
300  // This way of variable-length encoding integers does not suffer from branch
301  // mispredictions.
302  uint32_t answer = GetUnalignedInt();
303  int bytes = answer & 3;
304  Advance(bytes);
305  uint32_t mask = 0xffffffffu;
306  mask >>= 32 - (bytes << 3);
307  answer &= mask;
308  answer >>= 2;
309  return answer;
310}
311
312
313void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) {
314  OS::MemCopy(to, data_ + position_, number_of_bytes);
315  position_ += number_of_bytes;
316}
317
318
319// A Deserializer reads a snapshot and reconstructs the Object graph it defines.
320class Deserializer: public SerializerDeserializer {
321 public:
322  // Create a deserializer from a snapshot byte source.
323  explicit Deserializer(SnapshotByteSource* source);
324
325  virtual ~Deserializer();
326
327  // Deserialize the snapshot into an empty heap.
328  void Deserialize();
329
330  // Deserialize a single object and the objects reachable from it.
331  void DeserializePartial(Object** root);
332
333  void set_reservation(int space_number, int reservation) {
334    ASSERT(space_number >= 0);
335    ASSERT(space_number <= LAST_SPACE);
336    reservations_[space_number] = reservation;
337  }
338
339 private:
340  virtual void VisitPointers(Object** start, Object** end);
341
342  virtual void VisitExternalReferences(Address* start, Address* end) {
343    UNREACHABLE();
344  }
345
346  virtual void VisitRuntimeEntry(RelocInfo* rinfo) {
347    UNREACHABLE();
348  }
349
350  // Allocation sites are present in the snapshot, and must be linked into
351  // a list at deserialization time.
352  void RelinkAllocationSite(AllocationSite* site);
353
354  // Fills in some heap data in an area from start to end (non-inclusive).  The
355  // space id is used for the write barrier.  The object_address is the address
356  // of the object we are writing into, or NULL if we are not writing into an
357  // object, i.e. if we are writing a series of tagged values that are not on
358  // the heap.
359  void ReadChunk(
360      Object** start, Object** end, int space, Address object_address);
361  void ReadObject(int space_number, Object** write_back);
362
363  // This routine both allocates a new object, and also keeps
364  // track of where objects have been allocated so that we can
365  // fix back references when deserializing.
366  Address Allocate(int space_index, int size) {
367    Address address = high_water_[space_index];
368    high_water_[space_index] = address + size;
369    return address;
370  }
371
372  // This returns the address of an object that has been described in the
373  // snapshot as being offset bytes back in a particular space.
374  HeapObject* GetAddressFromEnd(int space) {
375    int offset = source_->GetInt();
376    offset <<= kObjectAlignmentBits;
377    return HeapObject::FromAddress(high_water_[space] - offset);
378  }
379
380
381  // Cached current isolate.
382  Isolate* isolate_;
383
384  SnapshotByteSource* source_;
385  // This is the address of the next object that will be allocated in each
386  // space.  It is used to calculate the addresses of back-references.
387  Address high_water_[LAST_SPACE + 1];
388
389  int reservations_[LAST_SPACE + 1];
390  static const intptr_t kUninitializedReservation = -1;
391
392  ExternalReferenceDecoder* external_reference_decoder_;
393
394  DISALLOW_COPY_AND_ASSIGN(Deserializer);
395};
396
397
// Abstract destination for the bytes a Serializer emits.  Concrete sinks
// implement Put and Position.
class SnapshotByteSink {
 public:
  virtual ~SnapshotByteSink() { }
  // Writes one byte.  The description is passed along with the byte; what a
  // sink does with it is up to the implementation.
  virtual void Put(int byte, const char* description) = 0;
  // Forwards to Put by default; virtual, so a sink may handle these bytes
  // differently.
  virtual void PutSection(int byte, const char* description) {
    Put(byte, description);
  }
  // Writes an integer.  Defined out of line.
  // NOTE(review): presumably emits the variable-length format that
  // SnapshotByteSource::GetInt decodes -- confirm against the definition.
  void PutInt(uintptr_t integer, const char* description);
  // Current position as reported by the concrete sink.
  virtual int Position() = 0;
};
408
409
410// Mapping objects to their location after deserialization.
411// This is used during building, but not at runtime by V8.
412class SerializationAddressMapper {
413 public:
414  SerializationAddressMapper()
415      : no_allocation_(),
416        serialization_map_(new HashMap(&SerializationMatchFun)) { }
417
418  ~SerializationAddressMapper() {
419    delete serialization_map_;
420  }
421
422  bool IsMapped(HeapObject* obj) {
423    return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL;
424  }
425
426  int MappedTo(HeapObject* obj) {
427    ASSERT(IsMapped(obj));
428    return static_cast<int>(reinterpret_cast<intptr_t>(
429        serialization_map_->Lookup(Key(obj), Hash(obj), false)->value));
430  }
431
432  void AddMapping(HeapObject* obj, int to) {
433    ASSERT(!IsMapped(obj));
434    HashMap::Entry* entry =
435        serialization_map_->Lookup(Key(obj), Hash(obj), true);
436    entry->value = Value(to);
437  }
438
439 private:
440  static bool SerializationMatchFun(void* key1, void* key2) {
441    return key1 == key2;
442  }
443
444  static uint32_t Hash(HeapObject* obj) {
445    return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address()));
446  }
447
448  static void* Key(HeapObject* obj) {
449    return reinterpret_cast<void*>(obj->address());
450  }
451
452  static void* Value(int v) {
453    return reinterpret_cast<void*>(v);
454  }
455
456  DisallowHeapAllocation no_allocation_;
457  HashMap* serialization_map_;
458  DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper);
459};
460
461
462class CodeAddressMap;
463
// There can be only one serializer per V8 process.
// Serializer is an abstract base class: subclasses supply SerializeObject and
// ShouldBeInThePartialSnapshotCache.  Output goes to a SnapshotByteSink.
class Serializer : public SerializerDeserializer {
 public:
  explicit Serializer(SnapshotByteSink* sink);
  ~Serializer();
  void VisitPointers(Object** start, Object** end);
  // You can call this after serialization to find out how much space was used
  // in each space.
  int CurrentAllocationAddress(int space) {
    ASSERT(space < kNumberOfSpaces);
    return fullness_[space];
  }

  static void Enable();
  static void Disable();

  // Call this when you have made use of the fact that there is no serialization
  // going on.
  static void TooLateToEnableNow() { too_late_to_enable_now_ = true; }
  // Current value of the process-wide serialization flag.
  static bool enabled() { return serialization_enabled_; }
  // The object-to-location map owned by this serializer.
  SerializationAddressMapper* address_mapper() { return &address_mapper_; }
  void PutRoot(int index,
               HeapObject* object,
               HowToCode how,
               WhereToPoint where,
               int skip);

 protected:
  static const int kInvalidRootIndex = -1;

  int RootIndex(HeapObject* heap_object, HowToCode from);
  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) = 0;
  intptr_t root_index_wave_front() { return root_index_wave_front_; }
  // The wave front may only stay the same or move forward.
  void set_root_index_wave_front(intptr_t value) {
    ASSERT(value >= root_index_wave_front_);
    root_index_wave_front_ = value;
  }

  // Visitor that serializes a single heap object into a sink.
  class ObjectSerializer : public ObjectVisitor {
   public:
    // how_to_code and where_to_point are combined into one value
    // (reference_representation_); whether the object is a Code object is
    // recorded at construction time.
    ObjectSerializer(Serializer* serializer,
                     Object* o,
                     SnapshotByteSink* sink,
                     HowToCode how_to_code,
                     WhereToPoint where_to_point)
      : serializer_(serializer),
        object_(HeapObject::cast(o)),
        sink_(sink),
        reference_representation_(how_to_code + where_to_point),
        bytes_processed_so_far_(0),
        code_object_(o->IsCode()),
        code_has_been_output_(false) { }
    void Serialize();
    void VisitPointers(Object** start, Object** end);
    void VisitEmbeddedPointer(RelocInfo* target);
    void VisitExternalReferences(Address* start, Address* end);
    void VisitExternalReference(RelocInfo* rinfo);
    void VisitCodeTarget(RelocInfo* target);
    void VisitCodeEntry(Address entry_address);
    void VisitCell(RelocInfo* rinfo);
    void VisitRuntimeEntry(RelocInfo* reloc);
    // Used for serializing the external strings that hold the natives source.
    void VisitExternalAsciiString(
        v8::String::ExternalAsciiStringResource** resource);
    // We can't serialize a heap with external two byte strings.
    void VisitExternalTwoByteString(
        v8::String::ExternalStringResource** resource) {
      UNREACHABLE();
    }

   private:
    enum ReturnSkip { kCanReturnSkipInsteadOfSkipping, kIgnoringReturn };
    // This function outputs or skips the raw data between the last pointer and
    // up to the current position.  It optionally can just return the number of
    // bytes to skip instead of performing a skip instruction, in case the skip
    // can be merged into the next instruction.
    int OutputRawData(Address up_to, ReturnSkip return_skip = kIgnoringReturn);

    Serializer* serializer_;
    HeapObject* object_;
    SnapshotByteSink* sink_;
    int reference_representation_;
    int bytes_processed_so_far_;
    bool code_object_;
    bool code_has_been_output_;
  };

  virtual void SerializeObject(Object* o,
                               HowToCode how_to_code,
                               WhereToPoint where_to_point,
                               int skip) = 0;
  void SerializeReferenceToPreviousObject(
      int space,
      int address,
      HowToCode how_to_code,
      WhereToPoint where_to_point,
      int skip);
  void InitializeAllocators();
  // This will return the space for an object.
  static int SpaceOfObject(HeapObject* object);
  int Allocate(int space, int size);
  // Encodes addr through the external reference encoder.  Note that the
  // encoder's uint32_t result is returned as an int.
  int EncodeExternalReference(Address addr) {
    return external_reference_encoder_->Encode(addr);
  }

  int SpaceAreaSize(int space);

  Isolate* isolate_;
  // Keep track of the fullness of each space in order to generate
  // relative addresses for back references.
  int fullness_[LAST_SPACE + 1];
  SnapshotByteSink* sink_;
  int current_root_index_;
  ExternalReferenceEncoder* external_reference_encoder_;
  static bool serialization_enabled_;
  // Did we already make use of the fact that serialization was not enabled?
  static bool too_late_to_enable_now_;
  SerializationAddressMapper address_mapper_;
  intptr_t root_index_wave_front_;
  void Pad();

  friend class ObjectSerializer;
  friend class Deserializer;

 private:
  static CodeAddressMap* code_address_map_;
  DISALLOW_COPY_AND_ASSIGN(Serializer);
};
592
593
594class PartialSerializer : public Serializer {
595 public:
596  PartialSerializer(Serializer* startup_snapshot_serializer,
597                    SnapshotByteSink* sink)
598    : Serializer(sink),
599      startup_serializer_(startup_snapshot_serializer) {
600    set_root_index_wave_front(Heap::kStrongRootListLength);
601  }
602
603  // Serialize the objects reachable from a single object pointer.
604  virtual void Serialize(Object** o);
605  virtual void SerializeObject(Object* o,
606                               HowToCode how_to_code,
607                               WhereToPoint where_to_point,
608                               int skip);
609
610 protected:
611  virtual int PartialSnapshotCacheIndex(HeapObject* o);
612  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
613    // Scripts should be referred only through shared function infos.  We can't
614    // allow them to be part of the partial snapshot because they contain a
615    // unique ID, and deserializing several partial snapshots containing script
616    // would cause dupes.
617    ASSERT(!o->IsScript());
618    return o->IsName() || o->IsSharedFunctionInfo() ||
619           o->IsHeapNumber() || o->IsCode() ||
620           o->IsScopeInfo() ||
621           o->map() == HEAP->fixed_cow_array_map();
622  }
623
624 private:
625  Serializer* startup_serializer_;
626  DISALLOW_COPY_AND_ASSIGN(PartialSerializer);
627};
628
629
630class StartupSerializer : public Serializer {
631 public:
632  explicit StartupSerializer(SnapshotByteSink* sink) : Serializer(sink) {
633    // Clear the cache of objects used by the partial snapshot.  After the
634    // strong roots have been serialized we can create a partial snapshot
635    // which will repopulate the cache with objects needed by that partial
636    // snapshot.
637    Isolate::Current()->set_serialize_partial_snapshot_cache_length(0);
638  }
639  // Serialize the current state of the heap.  The order is:
640  // 1) Strong references.
641  // 2) Partial snapshot cache.
642  // 3) Weak references (e.g. the string table).
643  virtual void SerializeStrongReferences();
644  virtual void SerializeObject(Object* o,
645                               HowToCode how_to_code,
646                               WhereToPoint where_to_point,
647                               int skip);
648  void SerializeWeakReferences();
649  void Serialize() {
650    SerializeStrongReferences();
651    SerializeWeakReferences();
652    Pad();
653  }
654
655 private:
656  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
657    return false;
658  }
659};
660
661
662} }  // namespace v8::internal
663
664#endif  // V8_SERIALIZE_H_
665