1// Copyright 2012 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_SERIALIZE_H_
29#define V8_SERIALIZE_H_
30
31#include "hashmap.h"
32
33namespace v8 {
34namespace internal {
35
// A TypeCode names the category that an external reference belongs to.
// The codes are consecutive small integers starting at zero, which lets them
// be used as array indices (arrays in this file are sized by kTypeCodeCount).
enum TypeCode {
  UNCLASSIFIED = 0,       // One-of-a-kind references.
  BUILTIN = 1,
  RUNTIME_FUNCTION = 2,
  IC_UTILITY = 3,
  DEBUG_ADDRESS = 4,
  STATS_COUNTER = 5,
  TOP_ADDRESS = 6,
  C_BUILTIN = 7,
  EXTENSION = 8,
  ACCESSOR = 9,
  RUNTIME_ENTRY = 10,
  STUB_CACHE_TABLE = 11   // Must stay last: kTypeCodeCount is derived from it.
};

// Range of valid type codes: kFirstTypeCode <= code < kTypeCodeCount.
const int kFirstTypeCode = UNCLASSIFIED;
const int kTypeCodeCount = STUB_CACHE_TABLE + 1;
55
// Bit layout of an encoded external reference: the id occupies the low
// kReferenceIdBits bits and the type code is stored directly above it.
const int kReferenceIdBits = 16;
const int kReferenceTypeShift = kReferenceIdBits;
const int kReferenceIdMask = (1 << kReferenceTypeShift) - 1;

// Bit widths for debug ids.  NOTE(review): not used anywhere in this header;
// presumably consumed where DEBUG_ADDRESS references are encoded - confirm.
const int kDebugRegisterBits = 4;
const int kDebugIdShift = kDebugRegisterBits;
61
62
63// ExternalReferenceTable is a helper class that defines the relationship
64// between external references and their encodings. It is used to build
65// hashmaps in ExternalReferenceEncoder and ExternalReferenceDecoder.
66class ExternalReferenceTable {
67 public:
68  static ExternalReferenceTable* instance(Isolate* isolate);
69
70  ~ExternalReferenceTable() { }
71
72  int size() const { return refs_.length(); }
73
74  Address address(int i) { return refs_[i].address; }
75
76  uint32_t code(int i) { return refs_[i].code; }
77
78  const char* name(int i) { return refs_[i].name; }
79
80  int max_id(int code) { return max_id_[code]; }
81
82 private:
83  explicit ExternalReferenceTable(Isolate* isolate) : refs_(64) {
84      PopulateTable(isolate);
85  }
86
87  struct ExternalReferenceEntry {
88    Address address;
89    uint32_t code;
90    const char* name;
91  };
92
93  void PopulateTable(Isolate* isolate);
94
95  // For a few types of references, we can get their address from their id.
96  void AddFromId(TypeCode type,
97                 uint16_t id,
98                 const char* name,
99                 Isolate* isolate);
100
101  // For other types of references, the caller will figure out the address.
102  void Add(Address address, TypeCode type, uint16_t id, const char* name);
103
104  List<ExternalReferenceEntry> refs_;
105  int max_id_[kTypeCodeCount];
106};
107
108
109class ExternalReferenceEncoder {
110 public:
111  ExternalReferenceEncoder();
112
113  uint32_t Encode(Address key) const;
114
115  const char* NameOfAddress(Address key) const;
116
117 private:
118  HashMap encodings_;
119  static uint32_t Hash(Address key) {
120    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2);
121  }
122
123  int IndexOf(Address key) const;
124
125  static bool Match(void* key1, void* key2) { return key1 == key2; }
126
127  void Put(Address key, int index);
128
129  Isolate* isolate_;
130};
131
132
133class ExternalReferenceDecoder {
134 public:
135  ExternalReferenceDecoder();
136  ~ExternalReferenceDecoder();
137
138  Address Decode(uint32_t key) const {
139    if (key == 0) return NULL;
140    return *Lookup(key);
141  }
142
143 private:
144  Address** encodings_;
145
146  Address* Lookup(uint32_t key) const {
147    int type = key >> kReferenceTypeShift;
148    ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount);
149    int id = key & kReferenceIdMask;
150    return &encodings_[type][id];
151  }
152
153  void Put(uint32_t key, Address value) {
154    *Lookup(key) = value;
155  }
156
157  Isolate* isolate_;
158};
159
160
161class SnapshotByteSource {
162 public:
163  SnapshotByteSource(const byte* array, int length)
164    : data_(array), length_(length), position_(0) { }
165
166  bool HasMore() { return position_ < length_; }
167
168  int Get() {
169    ASSERT(position_ < length_);
170    return data_[position_++];
171  }
172
173  inline void CopyRaw(byte* to, int number_of_bytes);
174
175  inline int GetInt();
176
177  bool AtEOF() {
178    return position_ == length_;
179  }
180
181  int position() { return position_; }
182
183 private:
184  const byte* data_;
185  int length_;
186  int position_;
187};
188
189
// X-macro over the frequently used raw-data lengths.  Expands to one
// V(index, length) invocation per entry: indices run from 1 to 15, lengths
// are 1..8 followed by the multiples of four from 12 to 36.
#define COMMON_RAW_LENGTHS(V) \
  V(1, 1)                     \
  V(2, 2)                     \
  V(3, 3)                     \
  V(4, 4)                     \
  V(5, 5)                     \
  V(6, 6)                     \
  V(7, 7)                     \
  V(8, 8)                     \
  V(9, 12)                    \
  V(10, 16)                   \
  V(11, 20)                   \
  V(12, 24)                   \
  V(13, 28)                   \
  V(14, 32)                   \
  V(15, 36)
206
207// The Serializer/Deserializer class is a common superclass for Serializer and
208// Deserializer which is used to store common constants and methods used by
209// both.
210class SerializerDeserializer: public ObjectVisitor {
211 public:
212  static void Iterate(ObjectVisitor* visitor);
213  static void SetSnapshotCacheSize(int size);
214
215 protected:
216  // Where the pointed-to object can be found:
217  enum Where {
218    kNewObject = 0,                 // Object is next in snapshot.
219    // 1-8                             One per space.
220    kRootArray = 0x9,               // Object is found in root array.
221    kPartialSnapshotCache = 0xa,    // Object is in the cache.
222    kExternalReference = 0xb,       // Pointer to an external reference.
223    kSkip = 0xc,                    // Skip a pointer sized cell.
224    // 0xd-0xf                         Free.
225    kBackref = 0x10,                 // Object is described relative to end.
226    // 0x11-0x18                       One per space.
227    // 0x19-0x1f                       Free.
228    kFromStart = 0x20,              // Object is described relative to start.
229    // 0x21-0x28                       One per space.
230    // 0x29-0x2f                       Free.
231    // 0x30-0x3f                       Used by misc. tags below.
232    kPointedToMask = 0x3f
233  };
234
235  // How to code the pointer to the object.
236  enum HowToCode {
237    kPlain = 0,                          // Straight pointer.
238    // What this means depends on the architecture:
239    kFromCode = 0x40,                    // A pointer inlined in code.
240    kHowToCodeMask = 0x40
241  };
242
243  // Where to point within the object.
244  enum WhereToPoint {
245    kStartOfObject = 0,
246    kFirstInstruction = 0x80,
247    kWhereToPointMask = 0x80
248  };
249
250  // Misc.
251  // Raw data to be copied from the snapshot.
252  static const int kRawData = 0x30;
253  // Some common raw lengths: 0x31-0x3f
254  // A tag emitted at strategic points in the snapshot to delineate sections.
255  // If the deserializer does not find these at the expected moments then it
256  // is an indication that the snapshot and the VM do not fit together.
257  // Examine the build process for architecture, version or configuration
258  // mismatches.
259  static const int kSynchronize = 0x70;
260  // Used for the source code of the natives, which is in the executable, but
261  // is referred to from external strings in the snapshot.
262  static const int kNativesStringResource = 0x71;
263  static const int kNewPage = 0x72;
264  static const int kRepeat = 0x73;
265  static const int kConstantRepeat = 0x74;
266  // 0x74-0x7f            Repeat last word (subtract 0x73 to get the count).
267  static const int kMaxRepeats = 0x7f - 0x73;
268  static int CodeForRepeats(int repeats) {
269    ASSERT(repeats >= 1 && repeats <= kMaxRepeats);
270    return 0x73 + repeats;
271  }
272  static int RepeatsForCode(int byte_code) {
273    ASSERT(byte_code >= kConstantRepeat && byte_code <= 0x7f);
274    return byte_code - 0x73;
275  }
276  static const int kRootArrayLowConstants = 0xb0;
277  // 0xb0-0xbf            Things from the first 16 elements of the root array.
278  static const int kRootArrayHighConstants = 0xf0;
279  // 0xf0-0xff            Things from the next 16 elements of the root array.
280  static const int kRootArrayNumberOfConstantEncodings = 0x20;
281  static const int kRootArrayNumberOfLowConstantEncodings = 0x10;
282  static int RootArrayConstantFromByteCode(int byte_code) {
283    int constant = (byte_code & 0xf) | ((byte_code & 0x40) >> 2);
284    ASSERT(constant >= 0 && constant < kRootArrayNumberOfConstantEncodings);
285    return constant;
286  }
287
288
289  static const int kLargeData = LAST_SPACE;
290  static const int kLargeCode = kLargeData + 1;
291  static const int kLargeFixedArray = kLargeCode + 1;
292  static const int kNumberOfSpaces = kLargeFixedArray + 1;
293  static const int kAnyOldSpace = -1;
294
295  // A bitmask for getting the space out of an instruction.
296  static const int kSpaceMask = 15;
297
298  static inline bool SpaceIsLarge(int space) { return space >= kLargeData; }
299  static inline bool SpaceIsPaged(int space) {
300    return space >= FIRST_PAGED_SPACE && space <= LAST_PAGED_SPACE;
301  }
302};
303
304
305int SnapshotByteSource::GetInt() {
306  // A little unwind to catch the really small ints.
307  int snapshot_byte = Get();
308  if ((snapshot_byte & 0x80) == 0) {
309    return snapshot_byte;
310  }
311  int accumulator = (snapshot_byte & 0x7f) << 7;
312  while (true) {
313    snapshot_byte = Get();
314    if ((snapshot_byte & 0x80) == 0) {
315      return accumulator | snapshot_byte;
316    }
317    accumulator = (accumulator | (snapshot_byte & 0x7f)) << 7;
318  }
319  UNREACHABLE();
320  return accumulator;
321}
322
323
324void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) {
325  memcpy(to, data_ + position_, number_of_bytes);
326  position_ += number_of_bytes;
327}
328
329
330// A Deserializer reads a snapshot and reconstructs the Object graph it defines.
331class Deserializer: public SerializerDeserializer {
332 public:
333  // Create a deserializer from a snapshot byte source.
334  explicit Deserializer(SnapshotByteSource* source);
335
336  virtual ~Deserializer();
337
338  // Deserialize the snapshot into an empty heap.
339  void Deserialize();
340
341  // Deserialize a single object and the objects reachable from it.
342  void DeserializePartial(Object** root);
343
344 private:
345  virtual void VisitPointers(Object** start, Object** end);
346
347  virtual void VisitExternalReferences(Address* start, Address* end) {
348    UNREACHABLE();
349  }
350
351  virtual void VisitRuntimeEntry(RelocInfo* rinfo) {
352    UNREACHABLE();
353  }
354
355  // Fills in some heap data in an area from start to end (non-inclusive).  The
356  // space id is used for the write barrier.  The object_address is the address
357  // of the object we are writing into, or NULL if we are not writing into an
358  // object, i.e. if we are writing a series of tagged values that are not on
359  // the heap.
360  void ReadChunk(
361      Object** start, Object** end, int space, Address object_address);
362  HeapObject* GetAddressFromStart(int space);
363  inline HeapObject* GetAddressFromEnd(int space);
364  Address Allocate(int space_number, Space* space, int size);
365  void ReadObject(int space_number, Space* space, Object** write_back);
366
367  // Cached current isolate.
368  Isolate* isolate_;
369
370  // Keep track of the pages in the paged spaces.
371  // (In large object space we are keeping track of individual objects
372  // rather than pages.)  In new space we just need the address of the
373  // first object and the others will flow from that.
374  List<Address> pages_[SerializerDeserializer::kNumberOfSpaces];
375
376  SnapshotByteSource* source_;
377  // This is the address of the next object that will be allocated in each
378  // space.  It is used to calculate the addresses of back-references.
379  Address high_water_[LAST_SPACE + 1];
380  // This is the address of the most recent object that was allocated.  It
381  // is used to set the location of the new page when we encounter a
382  // START_NEW_PAGE_SERIALIZATION tag.
383  Address last_object_address_;
384
385  ExternalReferenceDecoder* external_reference_decoder_;
386
387  DISALLOW_COPY_AND_ASSIGN(Deserializer);
388};
389
390
// Abstract destination for the bytes of a snapshot.  Each value written is
// accompanied by a textual description.
class SnapshotByteSink {
 public:
  virtual ~SnapshotByteSink() { }

  // Writes one value; implemented by subclasses.
  virtual void Put(int byte, const char* description) = 0;

  // Writes a value that begins a section.  Unless overridden this simply
  // forwards to Put().
  virtual void PutSection(int byte, const char* description) {
    this->Put(byte, description);
  }

  // Writes an integer; defined out of line.
  void PutInt(uintptr_t integer, const char* description);

  // Current position in the sink; implemented by subclasses.
  virtual int Position() = 0;
};
401
402
403// Mapping objects to their location after deserialization.
404// This is used during building, but not at runtime by V8.
405class SerializationAddressMapper {
406 public:
407  SerializationAddressMapper()
408      : serialization_map_(new HashMap(&SerializationMatchFun)),
409        no_allocation_(new AssertNoAllocation()) { }
410
411  ~SerializationAddressMapper() {
412    delete serialization_map_;
413    delete no_allocation_;
414  }
415
416  bool IsMapped(HeapObject* obj) {
417    return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL;
418  }
419
420  int MappedTo(HeapObject* obj) {
421    ASSERT(IsMapped(obj));
422    return static_cast<int>(reinterpret_cast<intptr_t>(
423        serialization_map_->Lookup(Key(obj), Hash(obj), false)->value));
424  }
425
426  void AddMapping(HeapObject* obj, int to) {
427    ASSERT(!IsMapped(obj));
428    HashMap::Entry* entry =
429        serialization_map_->Lookup(Key(obj), Hash(obj), true);
430    entry->value = Value(to);
431  }
432
433 private:
434  static bool SerializationMatchFun(void* key1, void* key2) {
435    return key1 == key2;
436  }
437
438  static uint32_t Hash(HeapObject* obj) {
439    return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address()));
440  }
441
442  static void* Key(HeapObject* obj) {
443    return reinterpret_cast<void*>(obj->address());
444  }
445
446  static void* Value(int v) {
447    return reinterpret_cast<void*>(v);
448  }
449
450  HashMap* serialization_map_;
451  AssertNoAllocation* no_allocation_;
452  DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper);
453};
454
455
456// There can be only one serializer per V8 process.
457class Serializer : public SerializerDeserializer {
458 public:
459  explicit Serializer(SnapshotByteSink* sink);
460  ~Serializer();
461  void VisitPointers(Object** start, Object** end);
462  // You can call this after serialization to find out how much space was used
463  // in each space.
464  int CurrentAllocationAddress(int space) {
465    if (SpaceIsLarge(space)) return large_object_total_;
466    return fullness_[space];
467  }
468
469  static void Enable() {
470    if (!serialization_enabled_) {
471      ASSERT(!too_late_to_enable_now_);
472    }
473    serialization_enabled_ = true;
474  }
475
476  static void Disable() { serialization_enabled_ = false; }
477  // Call this when you have made use of the fact that there is no serialization
478  // going on.
479  static void TooLateToEnableNow() { too_late_to_enable_now_ = true; }
480  static bool enabled() { return serialization_enabled_; }
481  SerializationAddressMapper* address_mapper() { return &address_mapper_; }
482  void PutRoot(
483      int index, HeapObject* object, HowToCode how, WhereToPoint where);
484
485 protected:
486  static const int kInvalidRootIndex = -1;
487
488  int RootIndex(HeapObject* heap_object, HowToCode from);
489  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) = 0;
490  intptr_t root_index_wave_front() { return root_index_wave_front_; }
491  void set_root_index_wave_front(intptr_t value) {
492    ASSERT(value >= root_index_wave_front_);
493    root_index_wave_front_ = value;
494  }
495
496  class ObjectSerializer : public ObjectVisitor {
497   public:
498    ObjectSerializer(Serializer* serializer,
499                     Object* o,
500                     SnapshotByteSink* sink,
501                     HowToCode how_to_code,
502                     WhereToPoint where_to_point)
503      : serializer_(serializer),
504        object_(HeapObject::cast(o)),
505        sink_(sink),
506        reference_representation_(how_to_code + where_to_point),
507        bytes_processed_so_far_(0) { }
508    void Serialize();
509    void VisitPointers(Object** start, Object** end);
510    void VisitEmbeddedPointer(RelocInfo* target);
511    void VisitExternalReferences(Address* start, Address* end);
512    void VisitExternalReference(RelocInfo* rinfo);
513    void VisitCodeTarget(RelocInfo* target);
514    void VisitCodeEntry(Address entry_address);
515    void VisitGlobalPropertyCell(RelocInfo* rinfo);
516    void VisitRuntimeEntry(RelocInfo* reloc);
517    // Used for seralizing the external strings that hold the natives source.
518    void VisitExternalAsciiString(
519        v8::String::ExternalAsciiStringResource** resource);
520    // We can't serialize a heap with external two byte strings.
521    void VisitExternalTwoByteString(
522        v8::String::ExternalStringResource** resource) {
523      UNREACHABLE();
524    }
525
526   private:
527    void OutputRawData(Address up_to);
528
529    Serializer* serializer_;
530    HeapObject* object_;
531    SnapshotByteSink* sink_;
532    int reference_representation_;
533    int bytes_processed_so_far_;
534  };
535
536  virtual void SerializeObject(Object* o,
537                               HowToCode how_to_code,
538                               WhereToPoint where_to_point) = 0;
539  void SerializeReferenceToPreviousObject(
540      int space,
541      int address,
542      HowToCode how_to_code,
543      WhereToPoint where_to_point);
544  void InitializeAllocators();
545  // This will return the space for an object.  If the object is in large
546  // object space it may return kLargeCode or kLargeFixedArray in order
547  // to indicate to the deserializer what kind of large object allocation
548  // to make.
549  static int SpaceOfObject(HeapObject* object);
550  // This just returns the space of the object.  It will return LO_SPACE
551  // for all large objects since you can't check the type of the object
552  // once the map has been used for the serialization address.
553  static int SpaceOfAlreadySerializedObject(HeapObject* object);
554  int Allocate(int space, int size, bool* new_page_started);
555  int EncodeExternalReference(Address addr) {
556    return external_reference_encoder_->Encode(addr);
557  }
558
559  int SpaceAreaSize(int space);
560
561  Isolate* isolate_;
562  // Keep track of the fullness of each space in order to generate
563  // relative addresses for back references.  Large objects are
564  // just numbered sequentially since relative addresses make no
565  // sense in large object space.
566  int fullness_[LAST_SPACE + 1];
567  SnapshotByteSink* sink_;
568  int current_root_index_;
569  ExternalReferenceEncoder* external_reference_encoder_;
570  static bool serialization_enabled_;
571  // Did we already make use of the fact that serialization was not enabled?
572  static bool too_late_to_enable_now_;
573  int large_object_total_;
574  SerializationAddressMapper address_mapper_;
575  intptr_t root_index_wave_front_;
576
577  friend class ObjectSerializer;
578  friend class Deserializer;
579
580 private:
581  DISALLOW_COPY_AND_ASSIGN(Serializer);
582};
583
584
585class PartialSerializer : public Serializer {
586 public:
587  PartialSerializer(Serializer* startup_snapshot_serializer,
588                    SnapshotByteSink* sink)
589    : Serializer(sink),
590      startup_serializer_(startup_snapshot_serializer) {
591    set_root_index_wave_front(Heap::kStrongRootListLength);
592  }
593
594  // Serialize the objects reachable from a single object pointer.
595  virtual void Serialize(Object** o);
596  virtual void SerializeObject(Object* o,
597                               HowToCode how_to_code,
598                               WhereToPoint where_to_point);
599
600 protected:
601  virtual int PartialSnapshotCacheIndex(HeapObject* o);
602  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
603    // Scripts should be referred only through shared function infos.  We can't
604    // allow them to be part of the partial snapshot because they contain a
605    // unique ID, and deserializing several partial snapshots containing script
606    // would cause dupes.
607    ASSERT(!o->IsScript());
608    return o->IsString() || o->IsSharedFunctionInfo() ||
609           o->IsHeapNumber() || o->IsCode() ||
610           o->IsScopeInfo() ||
611           o->map() == HEAP->fixed_cow_array_map();
612  }
613
614 private:
615  Serializer* startup_serializer_;
616  DISALLOW_COPY_AND_ASSIGN(PartialSerializer);
617};
618
619
620class StartupSerializer : public Serializer {
621 public:
622  explicit StartupSerializer(SnapshotByteSink* sink) : Serializer(sink) {
623    // Clear the cache of objects used by the partial snapshot.  After the
624    // strong roots have been serialized we can create a partial snapshot
625    // which will repopulate the cache with objects needed by that partial
626    // snapshot.
627    Isolate::Current()->set_serialize_partial_snapshot_cache_length(0);
628  }
629  // Serialize the current state of the heap.  The order is:
630  // 1) Strong references.
631  // 2) Partial snapshot cache.
632  // 3) Weak references (e.g. the symbol table).
633  virtual void SerializeStrongReferences();
634  virtual void SerializeObject(Object* o,
635                               HowToCode how_to_code,
636                               WhereToPoint where_to_point);
637  void SerializeWeakReferences();
638  void Serialize() {
639    SerializeStrongReferences();
640    SerializeWeakReferences();
641  }
642
643 private:
644  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
645    return false;
646  }
647};
648
649
650} }  // namespace v8::internal
651
652#endif  // V8_SERIALIZE_H_
653