1// Copyright 2006-2009 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_SERIALIZE_H_
29#define V8_SERIALIZE_H_
30
31#include "hashmap.h"
32
33namespace v8 {
34namespace internal {
35
// A TypeCode is used to distinguish different kinds of external reference.
// The codes are small consecutive integers starting at UNCLASSIFIED (0), not
// bit flags; an encoded reference carries the code in the bits above the
// reference id (see kReferenceTypeShift).
enum TypeCode {
  UNCLASSIFIED,        // One-of-a-kind references.
  BUILTIN,
  RUNTIME_FUNCTION,
  IC_UTILITY,
  DEBUG_ADDRESS,
  STATS_COUNTER,
  TOP_ADDRESS,
  C_BUILTIN,
  EXTENSION,
  ACCESSOR,
  RUNTIME_ENTRY,
  STUB_CACHE_TABLE     // Keep last: kTypeCodeCount is derived from it.
};
52
53const int kTypeCodeCount = STUB_CACHE_TABLE + 1;
54const int kFirstTypeCode = UNCLASSIFIED;
55
56const int kReferenceIdBits = 16;
57const int kReferenceIdMask = (1 << kReferenceIdBits) - 1;
58const int kReferenceTypeShift = kReferenceIdBits;
59const int kDebugRegisterBits = 4;
60const int kDebugIdShift = kDebugRegisterBits;
61
62
63class ExternalReferenceEncoder {
64 public:
65  ExternalReferenceEncoder();
66
67  uint32_t Encode(Address key) const;
68
69  const char* NameOfAddress(Address key) const;
70
71 private:
72  HashMap encodings_;
73  static uint32_t Hash(Address key) {
74    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(key) >> 2);
75  }
76
77  int IndexOf(Address key) const;
78
79  static bool Match(void* key1, void* key2) { return key1 == key2; }
80
81  void Put(Address key, int index);
82
83  Isolate* isolate_;
84};
85
86
87class ExternalReferenceDecoder {
88 public:
89  ExternalReferenceDecoder();
90  ~ExternalReferenceDecoder();
91
92  Address Decode(uint32_t key) const {
93    if (key == 0) return NULL;
94    return *Lookup(key);
95  }
96
97 private:
98  Address** encodings_;
99
100  Address* Lookup(uint32_t key) const {
101    int type = key >> kReferenceTypeShift;
102    ASSERT(kFirstTypeCode <= type && type < kTypeCodeCount);
103    int id = key & kReferenceIdMask;
104    return &encodings_[type][id];
105  }
106
107  void Put(uint32_t key, Address value) {
108    *Lookup(key) = value;
109  }
110
111  Isolate* isolate_;
112};
113
114
115class SnapshotByteSource {
116 public:
117  SnapshotByteSource(const byte* array, int length)
118    : data_(array), length_(length), position_(0) { }
119
120  bool HasMore() { return position_ < length_; }
121
122  int Get() {
123    ASSERT(position_ < length_);
124    return data_[position_++];
125  }
126
127  inline void CopyRaw(byte* to, int number_of_bytes);
128
129  inline int GetInt();
130
131  bool AtEOF() {
132    return position_ == length_;
133  }
134
135  int position() { return position_; }
136
137 private:
138  const byte* data_;
139  int length_;
140  int position_;
141};
142
143
144// It is very common to have a reference to objects at certain offsets in the
145// heap.  These offsets have been determined experimentally.  We code
146// references to such objects in a single byte that encodes the way the pointer
147// is written (only plain pointers allowed), the space number and the offset.
148// This only works for objects in the first page of a space.  Don't use this for
149// things in newspace since it bypasses the write barrier.
150
151RLYSTC const int k64 = (sizeof(uintptr_t) - 4) / 4;
152
// X-macro listing the common reference patterns.  Each entry invokes
// f(tag, space, offset).  NOTE(review): the argument meanings are inferred
// from the values (tags continue after kNumberOfSpaces, the second argument
// looks like a space number, the third an offset) -- confirm at the use sites.
// The k64 terms adjust the offsets for the pointer width.
#define COMMON_REFERENCE_PATTERNS(f)                               \
  f(kNumberOfSpaces, 2, (11 - k64))                                \
  f((kNumberOfSpaces + 1), 2, 0)                                   \
  f((kNumberOfSpaces + 2), 2, (142 - 16 * k64))                    \
  f((kNumberOfSpaces + 3), 2, (74 - 15 * k64))                     \
  f((kNumberOfSpaces + 4), 2, 5)                                   \
  f((kNumberOfSpaces + 5), 1, 135)                                 \
  f((kNumberOfSpaces + 6), 2, (228 - 39 * k64))
161
// X-macro listing the raw-data lengths that get a dedicated tag.  Each entry
// invokes f(index, length): indices run 1..15, lengths run 1..8 and then in
// steps of four from 12 to 36.  NOTE(review): presumably entry |index| is
// emitted as tag kRawData + index (the 0x31-0x3f range) -- confirm at the
// use sites.
#define COMMON_RAW_LENGTHS(f)  \
  f(1, 1)                      \
  f(2, 2)                      \
  f(3, 3)                      \
  f(4, 4)                      \
  f(5, 5)                      \
  f(6, 6)                      \
  f(7, 7)                      \
  f(8, 8)                      \
  f(9, 12)                     \
  f(10, 16)                    \
  f(11, 20)                    \
  f(12, 24)                    \
  f(13, 28)                    \
  f(14, 32)                    \
  f(15, 36)
178
179// The Serializer/Deserializer class is a common superclass for Serializer and
180// Deserializer which is used to store common constants and methods used by
181// both.
182class SerializerDeserializer: public ObjectVisitor {
183 public:
184  RLYSTC void Iterate(ObjectVisitor* visitor);
185  RLYSTC void SetSnapshotCacheSize(int size);
186
187 protected:
188  // Where the pointed-to object can be found:
189  enum Where {
190    kNewObject = 0,                 // Object is next in snapshot.
191    // 1-8                             One per space.
192    kRootArray = 0x9,               // Object is found in root array.
193    kPartialSnapshotCache = 0xa,    // Object is in the cache.
194    kExternalReference = 0xb,       // Pointer to an external reference.
195    // 0xc-0xf                         Free.
196    kBackref = 0x10,                 // Object is described relative to end.
197    // 0x11-0x18                       One per space.
198    // 0x19-0x1f                       Common backref offsets.
199    kFromStart = 0x20,              // Object is described relative to start.
200    // 0x21-0x28                       One per space.
201    // 0x29-0x2f                       Free.
202    // 0x30-0x3f                       Used by misc tags below.
203    kPointedToMask = 0x3f
204  };
205
206  // How to code the pointer to the object.
207  enum HowToCode {
208    kPlain = 0,                          // Straight pointer.
209    // What this means depends on the architecture:
210    kFromCode = 0x40,                    // A pointer inlined in code.
211    kHowToCodeMask = 0x40
212  };
213
214  // Where to point within the object.
215  enum WhereToPoint {
216    kStartOfObject = 0,
217    kFirstInstruction = 0x80,
218    kWhereToPointMask = 0x80
219  };
220
221  // Misc.
222  // Raw data to be copied from the snapshot.
223  RLYSTC const int kRawData = 0x30;
224  // Some common raw lengths: 0x31-0x3f
225  // A tag emitted at strategic points in the snapshot to delineate sections.
226  // If the deserializer does not find these at the expected moments then it
227  // is an indication that the snapshot and the VM do not fit together.
228  // Examine the build process for architecture, version or configuration
229  // mismatches.
230  RLYSTC const int kSynchronize = 0x70;
231  // Used for the source code of the natives, which is in the executable, but
232  // is referred to from external strings in the snapshot.
233  RLYSTC const int kNativesStringResource = 0x71;
234  RLYSTC const int kNewPage = 0x72;
235  // 0x73-0x7f                            Free.
236  // 0xb0-0xbf                            Free.
237  // 0xf0-0xff                            Free.
238
239
240  RLYSTC const int kLargeData = LAST_SPACE;
241  RLYSTC const int kLargeCode = kLargeData + 1;
242  RLYSTC const int kLargeFixedArray = kLargeCode + 1;
243  RLYSTC const int kNumberOfSpaces = kLargeFixedArray + 1;
244  RLYSTC const int kAnyOldSpace = -1;
245
246  // A bitmask for getting the space out of an instruction.
247  RLYSTC const int kSpaceMask = 15;
248
249  RLYSTC inline bool SpaceIsLarge(int space) { return space >= kLargeData; }
250  RLYSTC inline bool SpaceIsPaged(int space) {
251    return space >= FIRST_PAGED_SPACE && space <= LAST_PAGED_SPACE;
252  }
253};
254
255
256int SnapshotByteSource::GetInt() {
257  // A little unwind to catch the really small ints.
258  int snapshot_byte = Get();
259  if ((snapshot_byte & 0x80) == 0) {
260    return snapshot_byte;
261  }
262  int accumulator = (snapshot_byte & 0x7f) << 7;
263  while (true) {
264    snapshot_byte = Get();
265    if ((snapshot_byte & 0x80) == 0) {
266      return accumulator | snapshot_byte;
267    }
268    accumulator = (accumulator | (snapshot_byte & 0x7f)) << 7;
269  }
270  UNREACHABLE();
271  return accumulator;
272}
273
274
275void SnapshotByteSource::CopyRaw(byte* to, int number_of_bytes) {
276  memcpy(to, data_ + position_, number_of_bytes);
277  position_ += number_of_bytes;
278}
279
280
281// A Deserializer reads a snapshot and reconstructs the Object graph it defines.
282class Deserializer: public SerializerDeserializer {
283 public:
284  // Create a deserializer from a snapshot byte source.
285  explicit Deserializer(SnapshotByteSource* source);
286
287  virtual ~Deserializer();
288
289  // Deserialize the snapshot into an empty heap.
290  void Deserialize();
291
292  // Deserialize a single object and the objects reachable from it.
293  void DeserializePartial(Object** root);
294
295#ifdef DEBUG
296  virtual void Synchronize(const char* tag);
297#endif
298
299 private:
300  virtual void VisitPointers(Object** start, Object** end);
301
302  virtual void VisitExternalReferences(Address* start, Address* end) {
303    UNREACHABLE();
304  }
305
306  virtual void VisitRuntimeEntry(RelocInfo* rinfo) {
307    UNREACHABLE();
308  }
309
310  void ReadChunk(Object** start, Object** end, int space, Address address);
311  HeapObject* GetAddressFromStart(int space);
312  inline HeapObject* GetAddressFromEnd(int space);
313  Address Allocate(int space_number, Space* space, int size);
314  void ReadObject(int space_number, Space* space, Object** write_back);
315
316  // Cached current isolate.
317  Isolate* isolate_;
318
319  // Keep track of the pages in the paged spaces.
320  // (In large object space we are keeping track of individual objects
321  // rather than pages.)  In new space we just need the address of the
322  // first object and the others will flow from that.
323  List<Address> pages_[SerializerDeserializer::kNumberOfSpaces];
324
325  SnapshotByteSource* source_;
326  // This is the address of the next object that will be allocated in each
327  // space.  It is used to calculate the addresses of back-references.
328  Address high_water_[LAST_SPACE + 1];
329  // This is the address of the most recent object that was allocated.  It
330  // is used to set the location of the new page when we encounter a
331  // START_NEW_PAGE_SERIALIZATION tag.
332  Address last_object_address_;
333
334  ExternalReferenceDecoder* external_reference_decoder_;
335
336  DISALLOW_COPY_AND_ASSIGN(Deserializer);
337};
338
339
// Abstract destination for serialized snapshot bytes.  Every Put* method
// takes a |description| string alongside the data.
class SnapshotByteSink {
 public:
  virtual ~SnapshotByteSink() { }

  // Emits a single byte.
  virtual void Put(int byte, const char* description) = 0;

  // Emits a byte that starts a section.  The default implementation simply
  // forwards to Put(); subclasses may override it.
  virtual void PutSection(int byte, const char* description) {
    Put(byte, description);
  }

  // Emits an integer.  Defined outside this header.
  void PutInt(uintptr_t integer, const char* description);

  // Current position of the sink.
  virtual int Position() = 0;
};
350
351
352// Mapping objects to their location after deserialization.
353// This is used during building, but not at runtime by V8.
354class SerializationAddressMapper {
355 public:
356  SerializationAddressMapper()
357      : serialization_map_(new HashMap(&SerializationMatchFun)),
358        no_allocation_(new AssertNoAllocation()) { }
359
360  ~SerializationAddressMapper() {
361    delete serialization_map_;
362    delete no_allocation_;
363  }
364
365  bool IsMapped(HeapObject* obj) {
366    return serialization_map_->Lookup(Key(obj), Hash(obj), false) != NULL;
367  }
368
369  int MappedTo(HeapObject* obj) {
370    ASSERT(IsMapped(obj));
371    return static_cast<int>(reinterpret_cast<intptr_t>(
372        serialization_map_->Lookup(Key(obj), Hash(obj), false)->value));
373  }
374
375  void AddMapping(HeapObject* obj, int to) {
376    ASSERT(!IsMapped(obj));
377    HashMap::Entry* entry =
378        serialization_map_->Lookup(Key(obj), Hash(obj), true);
379    entry->value = Value(to);
380  }
381
382 private:
383  RLYSTC bool SerializationMatchFun(void* key1, void* key2) {
384    return key1 == key2;
385  }
386
387  RLYSTC uint32_t Hash(HeapObject* obj) {
388    return static_cast<int32_t>(reinterpret_cast<intptr_t>(obj->address()));
389  }
390
391  RLYSTC void* Key(HeapObject* obj) {
392    return reinterpret_cast<void*>(obj->address());
393  }
394
395  RLYSTC void* Value(int v) {
396    return reinterpret_cast<void*>(v);
397  }
398
399  HashMap* serialization_map_;
400  AssertNoAllocation* no_allocation_;
401  DISALLOW_COPY_AND_ASSIGN(SerializationAddressMapper);
402};
403
404
405// There can be only one serializer per V8 process.
406STATIC_CLASS Serializer : public SerializerDeserializer {
407 public:
408  explicit Serializer(SnapshotByteSink* sink);
409  ~Serializer();
410  void VisitPointers(Object** start, Object** end);
411  // You can call this after serialization to find out how much space was used
412  // in each space.
413  int CurrentAllocationAddress(int space) {
414    if (SpaceIsLarge(space)) return large_object_total_;
415    return fullness_[space];
416  }
417
418  RLYSTC void Enable() {
419    if (!serialization_enabled_) {
420      ASSERT(!too_late_to_enable_now_);
421    }
422    serialization_enabled_ = true;
423  }
424
425  RLYSTC void Disable() { serialization_enabled_ = false; }
426  // Call this when you have made use of the fact that there is no serialization
427  // going on.
428  RLYSTC void TooLateToEnableNow() { too_late_to_enable_now_ = true; }
429  RLYSTC bool enabled() { return serialization_enabled_; }
430  SerializationAddressMapper* address_mapper() { return &address_mapper_; }
431#ifdef DEBUG
432  virtual void Synchronize(const char* tag);
433#endif
434
435 protected:
436  RLYSTC const int kInvalidRootIndex = -1;
437  virtual int RootIndex(HeapObject* heap_object) = 0;
438  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) = 0;
439
440  class ObjectSerializer : public ObjectVisitor {
441   public:
442    ObjectSerializer(Serializer* serializer,
443                     Object* o,
444                     SnapshotByteSink* sink,
445                     HowToCode how_to_code,
446                     WhereToPoint where_to_point)
447      : serializer_(serializer),
448        object_(HeapObject::cast(o)),
449        sink_(sink),
450        reference_representation_(how_to_code + where_to_point),
451        bytes_processed_so_far_(0) { }
452    void Serialize();
453    void VisitPointers(Object** start, Object** end);
454    void VisitExternalReferences(Address* start, Address* end);
455    void VisitCodeTarget(RelocInfo* target);
456    void VisitCodeEntry(Address entry_address);
457    void VisitGlobalPropertyCell(RelocInfo* rinfo);
458    void VisitRuntimeEntry(RelocInfo* reloc);
459    // Used for seralizing the external strings that hold the natives source.
460    void VisitExternalAsciiString(
461        v8::String::ExternalAsciiStringResource** resource);
462    // We can't serialize a heap with external two byte strings.
463    void VisitExternalTwoByteString(
464        v8::String::ExternalStringResource** resource) {
465      UNREACHABLE();
466    }
467
468   private:
469    void OutputRawData(Address up_to);
470
471    Serializer* serializer_;
472    HeapObject* object_;
473    SnapshotByteSink* sink_;
474    int reference_representation_;
475    int bytes_processed_so_far_;
476  };
477
478  virtual void SerializeObject(Object* o,
479                               HowToCode how_to_code,
480                               WhereToPoint where_to_point) = 0;
481  void SerializeReferenceToPreviousObject(
482      int space,
483      int address,
484      HowToCode how_to_code,
485      WhereToPoint where_to_point);
486  void InitializeAllocators();
487  // This will return the space for an object.  If the object is in large
488  // object space it may return kLargeCode or kLargeFixedArray in order
489  // to indicate to the deserializer what kind of large object allocation
490  // to make.
491  RLYSTC int SpaceOfObject(HeapObject* object);
492  // This just returns the space of the object.  It will return LO_SPACE
493  // for all large objects since you can't check the type of the object
494  // once the map has been used for the serialization address.
495  RLYSTC int SpaceOfAlreadySerializedObject(HeapObject* object);
496  int Allocate(int space, int size, bool* new_page_started);
497  int EncodeExternalReference(Address addr) {
498    return external_reference_encoder_->Encode(addr);
499  }
500
501  // Keep track of the fullness of each space in order to generate
502  // relative addresses for back references.  Large objects are
503  // just numbered sequentially since relative addresses make no
504  // sense in large object space.
505  int fullness_[LAST_SPACE + 1];
506  SnapshotByteSink* sink_;
507  int current_root_index_;
508  ExternalReferenceEncoder* external_reference_encoder_;
509  RLYSTC bool serialization_enabled_;
510  // Did we already make use of the fact that serialization was not enabled?
511  RLYSTC bool too_late_to_enable_now_;
512  int large_object_total_;
513  SerializationAddressMapper address_mapper_;
514
515  friend class ObjectSerializer;
516  friend class Deserializer;
517
518  DISALLOW_COPY_AND_ASSIGN(Serializer);
519};
520
521
522class PartialSerializer : public Serializer {
523 public:
524  PartialSerializer(Serializer* startup_snapshot_serializer,
525                    SnapshotByteSink* sink)
526    : Serializer(sink),
527      startup_serializer_(startup_snapshot_serializer) {
528  }
529
530  // Serialize the objects reachable from a single object pointer.
531  virtual void Serialize(Object** o);
532  virtual void SerializeObject(Object* o,
533                               HowToCode how_to_code,
534                               WhereToPoint where_to_point);
535
536 protected:
537  virtual int RootIndex(HeapObject* o);
538  virtual int PartialSnapshotCacheIndex(HeapObject* o);
539  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
540    // Scripts should be referred only through shared function infos.  We can't
541    // allow them to be part of the partial snapshot because they contain a
542    // unique ID, and deserializing several partial snapshots containing script
543    // would cause dupes.
544    ASSERT(!o->IsScript());
545    return o->IsString() || o->IsSharedFunctionInfo() ||
546           o->IsHeapNumber() || o->IsCode() ||
547           o->map() == HEAP->fixed_cow_array_map();
548  }
549
550 private:
551  Serializer* startup_serializer_;
552  DISALLOW_COPY_AND_ASSIGN(PartialSerializer);
553};
554
555
556class StartupSerializer : public Serializer {
557 public:
558  explicit StartupSerializer(SnapshotByteSink* sink) : Serializer(sink) {
559    // Clear the cache of objects used by the partial snapshot.  After the
560    // strong roots have been serialized we can create a partial snapshot
561    // which will repopulate the cache with objects neede by that partial
562    // snapshot.
563    Isolate::Current()->set_serialize_partial_snapshot_cache_length(0);
564  }
565  // Serialize the current state of the heap.  The order is:
566  // 1) Strong references.
567  // 2) Partial snapshot cache.
568  // 3) Weak references (eg the symbol table).
569  virtual void SerializeStrongReferences();
570  virtual void SerializeObject(Object* o,
571                               HowToCode how_to_code,
572                               WhereToPoint where_to_point);
573  void SerializeWeakReferences();
574  void Serialize() {
575    SerializeStrongReferences();
576    SerializeWeakReferences();
577  }
578
579 private:
580  virtual int RootIndex(HeapObject* o) { return kInvalidRootIndex; }
581  virtual bool ShouldBeInThePartialSnapshotCache(HeapObject* o) {
582    return false;
583  }
584};
585
586
587} }  // namespace v8::internal
588
589#endif  // V8_SERIALIZE_H_
590