// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/memory-optimizer.h"

#include <limits>

#include "src/compiler/js-graph.h"
#include "src/compiler/linkage.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"

namespace v8 {
namespace internal {
namespace compiler {

MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone)
    : jsgraph_(jsgraph),
      empty_state_(AllocationState::Empty(zone)),
      pending_(zone),
      tokens_(zone),
      zone_(zone),
      graph_assembler_(jsgraph, nullptr, nullptr, zone) {}

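// Walks the effect chains of the graph with a simple worklist: starting from
// the graph's start node, every node with an effect input is visited together
// with the allocation state that is in effect at that point.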
void MemoryOptimizer::Optimize() {
  EnqueueUses(graph()->start(), empty_state());
  while (!tokens_.empty()) {
    Token const token = tokens_.front();
    tokens_.pop();
    VisitNode(token.node, token.state);
  }
  DCHECK(pending_.empty());
  DCHECK(tokens_.empty());
}

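// An AllocationGroup collects the ids of all allocation nodes that were
// folded into a single memory reservation; {size} (if present) is the
// reservation size node that gets patched as the group grows.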
MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(nullptr) {
  node_ids_.insert(node->id());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Node* size, Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(size) {
  node_ids_.insert(node->id());
}

void MemoryOptimizer::AllocationGroup::Add(Node* node) {
  node_ids_.insert(node->id());
}

bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const {
  return node_ids_.find(node->id()) != node_ids_.end();
}

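// The three AllocationState flavors: the default constructor yields the empty
// state (no dominating allocation group), the {group}-only constructor yields
// a closed state (group known, but no usable top pointer), and the full
// constructor yields an open state that allocation folding can extend.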
MemoryOptimizer::AllocationState::AllocationState()
    : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group)
    : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group,
                                                  int size, Node* top)
    : group_(group), size_(size), top_(top) {}

bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const {
  return group() && group()->IsNewSpaceAllocation();
}

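// Dispatches on the opcode of an effectful node; anything not handled
// explicitly must not have an effect output, so the effect chain ends there.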
void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) {
  DCHECK(!node->IsDead());
  DCHECK_LT(0, node->op()->EffectInputCount());
  switch (node->opcode()) {
    case IrOpcode::kAllocate:
      return VisitAllocate(node, state);
    case IrOpcode::kCall:
      return VisitCall(node, state);
    case IrOpcode::kLoadElement:
      return VisitLoadElement(node, state);
    case IrOpcode::kLoadField:
      return VisitLoadField(node, state);
    case IrOpcode::kStoreElement:
      return VisitStoreElement(node, state);
    case IrOpcode::kStoreField:
      return VisitStoreField(node, state);
    case IrOpcode::kCheckedLoad:
    case IrOpcode::kCheckedStore:
    case IrOpcode::kDeoptimizeIf:
    case IrOpcode::kDeoptimizeUnless:
    case IrOpcode::kIfException:
    case IrOpcode::kLoad:
    case IrOpcode::kProtectedLoad:
    case IrOpcode::kStore:
    case IrOpcode::kProtectedStore:
    case IrOpcode::kRetain:
    case IrOpcode::kUnsafePointerAdd:
      return VisitOtherEffect(node, state);
    default:
      break;
  }
  DCHECK_EQ(0, node->op()->EffectOutputCount());
}

#define __ gasm()->

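// Lowers an Allocate node to an inline bump-pointer allocation against the
// new-space or old-space top/limit, with a call to the allocation stub as the
// slow path. Small constant-size allocations are folded into the dominating
// allocation group where possible, so only one limit check is emitted for the
// whole group.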
void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kAllocate, node->opcode());
  Node* value;
  Node* size = node->InputAt(0);
  Node* effect = node->InputAt(1);
  Node* control = node->InputAt(2);

  gasm()->Reset(effect, control);

  PretenureFlag pretenure = PretenureFlagOf(node->op());

  // Propagate tenuring from outer allocations to inner allocations, i.e.
  // when we allocate an object in old space and store a newly allocated
  // child object into the pretenured object, then the newly allocated
  // child object also should get pretenured to old space.
  if (pretenure == TENURED) {
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 0) {
        Node* const child = user->InputAt(1);
        if (child->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(child->op()) == NOT_TENURED) {
          NodeProperties::ChangeOp(child, node->op());
          break;
        }
      }
    }
  } else {
    DCHECK_EQ(NOT_TENURED, pretenure);
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 1) {
        Node* const parent = user->InputAt(0);
        if (parent->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(parent->op()) == TENURED) {
          pretenure = TENURED;
          break;
        }
      }
    }
  }

  // Determine the top/limit addresses.
  Node* top_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_top_address(isolate())
          : ExternalReference::old_space_allocation_top_address(isolate()));
  Node* limit_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_limit_address(isolate())
          : ExternalReference::old_space_allocation_limit_address(isolate()));

  // Check if we can fold this allocation into a previous allocation
  // represented by the incoming {state}.
  Int32Matcher m(size);
  if (m.HasValue() && m.Value() < kMaxRegularHeapObjectSize) {
    int32_t const object_size = m.Value();
    if (state->size() <= kMaxRegularHeapObjectSize - object_size &&
        state->group()->pretenure() == pretenure) {
      // We can fold this Allocate {node} into the allocation {group}
      // represented by the given {state}. Compute the upper bound for
      // the new {state}.
      int32_t const state_size = state->size() + object_size;

      // Update the reservation check to the actual maximum upper bound.
      AllocationGroup* const group = state->group();
      if (OpParameter<int32_t>(group->size()) < state_size) {
        NodeProperties::ChangeOp(group->size(),
                                 common()->Int32Constant(state_size));
      }

      // Update the allocation top with the new object allocation.
      // TODO(bmeurer): Defer writing back top as much as possible.
      Node* top = __ IntAdd(state->top(), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the effective inner allocated address.
      value = __ BitcastWordToTagged(
          __ IntAdd(state->top(), __ IntPtrConstant(kHeapObjectTag)));

      // Extend the allocation {group}.
      group->Add(value);
      state = AllocationState::Open(group, state_size, top, zone());
    } else {
      auto call_runtime = __ MakeDeferredLabel<1>();
      auto done = __ MakeLabel<2>(MachineType::PointerRepresentation());

      // Set up a mutable reservation size node; it will be patched as we fold
      // additional allocations into this new group.
      Node* size = __ UniqueInt32Constant(object_size);

      // Load allocation top and limit.
      Node* top =
          __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
      Node* limit =
          __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

      // Check if we need to collect garbage before we can start bump pointer
      // allocation (always done for folded allocations).
      Node* check = __ UintLessThan(
          __ IntAdd(top,
                    machine()->Is64() ? __ ChangeInt32ToInt64(size) : size),
          limit);

      __ GotoUnless(check, &call_runtime);
      __ Goto(&done, top);

      __ Bind(&call_runtime);
      {
        Node* target = pretenure == NOT_TENURED
                           ? __ AllocateInNewSpaceStubConstant()
                           : __ AllocateInOldSpaceStubConstant();
        if (!allocate_operator_.is_set()) {
          CallDescriptor* descriptor =
              Linkage::GetAllocateCallDescriptor(graph()->zone());
          allocate_operator_.set(common()->Call(descriptor));
        }
        Node* vfalse = __ Call(allocate_operator_.get(), target, size);
        vfalse = __ IntSub(vfalse, __ IntPtrConstant(kHeapObjectTag));
        __ Goto(&done, vfalse);
      }

      __ Bind(&done);

      // Compute the new top and write it back.
      top = __ IntAdd(done.PhiAt(0), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the initial object address.
      value = __ BitcastWordToTagged(
          __ IntAdd(done.PhiAt(0), __ IntPtrConstant(kHeapObjectTag)));

      // Start a new allocation group.
      AllocationGroup* group =
          new (zone()) AllocationGroup(value, pretenure, size, zone());
      state = AllocationState::Open(group, object_size, top, zone());
    }
  } else {
    auto call_runtime = __ MakeDeferredLabel<1>();
    auto done = __ MakeLabel<2>(MachineRepresentation::kTaggedPointer);

    // Load allocation top and limit.
    Node* top =
        __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
    Node* limit =
        __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

    // Compute the new top.
    Node* new_top =
        __ IntAdd(top, machine()->Is64() ? __ ChangeInt32ToInt64(size) : size);

    // Check if we can do bump pointer allocation here.
    Node* check = __ UintLessThan(new_top, limit);
    __ GotoUnless(check, &call_runtime);
    __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                 kNoWriteBarrier),
             top_address, __ IntPtrConstant(0), new_top);
    __ Goto(&done, __ BitcastWordToTagged(
                       __ IntAdd(top, __ IntPtrConstant(kHeapObjectTag))));

    __ Bind(&call_runtime);
    Node* target = pretenure == NOT_TENURED
                       ? __ AllocateInNewSpaceStubConstant()
                       : __ AllocateInOldSpaceStubConstant();
    if (!allocate_operator_.is_set()) {
      CallDescriptor* descriptor =
          Linkage::GetAllocateCallDescriptor(graph()->zone());
      allocate_operator_.set(common()->Call(descriptor));
    }
    __ Goto(&done, __ Call(allocate_operator_.get(), target, size));

    __ Bind(&done);
    value = done.PhiAt(0);

    // Create an unfoldable allocation group.
    AllocationGroup* group =
        new (zone()) AllocationGroup(value, pretenure, zone());
    state = AllocationState::Closed(group, zone());
  }

  effect = __ ExtractCurrentEffect();
  control = __ ExtractCurrentControl();
  USE(control);  // Floating control, dropped on the floor.

  // Replace all effect uses of {node} with the {effect}, enqueue the
  // effect uses for further processing, and replace all value uses of
  // {node} with the {value}.
  for (Edge edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
      edge.UpdateTo(effect);
    } else {
      DCHECK(NodeProperties::IsValueEdge(edge));
      edge.UpdateTo(value);
    }
  }

  // Kill the {node} to make sure we don't leave dangling dead uses.
  node->Kill();
}

#undef __

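// A call that is not marked kNoAllocate may allocate (and thus move the
// allocation top), so we cannot keep folding into the current allocation
// group across it and have to continue with the empty state.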
void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCall, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

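// Lowers a LoadElement node to a machine Load by turning the element index
// into an explicit byte offset (see ComputeIndex below).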
void MemoryOptimizer::VisitLoadElement(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* index = node->InputAt(1);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

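// Lowers a LoadField node to a machine Load at the untagged field offset.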
void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

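// Lowers a StoreElement node to a machine Store, computing the byte offset
// for the element and eliding the write barrier where the allocation state
// proves it to be unnecessary.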
void MemoryOptimizer::VisitStoreElement(Node* node,
                                        AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* object = node->InputAt(0);
  Node* index = node->InputAt(1);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

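// Lowers a StoreField node to a machine Store at the untagged field offset,
// again using the allocation state to elide redundant write barriers.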
void MemoryOptimizer::VisitStoreField(Node* node,
                                      AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* object = node->InputAt(0);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitOtherEffect(Node* node,
                                       AllocationState const* state) {
  EnqueueUses(node, state);
}

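// Converts an element {key} into the corresponding byte offset from the
// object start: scale by the element size and add the (untagged) header size.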
Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) {
  Node* index;
  if (machine()->Is64()) {
    // On 64-bit platforms, we need to feed a Word64 index to the Load and
    // Store operators. Since neither LoadElement nor StoreElement does any
    // bounds checking itself, we can be sure that the {key} was already
    // checked and is in valid range, so we can do the further address
    // computation on Word64 below, which ideally allows us to fuse the
    // address computation with the actual memory access operation on Intel
    // platforms.
    index = graph()->NewNode(machine()->ChangeUint32ToUint64(), key);
  } else {
    index = key;
  }
  int const element_size_shift =
      ElementSizeLog2Of(access.machine_type.representation());
  if (element_size_shift) {
    index = graph()->NewNode(machine()->WordShl(), index,
                             jsgraph()->IntPtrConstant(element_size_shift));
  }
  int const fixed_offset = access.header_size - access.tag();
  if (fixed_offset) {
    index = graph()->NewNode(machine()->IntAdd(), index,
                             jsgraph()->IntPtrConstant(fixed_offset));
  }
  return index;
}

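// Write barriers can be skipped for stores into objects that are part of the
// dominating new-space allocation group; all other stores keep the barrier
// requested by the field/element access.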
WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind(
    Node* object, AllocationState const* state,
    WriteBarrierKind write_barrier_kind) {
  if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) {
    write_barrier_kind = kNoWriteBarrier;
  }
  return write_barrier_kind;
}

MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates(
    AllocationStates const& states) {
  // Check if all states are the same; or at least if all allocation
  // states belong to the same allocation group.
  AllocationState const* state = states.front();
  AllocationGroup* group = state->group();
  for (size_t i = 1; i < states.size(); ++i) {
    if (states[i] != state) state = nullptr;
    if (states[i]->group() != group) group = nullptr;
  }
  if (state == nullptr) {
    if (group != nullptr) {
      // We cannot fold any more allocations into this group, but we can still
      // eliminate write barriers on stores to this group.
      // TODO(bmeurer): We could potentially just create a Phi here to merge
      // the various tops; but we need to pay special attention not to create
      // an unschedulable graph.
      state = AllocationState::Closed(group, zone());
    } else {
      // The states are from different allocation groups.
      state = empty_state();
    }
  }
  return state;
}

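// Called once per effect input of an EffectPhi. Loop headers are started with
// the empty state; for ordinary merges we collect the incoming states in
// {pending_} and only continue once all of them have arrived.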
void MemoryOptimizer::EnqueueMerge(Node* node, int index,
                                   AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode());
  int const input_count = node->InputCount() - 1;
  DCHECK_LT(0, input_count);
  Node* const control = node->InputAt(input_count);
  if (control->opcode() == IrOpcode::kLoop) {
    // For loops we always start with an empty state at the beginning.
    if (index == 0) EnqueueUses(node, empty_state());
  } else {
    DCHECK_EQ(IrOpcode::kMerge, control->opcode());
    // Check if we already know about this pending merge.
    NodeId const id = node->id();
    auto it = pending_.find(id);
    if (it == pending_.end()) {
      // Insert a new pending merge.
      it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first;
    }
    // Add the next input state.
    it->second.push_back(state);
    // Check if states for all inputs are available by now.
    if (it->second.size() == static_cast<size_t>(input_count)) {
      // All inputs to this effect merge are done, merge the states given all
      // input constraints, drop the pending merge and enqueue uses of the
      // EffectPhi {node}.
      state = MergeStates(it->second);
      EnqueueUses(node, state);
      pending_.erase(it);
    }
  }
}

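// Enqueues all effect uses of {node} for processing with the given {state};
// EnqueueUse below treats EffectPhi uses specially via EnqueueMerge.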
void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) {
  for (Edge const edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
    }
  }
}

void MemoryOptimizer::EnqueueUse(Node* node, int index,
                                 AllocationState const* state) {
  if (node->opcode() == IrOpcode::kEffectPhi) {
    // An EffectPhi represents a merge of different effect chains, which
    // needs special handling depending on whether the merge is part of a
    // loop or just a normal control join.
    EnqueueMerge(node, index, state);
  } else {
    Token token = {node, state};
    tokens_.push(token);
  }
}

Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); }

Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); }

CommonOperatorBuilder* MemoryOptimizer::common() const {
  return jsgraph()->common();
}

MachineOperatorBuilder* MemoryOptimizer::machine() const {
  return jsgraph()->machine();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8