// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/memory-optimizer.h"

#include "src/compiler/js-graph.h"
#include "src/compiler/linkage.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"

namespace v8 {
namespace internal {
namespace compiler {

MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone)
    : jsgraph_(jsgraph),
      empty_state_(AllocationState::Empty(zone)),
      pending_(zone),
      tokens_(zone),
      zone_(zone),
      graph_assembler_(jsgraph, nullptr, nullptr, zone) {}

void MemoryOptimizer::Optimize() {
  EnqueueUses(graph()->start(), empty_state());
  while (!tokens_.empty()) {
    Token const token = tokens_.front();
    tokens_.pop();
    VisitNode(token.node, token.state);
  }
  DCHECK(pending_.empty());
  DCHECK(tokens_.empty());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(nullptr) {
  node_ids_.insert(node->id());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Node* size, Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(size) {
  node_ids_.insert(node->id());
}

void MemoryOptimizer::AllocationGroup::Add(Node* node) {
  node_ids_.insert(node->id());
}

bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const {
  return node_ids_.find(node->id()) != node_ids_.end();
}

MemoryOptimizer::AllocationState::AllocationState()
    : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group)
    : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group,
                                                  int size, Node* top)
    : group_(group), size_(size), top_(top) {}

bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const {
  return group() && group()->IsNewSpaceAllocation();
}

void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) {
  DCHECK(!node->IsDead());
  DCHECK_LT(0, node->op()->EffectInputCount());
  switch (node->opcode()) {
    case IrOpcode::kAllocate:
      return VisitAllocate(node, state);
    case IrOpcode::kCall:
      return VisitCall(node, state);
    case IrOpcode::kLoadElement:
      return VisitLoadElement(node, state);
    case IrOpcode::kLoadField:
      return VisitLoadField(node, state);
    case IrOpcode::kStoreElement:
      return VisitStoreElement(node, state);
    case IrOpcode::kStoreField:
      return VisitStoreField(node, state);
    case IrOpcode::kCheckedLoad:
    case IrOpcode::kCheckedStore:
    case IrOpcode::kDeoptimizeIf:
    case IrOpcode::kDeoptimizeUnless:
    case IrOpcode::kIfException:
    case IrOpcode::kLoad:
    case IrOpcode::kProtectedLoad:
    case IrOpcode::kStore:
    case IrOpcode::kProtectedStore:
    case IrOpcode::kRetain:
    case IrOpcode::kUnsafePointerAdd:
      return VisitOtherEffect(node, state);
    default:
      break;
  }
  DCHECK_EQ(0, node->op()->EffectOutputCount());
}

#define __ gasm()->

void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kAllocate, node->opcode());
  Node* value;
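  // The Allocate {node} carries the allocation size as its value input,
  // followed by the incoming effect and control dependencies.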
  Node* size = node->InputAt(0);
  Node* effect = node->InputAt(1);
  Node* control = node->InputAt(2);

  gasm()->Reset(effect, control);

  PretenureFlag pretenure = PretenureFlagOf(node->op());

  // Propagate tenuring from outer allocations to inner allocations, i.e.
  // when we allocate an object in old space and store a newly allocated
  // child object into the pretenured object, then the newly allocated
  // child object also should get pretenured to old space.
  if (pretenure == TENURED) {
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 0) {
        Node* const child = user->InputAt(1);
        if (child->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(child->op()) == NOT_TENURED) {
          NodeProperties::ChangeOp(child, node->op());
          break;
        }
      }
    }
  } else {
    DCHECK_EQ(NOT_TENURED, pretenure);
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 1) {
        Node* const parent = user->InputAt(0);
        if (parent->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(parent->op()) == TENURED) {
          pretenure = TENURED;
          break;
        }
      }
    }
  }

  // Determine the top/limit addresses.
  Node* top_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_top_address(isolate())
          : ExternalReference::old_space_allocation_top_address(isolate()));
  Node* limit_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_limit_address(isolate())
          : ExternalReference::old_space_allocation_limit_address(isolate()));

  // Check if we can fold this allocation into a previous allocation
  // represented by the incoming {state}.
  Int32Matcher m(size);
  if (m.HasValue() && m.Value() < kMaxRegularHeapObjectSize) {
    int32_t const object_size = m.Value();
    if (state->size() <= kMaxRegularHeapObjectSize - object_size &&
        state->group()->pretenure() == pretenure) {
      // We can fold this Allocate {node} into the allocation {group}
      // represented by the given {state}. Compute the upper bound for
      // the new {state}.
      int32_t const state_size = state->size() + object_size;

      // Update the reservation check to the actual maximum upper bound.
      AllocationGroup* const group = state->group();
      if (OpParameter<int32_t>(group->size()) < state_size) {
        NodeProperties::ChangeOp(group->size(),
                                 common()->Int32Constant(state_size));
      }

      // Update the allocation top with the new object allocation.
      // TODO(bmeurer): Defer writing back top as much as possible.
      Node* top = __ IntAdd(state->top(), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the effective inner allocated address.
      value = __ BitcastWordToTagged(
          __ IntAdd(state->top(), __ IntPtrConstant(kHeapObjectTag)));

      // Extend the allocation {group}.
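      // The state remains open so that subsequent allocations can still be
      // folded into this reservation.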
      group->Add(value);
      state = AllocationState::Open(group, state_size, top, zone());
    } else {
      auto call_runtime = __ MakeDeferredLabel<1>();
      auto done = __ MakeLabel<2>(MachineType::PointerRepresentation());

      // Setup a mutable reservation size node; will be patched as we fold
      // additional allocations into this new group.
      Node* size = __ UniqueInt32Constant(object_size);

      // Load allocation top and limit.
      Node* top =
          __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
      Node* limit =
          __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

      // Check if we need to collect garbage before we can start bump pointer
      // allocation (always done for folded allocations).
      Node* check = __ UintLessThan(
          __ IntAdd(top,
                    machine()->Is64() ? __ ChangeInt32ToInt64(size) : size),
          limit);

      __ GotoUnless(check, &call_runtime);
      __ Goto(&done, top);

      __ Bind(&call_runtime);
      {
        Node* target = pretenure == NOT_TENURED
                           ? __ AllocateInNewSpaceStubConstant()
                           : __ AllocateInOldSpaceStubConstant();
        if (!allocate_operator_.is_set()) {
          CallDescriptor* descriptor =
              Linkage::GetAllocateCallDescriptor(graph()->zone());
          allocate_operator_.set(common()->Call(descriptor));
        }
        Node* vfalse = __ Call(allocate_operator_.get(), target, size);
        vfalse = __ IntSub(vfalse, __ IntPtrConstant(kHeapObjectTag));
        __ Goto(&done, vfalse);
      }

      __ Bind(&done);

      // Compute the new top and write it back.
      top = __ IntAdd(done.PhiAt(0), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the initial object address.
      value = __ BitcastWordToTagged(
          __ IntAdd(done.PhiAt(0), __ IntPtrConstant(kHeapObjectTag)));

      // Start a new allocation group.
      AllocationGroup* group =
          new (zone()) AllocationGroup(value, pretenure, size, zone());
      state = AllocationState::Open(group, object_size, top, zone());
    }
  } else {
    auto call_runtime = __ MakeDeferredLabel<1>();
    auto done = __ MakeLabel<2>(MachineRepresentation::kTaggedPointer);

    // Load allocation top and limit.
    Node* top =
        __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
    Node* limit =
        __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

    // Compute the new top.
    Node* new_top =
        __ IntAdd(top, machine()->Is64() ? __ ChangeInt32ToInt64(size) : size);

    // Check if we can do bump pointer allocation here.
    Node* check = __ UintLessThan(new_top, limit);
    __ GotoUnless(check, &call_runtime);
    __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                 kNoWriteBarrier),
             top_address, __ IntPtrConstant(0), new_top);
    __ Goto(&done, __ BitcastWordToTagged(
                       __ IntAdd(top, __ IntPtrConstant(kHeapObjectTag))));

    __ Bind(&call_runtime);
    Node* target = pretenure == NOT_TENURED
                       ? __ AllocateInNewSpaceStubConstant()
                       : __ AllocateInOldSpaceStubConstant();
    if (!allocate_operator_.is_set()) {
      CallDescriptor* descriptor =
          Linkage::GetAllocateCallDescriptor(graph()->zone());
      allocate_operator_.set(common()->Call(descriptor));
    }
    __ Goto(&done, __ Call(allocate_operator_.get(), target, size));

    __ Bind(&done);
    value = done.PhiAt(0);

    // Create an unfoldable allocation group.
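    // The allocation size is not a small compile-time constant here, so no
    // further allocations can be folded into this reservation.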
    AllocationGroup* group =
        new (zone()) AllocationGroup(value, pretenure, zone());
    state = AllocationState::Closed(group, zone());
  }

  effect = __ ExtractCurrentEffect();
  control = __ ExtractCurrentControl();
  USE(control);  // Floating control, dropped on the floor.

  // Replace all effect uses of {node} with the {effect}, enqueue the
  // effect uses for further processing, and replace all value uses of
  // {node} with the {value}.
  for (Edge edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
      edge.UpdateTo(effect);
    } else {
      DCHECK(NodeProperties::IsValueEdge(edge));
      edge.UpdateTo(value);
    }
  }

  // Kill the {node} to make sure we don't leave dangling dead uses.
  node->Kill();
}

#undef __

void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCall, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadElement(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* index = node->InputAt(1);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreElement(Node* node,
                                        AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* object = node->InputAt(0);
  Node* index = node->InputAt(1);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreField(Node* node,
                                      AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* object = node->InputAt(0);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitOtherEffect(Node* node,
                                       AllocationState const* state) {
  EnqueueUses(node, state);
}

Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) {
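  // Compute the byte offset for element {key}: scale the key by the element
  // size and add the (untagged) header size of the backing store.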
  Node* index;
  if (machine()->Is64()) {
    // On 64-bit platforms, we need to feed a Word64 index to the Load and
    // Store operators. Since LoadElement or StoreElement don't do any bounds
    // checking themselves, we can be sure that the {key} was already checked
    // and is in valid range, so we can do the further address computation on
    // Word64 below, which ideally allows us to fuse the address computation
    // with the actual memory access operation on Intel platforms.
    index = graph()->NewNode(machine()->ChangeUint32ToUint64(), key);
  } else {
    index = key;
  }
  int const element_size_shift =
      ElementSizeLog2Of(access.machine_type.representation());
  if (element_size_shift) {
    index = graph()->NewNode(machine()->WordShl(), index,
                             jsgraph()->IntPtrConstant(element_size_shift));
  }
  int const fixed_offset = access.header_size - access.tag();
  if (fixed_offset) {
    index = graph()->NewNode(machine()->IntAdd(), index,
                             jsgraph()->IntPtrConstant(fixed_offset));
  }
  return index;
}

WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind(
    Node* object, AllocationState const* state,
    WriteBarrierKind write_barrier_kind) {
  if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) {
    write_barrier_kind = kNoWriteBarrier;
  }
  return write_barrier_kind;
}

MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates(
    AllocationStates const& states) {
  // Check if all states are the same; or at least if all allocation
  // states belong to the same allocation group.
  AllocationState const* state = states.front();
  AllocationGroup* group = state->group();
  for (size_t i = 1; i < states.size(); ++i) {
    if (states[i] != state) state = nullptr;
    if (states[i]->group() != group) group = nullptr;
  }
  if (state == nullptr) {
    if (group != nullptr) {
      // We cannot fold any more allocations into this group, but we can still
      // eliminate write barriers on stores to this group.
      // TODO(bmeurer): We could potentially just create a Phi here to merge
      // the various tops; but we need to pay special attention not to create
      // an unschedulable graph.
      state = AllocationState::Closed(group, zone());
    } else {
      // The states are from different allocation groups.
      state = empty_state();
    }
  }
  return state;
}

void MemoryOptimizer::EnqueueMerge(Node* node, int index,
                                   AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode());
  int const input_count = node->InputCount() - 1;
  DCHECK_LT(0, input_count);
  Node* const control = node->InputAt(input_count);
  if (control->opcode() == IrOpcode::kLoop) {
    // For loops we always start with an empty state at the beginning.
    if (index == 0) EnqueueUses(node, empty_state());
  } else {
    DCHECK_EQ(IrOpcode::kMerge, control->opcode());
    // Check if we already know about this pending merge.
    NodeId const id = node->id();
    auto it = pending_.find(id);
    if (it == pending_.end()) {
      // Insert a new pending merge.
      it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first;
    }
    // Add the next input state.
    it->second.push_back(state);
    // Check if states for all inputs are available by now.
    if (it->second.size() == static_cast<size_t>(input_count)) {
      // All inputs to this effect merge are done, merge the states given all
      // input constraints, drop the pending merge and enqueue uses of the
      // EffectPhi {node}.
      state = MergeStates(it->second);
      EnqueueUses(node, state);
      pending_.erase(it);
    }
  }
}

void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) {
  for (Edge const edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
    }
  }
}

void MemoryOptimizer::EnqueueUse(Node* node, int index,
                                 AllocationState const* state) {
  if (node->opcode() == IrOpcode::kEffectPhi) {
    // An EffectPhi represents a merge of different effect chains, which
    // needs special handling depending on whether the merge is part of a
    // loop or just a normal control join.
    EnqueueMerge(node, index, state);
  } else {
    Token token = {node, state};
    tokens_.push(token);
  }
}

Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); }

Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); }

CommonOperatorBuilder* MemoryOptimizer::common() const {
  return jsgraph()->common();
}

MachineOperatorBuilder* MemoryOptimizer::machine() const {
  return jsgraph()->machine();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8