register_allocator.cc revision 3fc992f9dfe8f49ff350132323cc635f102b7b62
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "register_allocator.h"

#include <iostream>
#include <sstream>

#include "base/bit_vector-inl.h"
#include "code_generator.h"
#include "ssa_liveness_analysis.h"

namespace art {

// Sentinel lifetime position: -1 converts to the largest size_t value. It is
// stored in the per-register `free_until` / `next_use` arrays below to mean
// "free / not used for the rest of the method".
static constexpr size_t kMaxLifetimePosition = -1;
// Size argument passed to the four spill slot arrays in the constructor.
static constexpr size_t kDefaultNumberOfSpillSlots = 4;

// For simplicity, we implement register pairs as (reg, reg + 1).
// Note that this is a requirement for double registers on ARM, since we
// allocate SRegister.
static int GetHighForLowRegister(int reg) { return reg + 1; }
// A register can be the low half of a pair only if its index is even.
static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
// Returns true if the high interval of `low` is not in the register directly
// following the register of `low`.
static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
  return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
}

// Creates the allocator with empty work lists, asks `codegen` to set up its
// blocked registers (non-baseline mode), sizes the tables of fixed intervals
// to one entry per physical register, and computes the reserved out slots.
RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
                                     CodeGenerator* codegen,
                                     const SsaLivenessAnalysis& liveness)
      : allocator_(allocator),
        codegen_(codegen),
        liveness_(liveness),
        unhandled_core_intervals_(allocator, 0),
        unhandled_fp_intervals_(allocator, 0),
        unhandled_(nullptr),
        handled_(allocator, 0),
        active_(allocator, 0),
        inactive_(allocator, 0),
        physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
        physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()),
        temp_intervals_(allocator, 4),
        int_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
        long_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
        float_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
        double_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
        safepoints_(allocator, 0),
        processing_core_registers_(false),
        number_of_registers_(-1),
        registers_array_(nullptr),
        blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
        blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
        reserved_out_slots_(0),
        maximum_number_of_live_core_registers_(0),
        maximum_number_of_live_fp_registers_(0) {
  static constexpr bool kIsBaseline = false;
  codegen->SetupBlockedRegisters(kIsBaseline);
  physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
  physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
  // Always reserve for the current method and the graph's max out registers.
  // TODO: compute it instead.
  reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
}

// Returns whether this allocator can be used for `instruction_set`.
// The graph itself is not inspected.
bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
                                                InstructionSet instruction_set) {
  return instruction_set == kArm64
      || instruction_set == kX86_64
      || instruction_set == kArm
      || instruction_set == kX86
      || instruction_set == kThumb2;
}

// Returns true if `interval` is non-null and its register kind matches the
// kind being processed. An interval is a floating-point one when its type is
// kPrimFloat or kPrimDouble, and a core one otherwise.
static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
  if (interval == nullptr) return false;
  bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
      && (interval->GetType() != Primitive::kPrimFloat);
  return processing_core_registers == is_core_register;
}

// Entry point: runs the allocation, then Resolve(). In debug builds, it also
// validates the result for both register kinds and checks that lifetime
// positions never decrease along the linear order.
void RegisterAllocator::AllocateRegisters() {
  AllocateRegistersInternal();
  Resolve();

  if (kIsDebugBuild) {
    processing_core_registers_ = true;
    ValidateInternal(true);
    processing_core_registers_ = false;
    ValidateInternal(true);
    // Check that the linear order is still correct with regards to lifetime positions.
    // Since only parallel moves have been inserted during the register allocation,
    // these checks are mostly for making sure these moves have been added correctly.
    size_t current_liveness = 0;
    for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
      HBasicBlock* block = it.Current();
      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
        HInstruction* instruction = inst_it.Current();
        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
        current_liveness = instruction->GetLifetimePosition();
      }
      for (HInstructionIterator inst_it(block->GetInstructions());
           !inst_it.Done();
           inst_it.Advance()) {
        HInstruction* instruction = inst_it.Current();
        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
        current_liveness = instruction->GetLifetimePosition();
      }
    }
  }
}

// Adds the range `start`..`end` to the fixed interval of the physical register
// held by `location` (which must be a core or an FPU register). The fixed
// interval is created and recorded in the matching table on first use.
void RegisterAllocator::BlockRegister(Location location,
                                      size_t start,
                                      size_t end) {
  int reg = location.reg();
  DCHECK(location.IsRegister() || location.IsFpuRegister());
  LiveInterval* interval = location.IsRegister()
      ? physical_core_register_intervals_.Get(reg)
      : physical_fp_register_intervals_.Get(reg);
  Primitive::Type type = location.IsRegister()
      ? Primitive::kPrimInt
      : Primitive::kPrimFloat;
  if (interval == nullptr) {
    interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
    if (location.IsRegister()) {
      physical_core_register_intervals_.Put(reg, interval);
    } else {
      physical_fp_register_intervals_.Put(reg, interval);
    }
  }
  DCHECK(interval->GetRegister() == reg);
  interval->AddRange(start, end);
}

// Fills the unhandled lists by processing every instruction, then runs the
// linear scan twice: first for core registers, then for floating-point
// registers. The active/inactive/handled lists are reset between the two runs.
void RegisterAllocator::AllocateRegistersInternal() {
  // Iterate post-order, to ensure the list is sorted, and the last added interval
  // is the one with the lowest start position.
  for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
    HBasicBlock* block = it.Current();
    for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
         back_it.Advance()) {
      ProcessInstruction(back_it.Current());
    }
    for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
      ProcessInstruction(inst_it.Current());
    }
  }

  number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
  processing_core_registers_ = true;
  unhandled_ = &unhandled_core_intervals_;
  for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
    LiveInterval* fixed = physical_core_register_intervals_.Get(i);
    if (fixed != nullptr) {
      // Fixed interval is added to inactive_ instead of unhandled_.
      // It's also the only type of inactive interval whose start position
      // can be after the current interval during linear scan.
      // Fixed interval is never split and never moves to unhandled_.
      inactive_.Add(fixed);
    }
  }
  LinearScan();

  inactive_.Reset();
  active_.Reset();
  handled_.Reset();

  number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
  processing_core_registers_ = false;
  unhandled_ = &unhandled_fp_intervals_;
  for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
    LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
    if (fixed != nullptr) {
      // Fixed interval is added to inactive_ instead of unhandled_.
      // It's also the only type of inactive interval whose start position
      // can be after the current interval during linear scan.
      // Fixed interval is never split and never moves to unhandled_.
      inactive_.Add(fixed);
    }
  }
  LinearScan();
}

// Prepares one instruction for the linear scan. In order, it:
//  - creates temp intervals or blocks fixed temp registers,
//  - records safepoints (and a synthesized slow-path interval when needed),
//  - blocks non-callee-save registers around calls,
//  - blocks fixed input registers,
//  - attaches the safepoints covered by the instruction's live interval,
//  - applies fixed output locations, and
//  - finally adds the interval (or its split) to the unhandled list.
// Instructions without a location summary are ignored.
void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t position = instruction->GetLifetimePosition();

  if (locations == nullptr) return;

  // Create synthesized intervals for temporaries.
  for (size_t i = 0; i < locations->GetTempCount(); ++i) {
    Location temp = locations->GetTemp(i);
    if (temp.IsRegister() || temp.IsFpuRegister()) {
      BlockRegister(temp, position, position + 1);
    } else {
      DCHECK(temp.IsUnallocated());
      switch (temp.GetPolicy()) {
        case Location::kRequiresRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
          temp_intervals_.Add(interval);
          interval->AddTempUse(instruction, i);
          unhandled_core_intervals_.Add(interval);
          break;
        }

        case Location::kRequiresFpuRegister: {
          LiveInterval* interval =
              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
          temp_intervals_.Add(interval);
          interval->AddTempUse(instruction, i);
          if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
            interval->AddHighInterval(/* is_temp */ true);
            LiveInterval* high = interval->GetHighInterval();
            temp_intervals_.Add(high);
            // The high half is added first, so that the low half is the one
            // added last to the list.
            unhandled_fp_intervals_.Add(high);
          }
          unhandled_fp_intervals_.Add(interval);
          break;
        }

        default:
          LOG(FATAL) << "Unexpected policy for temporary location "
                     << temp.GetPolicy();
      }
    }
  }

  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
      && (instruction->GetType() != Primitive::kPrimFloat);

  if (locations->CanCall()) {
    if (codegen_->IsLeafMethod()) {
      // TODO: We do this here because we do not want the suspend check to artificially
      // create live registers. We should find another place, but this is currently the
      // simplest.
      DCHECK(instruction->IsSuspendCheckEntry());
      instruction->GetBlock()->RemoveInstruction(instruction);
      return;
    }
    safepoints_.Add(instruction);
    if (locations->OnlyCallsOnSlowPath()) {
      // We add a synthesized range at this position to record the live registers
      // at this position. Ideally, we could just update the safepoints when locations
      // are updated, but we currently need to know the full stack size before updating
      // locations (because of parameters and the fact that we don't have a frame pointer).
      // And knowing the full stack size requires to know the maximum number of live
      // registers at calls in slow paths.
      // By adding the following interval in the algorithm, we can compute this
      // maximum before updating locations.
      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
      interval->AddRange(position, position + 1);
      AddSorted(&unhandled_core_intervals_, interval);
      AddSorted(&unhandled_fp_intervals_, interval);
    }
  }

  if (locations->WillCall()) {
    // Block all registers.
    for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
      if (!codegen_->IsCoreCalleeSaveRegister(i)) {
        BlockRegister(Location::RegisterLocation(i),
                      position,
                      position + 1);
      }
    }
    for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
      if (!codegen_->IsFloatingPointCalleeSaveRegister(i)) {
        BlockRegister(Location::FpuRegisterLocation(i),
                      position,
                      position + 1);
      }
    }
  }

  // Inputs that are tied to a fixed register (or register pair) block that
  // register at this instruction.
  for (size_t i = 0; i < instruction->InputCount(); ++i) {
    Location input = locations->InAt(i);
    if (input.IsRegister() || input.IsFpuRegister()) {
      BlockRegister(input, position, position + 1);
    } else if (input.IsPair()) {
      BlockRegister(input.ToLow(), position, position + 1);
      BlockRegister(input.ToHigh(), position, position + 1);
    }
  }

  LiveInterval* current = instruction->GetLiveInterval();
  if (current == nullptr) return;

  GrowableArray<LiveInterval*>& unhandled = core_register
      ? unhandled_core_intervals_
      : unhandled_fp_intervals_;

  DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));

  if (codegen_->NeedsTwoRegisters(current->GetType())) {
    current->AddHighInterval();
  }

  // Walk the recorded safepoints from the most recently added one backwards.
  for (size_t safepoint_index = safepoints_.Size(); safepoint_index > 0; --safepoint_index) {
    HInstruction* safepoint = safepoints_.Get(safepoint_index - 1);
    size_t safepoint_position = safepoint->GetLifetimePosition();

    // Test that safepoints are ordered in the optimal way.
    DCHECK(safepoint_index == safepoints_.Size()
           || safepoints_.Get(safepoint_index)->GetLifetimePosition() < safepoint_position);

    if (safepoint_position == current->GetStart()) {
      // The safepoint is for this instruction, so the location of the instruction
      // does not need to be saved.
      DCHECK_EQ(safepoint_index, safepoints_.Size());
      DCHECK_EQ(safepoint, instruction);
      continue;
    } else if (current->IsDeadAt(safepoint_position)) {
      break;
    } else if (!current->Covers(safepoint_position)) {
      // Hole in the interval.
      continue;
    }
    current->AddSafepoint(safepoint);
  }
  current->ResetSearchCache();

  // Some instructions define their output in fixed register/stack slot. We need
  // to ensure we know these locations before doing register allocation. For a
  // given register, we create an interval that covers these locations. The register
  // will be unavailable at these locations when trying to allocate one for an
  // interval.
  //
  // The backwards walking ensures the ranges are ordered on increasing start positions.
  Location output = locations->Out();
  if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
    Location first = locations->InAt(0);
    if (first.IsRegister() || first.IsFpuRegister()) {
      current->SetFrom(position + 1);
      current->SetRegister(first.reg());
    } else if (first.IsPair()) {
      current->SetFrom(position + 1);
      current->SetRegister(first.low());
      LiveInterval* high = current->GetHighInterval();
      high->SetRegister(first.high());
      high->SetFrom(position + 1);
    }
  } else if (output.IsRegister() || output.IsFpuRegister()) {
    // Shift the interval's start by one to account for the blocked register.
    current->SetFrom(position + 1);
    current->SetRegister(output.reg());
    BlockRegister(output, position, position + 1);
  } else if (output.IsPair()) {
    current->SetFrom(position + 1);
    current->SetRegister(output.low());
    LiveInterval* high = current->GetHighInterval();
    high->SetRegister(output.high());
    high->SetFrom(position + 1);
    BlockRegister(output.ToLow(), position, position + 1);
    BlockRegister(output.ToHigh(), position, position + 1);
  } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
    current->SetSpillSlot(output.GetStackIndex());
  } else {
    DCHECK(output.IsUnallocated() || output.IsConstant());
  }

  // If needed, add interval to the list of unhandled intervals.
  if (current->HasSpillSlot() || instruction->IsConstant()) {
    // Split just before first register use.
    size_t first_register_use = current->FirstRegisterUse();
    if (first_register_use != kNoLifetime) {
      LiveInterval* split = Split(current, first_register_use - 1);
      // Don't add directly to `unhandled`, it needs to be sorted and the start
      // of this new interval might be after intervals already in the list.
      AddSorted(&unhandled, split);
    } else {
      // Nothing to do, we won't allocate a register for this value.
    }
  } else {
    // Don't add directly to `unhandled`, temp or safepoint intervals
    // for this instruction may have been added, and those can be
    // processed first.
    AddSorted(&unhandled, current);
  }
}

// Iterates over every live range of an interval, then over the ranges of each
// of its next siblings in turn. Done() becomes true once the last sibling's
// last range has been passed.
// NOTE(review): Advance() dereferences the current range, so each visited
// interval presumably has at least one range -- confirm.
class AllRangesIterator : public ValueObject {
 public:
  explicit AllRangesIterator(LiveInterval* interval)
      : current_interval_(interval),
        current_range_(interval->GetFirstRange()) {}

  bool Done() const { return current_interval_ == nullptr; }
  LiveRange* CurrentRange() const { return current_range_; }
  LiveInterval* CurrentInterval() const { return current_interval_; }

  void Advance() {
    current_range_ = current_range_->GetNext();
    if (current_range_ == nullptr) {
      // Ranges of this interval are exhausted: move on to the next sibling.
      current_interval_ = current_interval_->GetNextSibling();
      if (current_interval_ != nullptr) {
        current_range_ = current_interval_->GetFirstRange();
      }
    }
  }

 private:
  LiveInterval* current_interval_;
  LiveRange* current_range_;

  DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
};

// Collects the SSA value intervals, fixed intervals and temp intervals of the
// register kind currently selected by `processing_core_registers_`, and
// forwards them to ValidateIntervals.
bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
  // To simplify unit testing, we eagerly create the array of intervals, and
  // call the helper method.
  GrowableArray<LiveInterval*> intervals(allocator_, 0);
  for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
    HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
    if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
      intervals.Add(instruction->GetLiveInterval());
    }
  }

  if (processing_core_registers_) {
    for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
      LiveInterval* fixed = physical_core_register_intervals_.Get(i);
      if (fixed != nullptr) {
        intervals.Add(fixed);
      }
    }
  } else {
    for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
      LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
      if (fixed != nullptr) {
        intervals.Add(fixed);
      }
    }
  }

  for (size_t i = 0, e = temp_intervals_.Size(); i < e; ++i) {
    LiveInterval* temp = temp_intervals_.Get(i);
    if (ShouldProcess(processing_core_registers_, temp)) {
      intervals.Add(temp);
    }
  }

  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
                           allocator_, processing_core_registers_, log_fatal_on_failure);
}

// Checks that no two of the given intervals occupy the same register or the
// same spill slot at the same lifetime position. On a conflict, either logs a
// fatal message (when `log_fatal_on_failure` is set) or returns false.
// Returns true when no conflict is found.
bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals,
                                          size_t number_of_spill_slots,
                                          size_t number_of_out_slots,
                                          const CodeGenerator& codegen,
                                          ArenaAllocator* allocator,
                                          bool processing_core_registers,
                                          bool log_fatal_on_failure) {
  size_t number_of_registers = processing_core_registers
      ? codegen.GetNumberOfCoreRegisters()
      : codegen.GetNumberOfFloatingPointRegisters();
  GrowableArray<ArenaBitVector*> liveness_of_values(
      allocator, number_of_registers + number_of_spill_slots);

  // Allocate a bit vector per register. A live interval that has a register
  // allocated will populate the associated bit vector based on its live ranges.
  // The bit vectors of the spill slots follow those of the registers.
  for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
    liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true));
  }

  for (size_t i = 0, e = intervals.Size(); i < e; ++i) {
    for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) {
      LiveInterval* current = it.CurrentInterval();
      HInstruction* defined_by = current->GetParent()->GetDefinedBy();
      if (current->GetParent()->HasSpillSlot()
           // Parameters have their own stack slot.
           && !(defined_by != nullptr && defined_by->IsParameterValue())) {
        BitVector* liveness_of_spill_slot = liveness_of_values.Get(number_of_registers
            + current->GetParent()->GetSpillSlot() / kVRegSize
            - number_of_out_slots);
        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
          if (liveness_of_spill_slot->IsBitSet(j)) {
            if (log_fatal_on_failure) {
              std::ostringstream message;
              message << "Spill slot conflict at " << j;
              LOG(FATAL) << message.str();
            } else {
              return false;
            }
          } else {
            liveness_of_spill_slot->SetBit(j);
          }
        }
      }

      if (current->HasRegister()) {
        BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister());
        for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
          if (liveness_of_register->IsBitSet(j)) {
            // An interval that is allowed to reuse an input register may
            // overlap with that input.
            if (current->IsUsingInputRegister() && current->CanUseInputRegister()) {
              continue;
            }
            if (log_fatal_on_failure) {
              std::ostringstream message;
              message << "Register conflict at " << j << " ";
              if (defined_by != nullptr) {
                message << "(" << defined_by->DebugName() << ")";
              }
              message << "for ";
              if (processing_core_registers) {
                codegen.DumpCoreRegister(message, current->GetRegister());
              } else {
                codegen.DumpFloatingPointRegister(message, current->GetRegister());
              }
              LOG(FATAL) << message.str();
            } else {
              return false;
            }
          } else {
            liveness_of_register->SetBit(j);
          }
        }
      }
    }
  }
  return true;
}

// Writes `interval` to `stream`, followed by the name of its register, or by
// "spilled" if it has none.
void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
  interval->Dump(stream);
  stream << ": ";
  if (interval->HasRegister()) {
    if (interval->IsFloatingPoint()) {
      codegen_->DumpFloatingPointRegister(stream, interval->GetRegister());
    } else {
      codegen_->DumpCoreRegister(stream, interval->GetRegister());
    }
  } else {
    stream << "spilled";
  }
  stream << std::endl;
}

// Writes the inactive, active, unhandled and handled lists to `stream`.
// When no unhandled list is selected yet, the core one is dumped.
void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
  stream << "inactive: " << std::endl;
  for (size_t i = 0; i < inactive_.Size(); i ++) {
    DumpInterval(stream, inactive_.Get(i));
  }
  stream << "active: " << std::endl;
  for (size_t i = 0; i < active_.Size(); i ++) {
    DumpInterval(stream, active_.Get(i));
  }
  stream << "unhandled: " << std::endl;
  auto unhandled = (unhandled_ != nullptr) ?
      unhandled_ : &unhandled_core_intervals_;
  for (size_t i = 0; i < unhandled->Size(); i ++) {
    DumpInterval(stream, unhandled->Get(i));
  }
  stream << "handled: " << std::endl;
  for (size_t i = 0; i < handled_.Size(); i ++) {
    DumpInterval(stream, handled_.Get(i));
  }
}

// By the book implementation of a linear scan register allocator.
void RegisterAllocator::LinearScan() {
  while (!unhandled_->IsEmpty()) {
    // (1) Remove interval with the lowest start position from unhandled.
    LiveInterval* current = unhandled_->Pop();
    DCHECK(!current->IsFixed() && !current->HasSpillSlot());
    DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
    DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval());

    size_t position = current->GetStart();

    // Remember the inactive_ size here since the ones moved to inactive_ from
    // active_ below shouldn't need to be re-checked.
    size_t inactive_intervals_to_handle = inactive_.Size();

    // (2) Remove currently active intervals that are dead at this position.
    //     Move active intervals that have a lifetime hole at this position
    //     to inactive.
    for (size_t i = 0; i < active_.Size(); ++i) {
      LiveInterval* interval = active_.Get(i);
      if (interval->IsDeadAt(position)) {
        active_.Delete(interval);
        // Step back so that the element now at index `i` is visited next.
        --i;
        handled_.Add(interval);
      } else if (!interval->Covers(position)) {
        active_.Delete(interval);
        --i;
        inactive_.Add(interval);
      }
    }

    // (3) Remove currently inactive intervals that are dead at this position.
    //     Move inactive intervals that cover this position to active.
    for (size_t i = 0; i < inactive_intervals_to_handle; ++i) {
      LiveInterval* interval = inactive_.Get(i);
      DCHECK(interval->GetStart() < position || interval->IsFixed());
      if (interval->IsDeadAt(position)) {
        inactive_.Delete(interval);
        --i;
        --inactive_intervals_to_handle;
        handled_.Add(interval);
      } else if (interval->Covers(position)) {
        inactive_.Delete(interval);
        --i;
        --inactive_intervals_to_handle;
        active_.Add(interval);
      }
    }

    if (current->IsSlowPathSafepoint()) {
      // Synthesized interval to record the maximum number of live registers
      // at safepoints. No need to allocate a register for it.
      if (processing_core_registers_) {
        maximum_number_of_live_core_registers_ =
          std::max(maximum_number_of_live_core_registers_, active_.Size());
      } else {
        maximum_number_of_live_fp_registers_ =
          std::max(maximum_number_of_live_fp_registers_, active_.Size());
      }
      DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
      continue;
    }

    if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
      DCHECK(!current->HasRegister());
      // Allocating the low part was unsuccessful. The split interval for the high part
      // will be handled next (it is in the `unhandled_` list).
      continue;
    }

    // (4) Try to find an available register.
    bool success = TryAllocateFreeReg(current);

    // (5) If no register could be found, we need to spill.
    if (!success) {
      success = AllocateBlockedReg(current);
    }

    // (6) If the interval had a register allocated, add it to the list of active
    //     intervals.
    if (success) {
      codegen_->AddAllocatedRegister(processing_core_registers_
          ? Location::RegisterLocation(current->GetRegister())
          : Location::FpuRegisterLocation(current->GetRegister()));
      active_.Add(current);
      if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
        current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
      }
    }
  }
}

// Updates `free_until` for the register (and the high register, if any) of
// `interval`, based on whether `interval` is dead at `position` or merely does
// not cover it. If `interval` covers `position`, nothing is changed.
static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) {
  DCHECK(!interval->IsHighInterval());
  // Note that the same instruction may occur multiple times in the input list,
  // so `free_until` may have changed already.
  // Since `position` is not the current scan position, we need to use CoversSlow.
  if (interval->IsDeadAt(position)) {
    // Set the register to be free. Note that inactive intervals might later
    // update this.
    free_until[interval->GetRegister()] = kMaxLifetimePosition;
    if (interval->HasHighInterval()) {
      DCHECK(interval->GetHighInterval()->IsDeadAt(position));
      free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition;
    }
  } else if (!interval->CoversSlow(position)) {
    // The interval becomes inactive at `defined_by`. We make its register
    // available only until the next use strictly after `defined_by`.
    free_until[interval->GetRegister()] = interval->FirstUseAfter(position);
    if (interval->HasHighInterval()) {
      DCHECK(!interval->GetHighInterval()->CoversSlow(position));
      free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()];
    }
  }
}

// Find a free register. If multiple are found, pick the register that
// is free the longest.
// Returns false if no register is free at the start of `current`, in which
// case the caller falls back to AllocateBlockedReg. On success, `current` is
// given the register and, if the register is free for only part of `current`,
// the remainder is split off and added back to `unhandled_`.
bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
  size_t* free_until = registers_array_;

  // First set all registers to be free.
  for (size_t i = 0; i < number_of_registers_; ++i) {
    free_until[i] = kMaxLifetimePosition;
  }

  // For each active interval, set its register to not free.
  for (size_t i = 0, e = active_.Size(); i < e; ++i) {
    LiveInterval* interval = active_.Get(i);
    DCHECK(interval->HasRegister());
    free_until[interval->GetRegister()] = 0;
  }

  // An interval that starts an instruction (that is, it is not split), may
  // re-use the registers used by the inputs of that instruction, based on the
  // location summary.
  HInstruction* defined_by = current->GetDefinedBy();
  if (defined_by != nullptr && !current->IsSplit()) {
    LocationSummary* locations = defined_by->GetLocations();
    if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
      for (HInputIterator it(defined_by); !it.Done(); it.Advance()) {
        // Take the last interval of the input. It is the location of that interval
        // that will be used at `defined_by`.
        LiveInterval* interval = it.Current()->GetLiveInterval()->GetLastSibling();
        // Note that interval may have not been processed yet.
        // TODO: Handle non-split intervals last in the work list.
        if (interval->HasRegister() && interval->SameRegisterKind(*current)) {
          // The input must be live until the end of `defined_by`, to comply to
          // the linear scan algorithm. So we use `defined_by`'s end lifetime
          // position to check whether the input is dead or is inactive after
          // `defined_by`.
          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
          size_t position = defined_by->GetLifetimePosition() + 1;
          FreeIfNotCoverAt(interval, position, free_until);
        }
      }
    }
  }

  // For each inactive interval, set its register to be free until
  // the next intersection with `current`.
  for (size_t i = 0, e = inactive_.Size(); i < e; ++i) {
    LiveInterval* inactive = inactive_.Get(i);
    // Temp/Slow-path-safepoint interval has no holes.
    DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
    if (!current->IsSplit() && !inactive->IsFixed()) {
      // Neither current nor inactive are fixed.
      // Thanks to SSA, a non-split interval starting in a hole of an
      // inactive interval should never intersect with that inactive interval.
      // Only if it's not fixed though, because fixed intervals don't come from SSA.
      DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
      continue;
    }

    DCHECK(inactive->HasRegister());
    if (free_until[inactive->GetRegister()] == 0) {
      // Already used by some active interval. No need to intersect.
      continue;
    }
    size_t next_intersection = inactive->FirstIntersectionWith(current);
    if (next_intersection != kNoLifetime) {
      free_until[inactive->GetRegister()] =
          std::min(free_until[inactive->GetRegister()], next_intersection);
    }
  }

  int reg = kNoRegister;
  if (current->HasRegister()) {
    // Some instructions have a fixed register output.
    reg = current->GetRegister();
    if (free_until[reg] == 0) {
      DCHECK(current->IsHighInterval());
      // AllocateBlockedReg will spill the holder of the register.
      return false;
    }
  } else {
    DCHECK(!current->IsHighInterval());
    int hint = current->FindFirstRegisterHint(free_until);
    if (hint != kNoRegister) {
      DCHECK(!IsBlocked(hint));
      reg = hint;
    } else if (current->IsLowInterval()) {
      reg = FindAvailableRegisterPair(free_until, current->GetStart());
    } else {
      reg = FindAvailableRegister(free_until);
    }
  }

  DCHECK_NE(reg, kNoRegister);
  // If we could not find a register, we need to spill.
  if (free_until[reg] == 0) {
    return false;
  }

  if (current->IsLowInterval()) {
    // If the high register of this interval is not available, we need to spill.
    int high_reg = current->GetHighInterval()->GetRegister();
    if (high_reg == kNoRegister) {
      high_reg = GetHighForLowRegister(reg);
    }
    if (free_until[high_reg] == 0) {
      return false;
    }
  }

  current->SetRegister(reg);
  if (!current->IsDeadAt(free_until[reg])) {
    // If the register is only available for a subset of live ranges
    // covered by `current`, split `current` at the position where
    // the register is not available anymore.
    LiveInterval* split = Split(current, free_until[reg]);
    DCHECK(split != nullptr);
    AddSorted(unhandled_, split);
  }
  return true;
}

// Returns whether `reg` is marked as blocked in the blocked-register table of
// the register kind currently being processed.
bool RegisterAllocator::IsBlocked(int reg) const {
  return processing_core_registers_
      ? blocked_core_registers_[reg]
      : blocked_fp_registers_[reg];
}

// Returns the low register of a (reg, reg + 1) pair chosen from the
// non-blocked pairs whose low register index is even, or kNoRegister if there
// is no such pair. `next_use` is indexed by register; `starting_at` is the
// position below which a register is treated as unavailable.
int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
  int reg = kNoRegister;
  // Pick the register pair that is used the last.
  for (size_t i = 0; i < number_of_registers_; ++i) {
    if (IsBlocked(i)) continue;
    if (!IsLowRegister(i)) continue;
    int high_register = GetHighForLowRegister(i);
    if (IsBlocked(high_register)) continue;
    // Only meaningful once `reg` holds a candidate; the `reg == kNoRegister`
    // test below is evaluated first and short-circuits otherwise.
    int existing_high_register = GetHighForLowRegister(reg);
    if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
                        && next_use[high_register] >= next_use[existing_high_register])) {
      reg = i;
      if (next_use[i] == kMaxLifetimePosition
          && next_use[high_register] == kMaxLifetimePosition) {
        break;
      }
    } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
      // If one of the current register is known to be unavailable, just unconditionally
      // try a new one.
      reg = i;
    }
  }
  return reg;
}

// Returns the non-blocked register with the largest `next_use` value (the
// lowest index wins on ties), or kNoRegister if every register is blocked.
int RegisterAllocator::FindAvailableRegister(size_t* next_use) const {
  int reg = kNoRegister;
  // Pick the register that is used the last.
  for (size_t i = 0; i < number_of_registers_; ++i) {
    if (IsBlocked(i)) continue;
    if (reg == kNoRegister || next_use[i] > next_use[reg]) {
      reg = i;
      if (next_use[i] == kMaxLifetimePosition) break;
    }
  }
  return reg;
}

// Searches `active_` for an interval to split at `position`. Candidates are
// non-fixed, non-high intervals whose register's `next_use` is not before
// `first_register_use`, and that are either not part of a pair or the low
// half of an unaligned pair. The first candidate found is split and removed
// from `active_`, and the split is added to `unhandled_`.
// Returns whether an interval was split.
bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
                                                                 size_t first_register_use,
                                                                 size_t* next_use) {
  for (size_t i = 0, e = active_.Size(); i < e; ++i) {
    LiveInterval* active = active_.Get(i);
    DCHECK(active->HasRegister());
    if (active->IsFixed()) continue;
    if (active->IsHighInterval()) continue;
    if (first_register_use > next_use[active->GetRegister()]) continue;

    // Split the first interval found.
    if (!active->IsLowInterval() || IsLowOfUnalignedPairInterval(active)) {
      LiveInterval* split = Split(active, position);
      active_.DeleteAt(i);
      if (split != active) {
        handled_.Add(active);
      }
      AddSorted(unhandled_, split);
      return true;
    }
  }
  return false;
}

// If `interval` is one half of a pair, removes the other half from
// `intervals`. The high half is expected at `index` when `interval` is the
// low half, and the low half at `index - 1` when `interval` is the high half.
// Returns whether an element was removed.
bool RegisterAllocator::PotentiallyRemoveOtherHalf(LiveInterval* interval,
                                                   GrowableArray<LiveInterval*>* intervals,
                                                   size_t index) {
  if (interval->IsLowInterval()) {
    DCHECK_EQ(intervals->Get(index), interval->GetHighInterval());
    intervals->DeleteAt(index);
    return true;
  } else if (interval->IsHighInterval()) {
    DCHECK_GT(index, 0u);
    DCHECK_EQ(intervals->Get(index - 1), interval->GetLowInterval());
    intervals->DeleteAt(index - 1);
    return true;
  } else {
    return false;
  }
}

// Find the register that is used the last, and spill the interval
// that holds it. If the first use of `current` is after that register
// we spill `current` instead.
bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
  size_t first_register_use = current->FirstRegisterUse();
  if (first_register_use == kNoLifetime) {
    AllocateSpillSlotFor(current);
    return false;
  }

  // First set all registers as not being used.
  size_t* next_use = registers_array_;
  for (size_t i = 0; i < number_of_registers_; ++i) {
    next_use[i] = kMaxLifetimePosition;
  }

  // For each active interval, find the next use of its register after the
  // start of current.
914 for (size_t i = 0, e = active_.Size(); i < e; ++i) { 915 LiveInterval* active = active_.Get(i); 916 DCHECK(active->HasRegister()); 917 if (active->IsFixed()) { 918 next_use[active->GetRegister()] = current->GetStart(); 919 } else { 920 size_t use = active->FirstRegisterUseAfter(current->GetStart()); 921 if (use != kNoLifetime) { 922 next_use[active->GetRegister()] = use; 923 } 924 } 925 } 926 927 // For each inactive interval, find the next use of its register after the 928 // start of current. 929 for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { 930 LiveInterval* inactive = inactive_.Get(i); 931 // Temp/Slow-path-safepoint interval has no holes. 932 DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); 933 if (!current->IsSplit() && !inactive->IsFixed()) { 934 // Neither current nor inactive are fixed. 935 // Thanks to SSA, a non-split interval starting in a hole of an 936 // inactive interval should never intersect with that inactive interval. 937 // Only if it's not fixed though, because fixed intervals don't come from SSA. 938 DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); 939 continue; 940 } 941 DCHECK(inactive->HasRegister()); 942 size_t next_intersection = inactive->FirstIntersectionWith(current); 943 if (next_intersection != kNoLifetime) { 944 if (inactive->IsFixed()) { 945 next_use[inactive->GetRegister()] = 946 std::min(next_intersection, next_use[inactive->GetRegister()]); 947 } else { 948 size_t use = inactive->FirstRegisterUseAfter(current->GetStart()); 949 if (use != kNoLifetime) { 950 next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]); 951 } 952 } 953 } 954 } 955 956 int reg = kNoRegister; 957 bool should_spill = false; 958 if (current->HasRegister()) { 959 DCHECK(current->IsHighInterval()); 960 reg = current->GetRegister(); 961 // When allocating the low part, we made sure the high register was available. 
962 DCHECK_LT(first_register_use, next_use[reg]); 963 } else if (current->IsLowInterval()) { 964 reg = FindAvailableRegisterPair(next_use, first_register_use); 965 // We should spill if both registers are not available. 966 should_spill = (first_register_use >= next_use[reg]) 967 || (first_register_use >= next_use[GetHighForLowRegister(reg)]); 968 } else { 969 DCHECK(!current->IsHighInterval()); 970 reg = FindAvailableRegister(next_use); 971 should_spill = (first_register_use >= next_use[reg]); 972 } 973 974 DCHECK_NE(reg, kNoRegister); 975 if (should_spill) { 976 DCHECK(!current->IsHighInterval()); 977 bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1)); 978 if (current->IsLowInterval() 979 && is_allocation_at_use_site 980 && TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), 981 first_register_use, 982 next_use)) { 983 // If we're allocating a register for `current` because the instruction at 984 // that position requires it, but we think we should spill, then there are 985 // non-pair intervals or unaligned pair intervals blocking the allocation. 986 // We split the first interval found, and put ourselves first in the 987 // `unhandled_` list. 988 LiveInterval* existing = unhandled_->Peek(); 989 DCHECK(existing->IsHighInterval()); 990 DCHECK_EQ(existing->GetLowInterval(), current); 991 unhandled_->Add(current); 992 } else { 993 // If the first use of that instruction is after the last use of the found 994 // register, we split this interval just before its first register use. 995 AllocateSpillSlotFor(current); 996 LiveInterval* split = Split(current, first_register_use - 1); 997 if (current == split) { 998 DumpInterval(std::cerr, current); 999 DumpAllIntervals(std::cerr); 1000 // This situation has the potential to infinite loop, so we make it a non-debug CHECK. 
1001 CHECK(false) << "There is not enough registers available for " 1002 << split->GetParent()->GetDefinedBy()->DebugName() << " " 1003 << split->GetParent()->GetDefinedBy()->GetId() 1004 << " at " << first_register_use - 1; 1005 } 1006 AddSorted(unhandled_, split); 1007 } 1008 return false; 1009 } else { 1010 // Use this register and spill the active and inactives interval that 1011 // have that register. 1012 current->SetRegister(reg); 1013 1014 for (size_t i = 0, e = active_.Size(); i < e; ++i) { 1015 LiveInterval* active = active_.Get(i); 1016 if (active->GetRegister() == reg) { 1017 DCHECK(!active->IsFixed()); 1018 LiveInterval* split = Split(active, current->GetStart()); 1019 if (split != active) { 1020 handled_.Add(active); 1021 } 1022 active_.DeleteAt(i); 1023 PotentiallyRemoveOtherHalf(active, &active_, i); 1024 AddSorted(unhandled_, split); 1025 break; 1026 } 1027 } 1028 1029 for (size_t i = 0; i < inactive_.Size(); ++i) { 1030 LiveInterval* inactive = inactive_.Get(i); 1031 if (inactive->GetRegister() == reg) { 1032 if (!current->IsSplit() && !inactive->IsFixed()) { 1033 // Neither current nor inactive are fixed. 1034 // Thanks to SSA, a non-split interval starting in a hole of an 1035 // inactive interval should never intersect with that inactive interval. 1036 // Only if it's not fixed though, because fixed intervals don't come from SSA. 1037 DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); 1038 continue; 1039 } 1040 size_t next_intersection = inactive->FirstIntersectionWith(current); 1041 if (next_intersection != kNoLifetime) { 1042 if (inactive->IsFixed()) { 1043 LiveInterval* split = Split(current, next_intersection); 1044 DCHECK_NE(split, current); 1045 AddSorted(unhandled_, split); 1046 } else { 1047 // Split at the start of `current`, which will lead to splitting 1048 // at the end of the lifetime hole of `inactive`. 
1049 LiveInterval* split = Split(inactive, current->GetStart()); 1050 // If it's inactive, it must start before the current interval. 1051 DCHECK_NE(split, inactive); 1052 inactive_.DeleteAt(i); 1053 if (PotentiallyRemoveOtherHalf(inactive, &inactive_, i) && inactive->IsHighInterval()) { 1054 // We have removed an entry prior to `inactive`. So we need to decrement. 1055 --i; 1056 } 1057 // Decrement because we have removed `inactive` from the list. 1058 --i; 1059 handled_.Add(inactive); 1060 AddSorted(unhandled_, split); 1061 } 1062 } 1063 } 1064 } 1065 1066 return true; 1067 } 1068} 1069 1070void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval) { 1071 DCHECK(!interval->IsFixed() && !interval->HasSpillSlot()); 1072 size_t insert_at = 0; 1073 for (size_t i = array->Size(); i > 0; --i) { 1074 LiveInterval* current = array->Get(i - 1); 1075 // High intervals must be processed right after their low equivalent. 1076 if (current->StartsAfter(interval) && !current->IsHighInterval()) { 1077 insert_at = i; 1078 break; 1079 } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { 1080 // Ensure the slow path interval is the last to be processed at its location: we want the 1081 // interval to know all live registers at this location. 1082 DCHECK(i == 1 || array->Get(i - 2)->StartsAfter(current)); 1083 insert_at = i; 1084 break; 1085 } 1086 } 1087 1088 array->InsertAt(insert_at, interval); 1089 // Insert the high interval before the low, to ensure the low is processed before. 
1090 if (interval->HasHighInterval()) { 1091 array->InsertAt(insert_at, interval->GetHighInterval()); 1092 } else if (interval->HasLowInterval()) { 1093 array->InsertAt(insert_at + 1, interval->GetLowInterval()); 1094 } 1095} 1096 1097LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { 1098 DCHECK_GE(position, interval->GetStart()); 1099 DCHECK(!interval->IsDeadAt(position)); 1100 if (position == interval->GetStart()) { 1101 // Spill slot will be allocated when handling `interval` again. 1102 interval->ClearRegister(); 1103 if (interval->HasHighInterval()) { 1104 interval->GetHighInterval()->ClearRegister(); 1105 } else if (interval->HasLowInterval()) { 1106 interval->GetLowInterval()->ClearRegister(); 1107 } 1108 return interval; 1109 } else { 1110 LiveInterval* new_interval = interval->SplitAt(position); 1111 if (interval->HasHighInterval()) { 1112 LiveInterval* high = interval->GetHighInterval()->SplitAt(position); 1113 new_interval->SetHighInterval(high); 1114 high->SetLowInterval(new_interval); 1115 } else if (interval->HasLowInterval()) { 1116 LiveInterval* low = interval->GetLowInterval()->SplitAt(position); 1117 new_interval->SetLowInterval(low); 1118 low->SetHighInterval(new_interval); 1119 } 1120 return new_interval; 1121 } 1122} 1123 1124void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { 1125 if (interval->IsHighInterval()) { 1126 // The low interval will contain the spill slot. 1127 return; 1128 } 1129 1130 LiveInterval* parent = interval->GetParent(); 1131 1132 // An instruction gets a spill slot for its entire lifetime. If the parent 1133 // of this interval already has a spill slot, there is nothing to do. 1134 if (parent->HasSpillSlot()) { 1135 return; 1136 } 1137 1138 HInstruction* defined_by = parent->GetDefinedBy(); 1139 if (defined_by->IsParameterValue()) { 1140 // Parameters have their own stack slot. 
1141 parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); 1142 return; 1143 } 1144 1145 if (defined_by->IsConstant()) { 1146 // Constants don't need a spill slot. 1147 return; 1148 } 1149 1150 LiveInterval* last_sibling = interval; 1151 while (last_sibling->GetNextSibling() != nullptr) { 1152 last_sibling = last_sibling->GetNextSibling(); 1153 } 1154 size_t end = last_sibling->GetEnd(); 1155 1156 GrowableArray<size_t>* spill_slots = nullptr; 1157 switch (interval->GetType()) { 1158 case Primitive::kPrimDouble: 1159 spill_slots = &double_spill_slots_; 1160 break; 1161 case Primitive::kPrimLong: 1162 spill_slots = &long_spill_slots_; 1163 break; 1164 case Primitive::kPrimFloat: 1165 spill_slots = &float_spill_slots_; 1166 break; 1167 case Primitive::kPrimNot: 1168 case Primitive::kPrimInt: 1169 case Primitive::kPrimChar: 1170 case Primitive::kPrimByte: 1171 case Primitive::kPrimBoolean: 1172 case Primitive::kPrimShort: 1173 spill_slots = &int_spill_slots_; 1174 break; 1175 case Primitive::kPrimVoid: 1176 LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); 1177 } 1178 1179 // Find an available spill slot. 1180 size_t slot = 0; 1181 for (size_t e = spill_slots->Size(); slot < e; ++slot) { 1182 if (spill_slots->Get(slot) <= parent->GetStart() 1183 && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) { 1184 break; 1185 } 1186 } 1187 1188 if (parent->NeedsTwoSpillSlots()) { 1189 if (slot == spill_slots->Size()) { 1190 // We need a new spill slot. 1191 spill_slots->Add(end); 1192 spill_slots->Add(end); 1193 } else if (slot == spill_slots->Size() - 1) { 1194 spill_slots->Put(slot, end); 1195 spill_slots->Add(end); 1196 } else { 1197 spill_slots->Put(slot, end); 1198 spill_slots->Put(slot + 1, end); 1199 } 1200 } else { 1201 if (slot == spill_slots->Size()) { 1202 // We need a new spill slot. 
1203 spill_slots->Add(end); 1204 } else { 1205 spill_slots->Put(slot, end); 1206 } 1207 } 1208 1209 // Note that the exact spill slot location will be computed when we resolve, 1210 // that is when we know the number of spill slots for each type. 1211 parent->SetSpillSlot(slot); 1212} 1213 1214static bool IsValidDestination(Location destination) { 1215 return destination.IsRegister() 1216 || destination.IsRegisterPair() 1217 || destination.IsFpuRegister() 1218 || destination.IsFpuRegisterPair() 1219 || destination.IsStackSlot() 1220 || destination.IsDoubleStackSlot(); 1221} 1222 1223void RegisterAllocator::AddMove(HParallelMove* move, 1224 Location source, 1225 Location destination, 1226 HInstruction* instruction, 1227 Primitive::Type type) const { 1228 if (type == Primitive::kPrimLong 1229 && codegen_->ShouldSplitLongMoves() 1230 // The parallel move resolver knows how to deal with long constants. 1231 && !source.IsConstant()) { 1232 move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction); 1233 move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr); 1234 } else { 1235 move->AddMove(source, destination, type, instruction); 1236 } 1237} 1238 1239void RegisterAllocator::AddInputMoveFor(HInstruction* input, 1240 HInstruction* user, 1241 Location source, 1242 Location destination) const { 1243 if (source.Equals(destination)) return; 1244 1245 DCHECK(!user->IsPhi()); 1246 1247 HInstruction* previous = user->GetPrevious(); 1248 HParallelMove* move = nullptr; 1249 if (previous == nullptr 1250 || !previous->IsParallelMove() 1251 || previous->GetLifetimePosition() < user->GetLifetimePosition()) { 1252 move = new (allocator_) HParallelMove(allocator_); 1253 move->SetLifetimePosition(user->GetLifetimePosition()); 1254 user->GetBlock()->InsertInstructionBefore(move, user); 1255 } else { 1256 move = previous->AsParallelMove(); 1257 } 1258 DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); 1259 
AddMove(move, source, destination, nullptr, input->GetType()); 1260} 1261 1262static bool IsInstructionStart(size_t position) { 1263 return (position & 1) == 0; 1264} 1265 1266static bool IsInstructionEnd(size_t position) { 1267 return (position & 1) == 1; 1268} 1269 1270void RegisterAllocator::InsertParallelMoveAt(size_t position, 1271 HInstruction* instruction, 1272 Location source, 1273 Location destination) const { 1274 DCHECK(IsValidDestination(destination)) << destination; 1275 if (source.Equals(destination)) return; 1276 1277 HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); 1278 HParallelMove* move; 1279 if (at == nullptr) { 1280 if (IsInstructionStart(position)) { 1281 // Block boundary, don't do anything the connection of split siblings will handle it. 1282 return; 1283 } else { 1284 // Move must happen before the first instruction of the block. 1285 at = liveness_.GetInstructionFromPosition((position + 1) / 2); 1286 // Note that parallel moves may have already been inserted, so we explicitly 1287 // ask for the first instruction of the block: `GetInstructionFromPosition` does 1288 // not contain the `HParallelMove` instructions. 1289 at = at->GetBlock()->GetFirstInstruction(); 1290 1291 if (at->GetLifetimePosition() < position) { 1292 // We may insert moves for split siblings and phi spills at the beginning of the block. 1293 // Since this is a different lifetime position, we need to go to the next instruction. 1294 DCHECK(at->IsParallelMove()); 1295 at = at->GetNext(); 1296 } 1297 1298 if (at->GetLifetimePosition() != position) { 1299 DCHECK_GT(at->GetLifetimePosition(), position); 1300 move = new (allocator_) HParallelMove(allocator_); 1301 move->SetLifetimePosition(position); 1302 at->GetBlock()->InsertInstructionBefore(move, at); 1303 } else { 1304 DCHECK(at->IsParallelMove()); 1305 move = at->AsParallelMove(); 1306 } 1307 } 1308 } else if (IsInstructionEnd(position)) { 1309 // Move must happen after the instruction. 
1310 DCHECK(!at->IsControlFlow()); 1311 move = at->GetNext()->AsParallelMove(); 1312 // This is a parallel move for connecting siblings in a same block. We need to 1313 // differentiate it with moves for connecting blocks, and input moves. 1314 if (move == nullptr || move->GetLifetimePosition() > position) { 1315 move = new (allocator_) HParallelMove(allocator_); 1316 move->SetLifetimePosition(position); 1317 at->GetBlock()->InsertInstructionBefore(move, at->GetNext()); 1318 } 1319 } else { 1320 // Move must happen before the instruction. 1321 HInstruction* previous = at->GetPrevious(); 1322 if (previous == nullptr 1323 || !previous->IsParallelMove() 1324 || previous->GetLifetimePosition() != position) { 1325 // If the previous is a parallel move, then its position must be lower 1326 // than the given `position`: it was added just after the non-parallel 1327 // move instruction that precedes `instruction`. 1328 DCHECK(previous == nullptr 1329 || !previous->IsParallelMove() 1330 || previous->GetLifetimePosition() < position); 1331 move = new (allocator_) HParallelMove(allocator_); 1332 move->SetLifetimePosition(position); 1333 at->GetBlock()->InsertInstructionBefore(move, at); 1334 } else { 1335 move = previous->AsParallelMove(); 1336 } 1337 } 1338 DCHECK_EQ(move->GetLifetimePosition(), position); 1339 AddMove(move, source, destination, instruction, instruction->GetType()); 1340} 1341 1342void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, 1343 HInstruction* instruction, 1344 Location source, 1345 Location destination) const { 1346 DCHECK(IsValidDestination(destination)) << destination; 1347 if (source.Equals(destination)) return; 1348 1349 DCHECK_EQ(block->GetSuccessors().Size(), 1u); 1350 HInstruction* last = block->GetLastInstruction(); 1351 // We insert moves at exit for phi predecessors and connecting blocks. 1352 // A block ending with an if cannot branch to a block with phis because 1353 // we do not allow critical edges. 
It can also not connect 1354 // a split interval between two blocks: the move has to happen in the successor. 1355 DCHECK(!last->IsIf()); 1356 HInstruction* previous = last->GetPrevious(); 1357 HParallelMove* move; 1358 // This is a parallel move for connecting blocks. We need to differentiate 1359 // it with moves for connecting siblings in a same block, and output moves. 1360 size_t position = last->GetLifetimePosition(); 1361 if (previous == nullptr || !previous->IsParallelMove() 1362 || previous->AsParallelMove()->GetLifetimePosition() != position) { 1363 move = new (allocator_) HParallelMove(allocator_); 1364 move->SetLifetimePosition(position); 1365 block->InsertInstructionBefore(move, last); 1366 } else { 1367 move = previous->AsParallelMove(); 1368 } 1369 AddMove(move, source, destination, instruction, instruction->GetType()); 1370} 1371 1372void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, 1373 HInstruction* instruction, 1374 Location source, 1375 Location destination) const { 1376 DCHECK(IsValidDestination(destination)) << destination; 1377 if (source.Equals(destination)) return; 1378 1379 HInstruction* first = block->GetFirstInstruction(); 1380 HParallelMove* move = first->AsParallelMove(); 1381 size_t position = block->GetLifetimeStart(); 1382 // This is a parallel move for connecting blocks. We need to differentiate 1383 // it with moves for connecting siblings in a same block, and input moves. 
1384 if (move == nullptr || move->GetLifetimePosition() != position) { 1385 move = new (allocator_) HParallelMove(allocator_); 1386 move->SetLifetimePosition(position); 1387 block->InsertInstructionBefore(move, first); 1388 } 1389 AddMove(move, source, destination, instruction, instruction->GetType()); 1390} 1391 1392void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, 1393 Location source, 1394 Location destination) const { 1395 DCHECK(IsValidDestination(destination)) << destination; 1396 if (source.Equals(destination)) return; 1397 1398 if (instruction->IsPhi()) { 1399 InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination); 1400 return; 1401 } 1402 1403 size_t position = instruction->GetLifetimePosition() + 1; 1404 HParallelMove* move = instruction->GetNext()->AsParallelMove(); 1405 // This is a parallel move for moving the output of an instruction. We need 1406 // to differentiate with input moves, moves for connecting siblings in a 1407 // and moves for connecting blocks. 1408 if (move == nullptr || move->GetLifetimePosition() != position) { 1409 move = new (allocator_) HParallelMove(allocator_); 1410 move->SetLifetimePosition(position); 1411 instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); 1412 } 1413 AddMove(move, source, destination, instruction, instruction->GetType()); 1414} 1415 1416void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { 1417 LiveInterval* current = interval; 1418 if (current->HasSpillSlot() && current->HasRegister()) { 1419 // We spill eagerly, so move must be at definition. 1420 InsertMoveAfter(interval->GetDefinedBy(), 1421 interval->ToLocation(), 1422 interval->NeedsTwoSpillSlots() 1423 ? 
Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) 1424 : Location::StackSlot(interval->GetParent()->GetSpillSlot())); 1425 } 1426 UsePosition* use = current->GetFirstUse(); 1427 1428 // Walk over all siblings, updating locations of use positions, and 1429 // connecting them when they are adjacent. 1430 do { 1431 Location source = current->ToLocation(); 1432 1433 // Walk over all uses covered by this interval, and update the location 1434 // information. 1435 1436 LiveRange* range = current->GetFirstRange(); 1437 while (range != nullptr) { 1438 while (use != nullptr && use->GetPosition() < range->GetStart()) { 1439 DCHECK(use->GetIsEnvironment()); 1440 use = use->GetNext(); 1441 } 1442 while (use != nullptr && use->GetPosition() <= range->GetEnd()) { 1443 DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); 1444 LocationSummary* locations = use->GetUser()->GetLocations(); 1445 if (use->GetIsEnvironment()) { 1446 locations->SetEnvironmentAt(use->GetInputIndex(), source); 1447 } else { 1448 Location expected_location = locations->InAt(use->GetInputIndex()); 1449 // The expected (actual) location may be invalid in case the input is unused. Currently 1450 // this only happens for intrinsics. 1451 if (expected_location.IsValid()) { 1452 if (expected_location.IsUnallocated()) { 1453 locations->SetInAt(use->GetInputIndex(), source); 1454 } else if (!expected_location.IsConstant()) { 1455 AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); 1456 } 1457 } else { 1458 DCHECK(use->GetUser()->IsInvoke()); 1459 DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); 1460 } 1461 } 1462 use = use->GetNext(); 1463 } 1464 range = range->GetNext(); 1465 } 1466 1467 // If the next interval starts just after this one, and has a register, 1468 // insert a move. 
1469 LiveInterval* next_sibling = current->GetNextSibling(); 1470 if (next_sibling != nullptr 1471 && next_sibling->HasRegister() 1472 && current->GetEnd() == next_sibling->GetStart()) { 1473 Location destination = next_sibling->ToLocation(); 1474 InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination); 1475 } 1476 1477 for (SafepointPosition* safepoint_position = current->GetFirstSafepoint(); 1478 safepoint_position != nullptr; 1479 safepoint_position = safepoint_position->GetNext()) { 1480 DCHECK(current->CoversSlow(safepoint_position->GetPosition())); 1481 1482 LocationSummary* locations = safepoint_position->GetLocations(); 1483 if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { 1484 locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); 1485 } 1486 1487 switch (source.GetKind()) { 1488 case Location::kRegister: { 1489 locations->AddLiveRegister(source); 1490 if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) { 1491 DCHECK_LE(locations->GetNumberOfLiveRegisters(), 1492 maximum_number_of_live_core_registers_ + 1493 maximum_number_of_live_fp_registers_); 1494 } 1495 if (current->GetType() == Primitive::kPrimNot) { 1496 locations->SetRegisterBit(source.reg()); 1497 } 1498 break; 1499 } 1500 case Location::kFpuRegister: { 1501 locations->AddLiveRegister(source); 1502 break; 1503 } 1504 1505 case Location::kRegisterPair: 1506 case Location::kFpuRegisterPair: { 1507 locations->AddLiveRegister(source.ToLow()); 1508 locations->AddLiveRegister(source.ToHigh()); 1509 break; 1510 } 1511 case Location::kStackSlot: // Fall-through 1512 case Location::kDoubleStackSlot: // Fall-through 1513 case Location::kConstant: { 1514 // Nothing to do. 
1515 break; 1516 } 1517 default: { 1518 LOG(FATAL) << "Unexpected location for object"; 1519 } 1520 } 1521 } 1522 current = next_sibling; 1523 } while (current != nullptr); 1524 1525 if (kIsDebugBuild) { 1526 // Following uses can only be environment uses. The location for 1527 // these environments will be none. 1528 while (use != nullptr) { 1529 DCHECK(use->GetIsEnvironment()); 1530 use = use->GetNext(); 1531 } 1532 } 1533} 1534 1535void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, 1536 HBasicBlock* from, 1537 HBasicBlock* to) const { 1538 if (interval->GetNextSibling() == nullptr) { 1539 // Nothing to connect. The whole range was allocated to the same location. 1540 return; 1541 } 1542 1543 // Find the intervals that cover `from` and `to`. 1544 LiveInterval* destination = interval->GetSiblingAt(to->GetLifetimeStart()); 1545 LiveInterval* source = interval->GetSiblingAt(from->GetLifetimeEnd() - 1); 1546 1547 if (destination == source) { 1548 // Interval was not split. 1549 return; 1550 } 1551 DCHECK(destination != nullptr && source != nullptr); 1552 1553 if (!destination->HasRegister()) { 1554 // Values are eagerly spilled. Spill slot already contains appropriate value. 1555 return; 1556 } 1557 1558 // If `from` has only one successor, we can put the moves at the exit of it. Otherwise 1559 // we need to put the moves at the entry of `to`. 
1560 if (from->GetSuccessors().Size() == 1) { 1561 InsertParallelMoveAtExitOf(from, 1562 interval->GetParent()->GetDefinedBy(), 1563 source->ToLocation(), 1564 destination->ToLocation()); 1565 } else { 1566 DCHECK_EQ(to->GetPredecessors().Size(), 1u); 1567 InsertParallelMoveAtEntryOf(to, 1568 interval->GetParent()->GetDefinedBy(), 1569 source->ToLocation(), 1570 destination->ToLocation()); 1571 } 1572} 1573 1574void RegisterAllocator::Resolve() { 1575 codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(), 1576 maximum_number_of_live_core_registers_, 1577 maximum_number_of_live_fp_registers_, 1578 reserved_out_slots_, 1579 codegen_->GetGraph()->GetLinearOrder()); 1580 1581 // Adjust the Out Location of instructions. 1582 // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. 1583 for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { 1584 HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); 1585 LiveInterval* current = instruction->GetLiveInterval(); 1586 LocationSummary* locations = instruction->GetLocations(); 1587 Location location = locations->Out(); 1588 if (instruction->IsParameterValue()) { 1589 // Now that we know the frame size, adjust the parameter's location. 1590 if (location.IsStackSlot()) { 1591 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 1592 current->SetSpillSlot(location.GetStackIndex()); 1593 locations->UpdateOut(location); 1594 } else if (location.IsDoubleStackSlot()) { 1595 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 1596 current->SetSpillSlot(location.GetStackIndex()); 1597 locations->UpdateOut(location); 1598 } else if (current->HasSpillSlot()) { 1599 current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); 1600 } 1601 } else if (current->HasSpillSlot()) { 1602 // Adjust the stack slot, now that we know the number of them for each type. 
1603 // The way this implementation lays out the stack is the following: 1604 // [parameter slots ] 1605 // [double spill slots ] 1606 // [long spill slots ] 1607 // [float spill slots ] 1608 // [int/ref values ] 1609 // [maximum out values ] (number of arguments for calls) 1610 // [art method ]. 1611 uint32_t slot = current->GetSpillSlot(); 1612 switch (current->GetType()) { 1613 case Primitive::kPrimDouble: 1614 slot += long_spill_slots_.Size(); 1615 FALLTHROUGH_INTENDED; 1616 case Primitive::kPrimLong: 1617 slot += float_spill_slots_.Size(); 1618 FALLTHROUGH_INTENDED; 1619 case Primitive::kPrimFloat: 1620 slot += int_spill_slots_.Size(); 1621 FALLTHROUGH_INTENDED; 1622 case Primitive::kPrimNot: 1623 case Primitive::kPrimInt: 1624 case Primitive::kPrimChar: 1625 case Primitive::kPrimByte: 1626 case Primitive::kPrimBoolean: 1627 case Primitive::kPrimShort: 1628 slot += reserved_out_slots_; 1629 break; 1630 case Primitive::kPrimVoid: 1631 LOG(FATAL) << "Unexpected type for interval " << current->GetType(); 1632 } 1633 current->SetSpillSlot(slot * kVRegSize); 1634 } 1635 1636 Location source = current->ToLocation(); 1637 1638 if (location.IsUnallocated()) { 1639 if (location.GetPolicy() == Location::kSameAsFirstInput) { 1640 if (locations->InAt(0).IsUnallocated()) { 1641 locations->SetInAt(0, source); 1642 } else { 1643 DCHECK(locations->InAt(0).Equals(source)); 1644 } 1645 } 1646 locations->UpdateOut(source); 1647 } else { 1648 DCHECK(source.Equals(location)); 1649 } 1650 } 1651 1652 // Connect siblings. 1653 for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { 1654 HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); 1655 ConnectSiblings(instruction->GetLiveInterval()); 1656 } 1657 1658 // Resolve non-linear control flow across branches. Order does not matter. 
1659 for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { 1660 HBasicBlock* block = it.Current(); 1661 BitVector* live = liveness_.GetLiveInSet(*block); 1662 for (uint32_t idx : live->Indexes()) { 1663 HInstruction* current = liveness_.GetInstructionFromSsaIndex(idx); 1664 LiveInterval* interval = current->GetLiveInterval(); 1665 for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { 1666 ConnectSplitSiblings(interval, block->GetPredecessors().Get(i), block); 1667 } 1668 } 1669 } 1670 1671 // Resolve phi inputs. Order does not matter. 1672 for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { 1673 HBasicBlock* current = it.Current(); 1674 for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) { 1675 HInstruction* phi = inst_it.Current(); 1676 for (size_t i = 0, e = current->GetPredecessors().Size(); i < e; ++i) { 1677 HBasicBlock* predecessor = current->GetPredecessors().Get(i); 1678 DCHECK_EQ(predecessor->GetSuccessors().Size(), 1u); 1679 HInstruction* input = phi->InputAt(i); 1680 Location source = input->GetLiveInterval()->GetLocationAt( 1681 predecessor->GetLifetimeEnd() - 1); 1682 Location destination = phi->GetLiveInterval()->ToLocation(); 1683 InsertParallelMoveAtExitOf(predecessor, phi, source, destination); 1684 } 1685 } 1686 } 1687 1688 // Assign temp locations. 1689 for (size_t i = 0; i < temp_intervals_.Size(); ++i) { 1690 LiveInterval* temp = temp_intervals_.Get(i); 1691 if (temp->IsHighInterval()) { 1692 // High intervals can be skipped, they are already handled by the low interval. 
1693 continue; 1694 } 1695 HInstruction* at = liveness_.GetTempUser(temp); 1696 size_t temp_index = liveness_.GetTempIndex(temp); 1697 LocationSummary* locations = at->GetLocations(); 1698 switch (temp->GetType()) { 1699 case Primitive::kPrimInt: 1700 locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister())); 1701 break; 1702 1703 case Primitive::kPrimDouble: 1704 if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { 1705 Location location = Location::FpuRegisterPairLocation( 1706 temp->GetRegister(), temp->GetHighInterval()->GetRegister()); 1707 locations->SetTempAt(temp_index, location); 1708 } else { 1709 locations->SetTempAt(temp_index, Location::FpuRegisterLocation(temp->GetRegister())); 1710 } 1711 break; 1712 1713 default: 1714 LOG(FATAL) << "Unexpected type for temporary location " 1715 << temp->GetType(); 1716 } 1717 } 1718} 1719 1720} // namespace art 1721