// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}


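// Return the first node in the ready list whose operands are available at
// |cycle|. Since the list is kept sorted by decreasing total latency (see
// AddNode above), this picks the ready node on the longest remaining path
// through the block. Returns nullptr if no node is ready yet.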
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}


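// Used instead of CriticalPathFirstQueue when
// FLAG_turbo_stress_instruction_scheduling is set (see EndBlock): picks a
// random ready node rather than following the critical path.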
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
      static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


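// A node wraps a single instruction. Its per-instruction latency comes from
// GetInstructionLatency(); total_latency_ and start_cycle_ stay at -1 until
// they are computed during scheduling.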
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
    Zone* zone,
    Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}


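// Record that |node| must not be scheduled before this node: add it to the
// successor list and bump its unscheduled-predecessor count.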
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}


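// The scheduler builds a dependency graph for one basic block at a time; the
// members reset in EndBlock (last side effect, pending loads, last deopt,
// operand map) are the per-block state used while the graph is built.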
InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_(nullptr),
      operands_map_(zone) {}


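// StartBlock and EndBlock bracket the instructions of one basic block: the
// dependency graph is built while instructions are added, and the block is
// scheduled and emitted in EndBlock.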
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK(last_side_effect_instr_ == nullptr);
  DCHECK(pending_loads_.empty());
  DCHECK(last_live_in_reg_marker_ == nullptr);
  DCHECK(last_deopt_ == nullptr);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}


void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_ = nullptr;
  operands_map_.clear();
}


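// Add |instr| to the dependency graph. Edges are added so that block
// terminators stay last, fixed register parameters are kept ahead of the
// instructions that follow them, instructions that depend on a deoptimization
// point are not hoisted above it, memory operations keep their order relative
// to side effects, and every use of a virtual register is ordered after its
// definition.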
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as a successor of every instruction.
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization point when they depend on it.
    if ((last_deopt_ != nullptr) && DependsOnDeoptimization(instr)) {
      last_deopt_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with instructions that have side
      // effects, but independent loads can be reordered with respect to each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall()) {
      // Ensure that deopts are not reordered with respect to side-effect
      // instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}


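// List scheduling: nodes with no unscheduled predecessors sit in the ready
// list; every iteration advances the cycle count by one and asks the queue
// for a candidate to emit. Scheduling a node may make its successors ready.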
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(),
                     cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}


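// Flags for the architecture-independent opcodes; target-specific opcodes
// are handled by GetTargetInstructionFlags().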
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchTruncateDoubleToI:
    case kArchStackSlot:
    case kArchDebugBreak:
    case kArchComment:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // ArchStackPointer instruction loads the current stack pointer value
      // and must not be reordered with instructions that have side effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallJSFunctionFromJSFunction:
    case kArchTailCallAddress:
      return kHasSideEffect | kIsBlockTerminator;

    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
  return kNoOpcodeFlags;
}


bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
  return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
          (instr->flags_mode() == kFlags_branch));
}


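// Compute, for every node, the length of the longest latency path from that
// node to the end of the block. graph_ holds nodes in instruction order and
// successors always appear later, so walking it in reverse guarantees that
// each successor's total latency is already known.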
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK(successor->total_latency() != -1);
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8