122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/*
222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Copyright (C) 2016 The Android Open Source Project
322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames *
422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Licensed under the Apache License, Version 2.0 (the "License");
522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * you may not use this file except in compliance with the License.
622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * You may obtain a copy of the License at
722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames *
822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames *      http://www.apache.org/licenses/LICENSE-2.0
922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames *
1022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Unless required by applicable law or agreed to in writing, software
1122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * distributed under the License is distributed on an "AS IS" BASIS,
1222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * See the License for the specific language governing permissions and
1422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * limitations under the License.
1522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */
1622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
1722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_H_
1822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#define ART_COMPILER_OPTIMIZING_SCHEDULER_H_
1922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
2022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include <fstream>
2122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
22ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko#include "base/scoped_arena_allocator.h"
23ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko#include "base/scoped_arena_containers.h"
2422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "base/time_utils.h"
258cf9cb386cd9286d67e879f1ee501ec00d72a4e1Andreas Gampe#include "code_generator.h"
2622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "driver/compiler_driver.h"
272a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong#include "load_store_analysis.h"
2822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "nodes.h"
2922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "optimization.h"
3022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
3122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesnamespace art {
3222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
3322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// General description of instruction scheduling.
3422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
3522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// This pass tries to improve the quality of the generated code by reordering
3622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// instructions in the graph to avoid execution delays caused by execution
3722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// dependencies.
3822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Currently, scheduling is performed at the block level, so no `HInstruction`
3922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// ever leaves its block in this pass.
4022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
4122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The scheduling process iterates through blocks in the graph. For blocks that
4222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// we can and want to schedule:
4322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 1) Build a dependency graph for instructions.
4422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    It includes data dependencies (inputs/uses), but also environment
4522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    dependencies and side-effect dependencies.
4622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 2) Schedule the dependency graph.
4722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    This is a topological sort of the dependency graph, using heuristics to
4822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    decide what node to scheduler first when there are multiple candidates.
4922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
5022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// A few factors impacting the quality of the scheduling are:
5122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - The heuristics used to decide what node to schedule in the topological sort
5222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   when there are multiple valid candidates. There is a wide range of
5322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   complexity possible here, going from a simple model only considering
5422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   latencies, to a super detailed CPU pipeline model.
5522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - Fewer dependencies in the dependency graph give more freedom for the
5622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   scheduling heuristics. For example de-aliasing can allow possibilities for
5722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   reordering of memory accesses.
5822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - The level of abstraction of the IR. It is easier to evaluate scheduling for
5922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   IRs that translate to a single assembly instruction than for IRs
6022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   that generate multiple assembly instructions or generate different code
6122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   depending on properties of the IR.
6222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - Scheduling is performed before register allocation, it is not aware of the
6322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//   impact of moving instructions on register allocation.
6422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
6522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
6622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The scheduling code uses the terms predecessors, successors, and dependencies.
6722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// This can be confusing at times, so here are clarifications.
6822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// These terms are used from the point of view of the program dependency graph. So
6922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// the inputs of an instruction are part of its dependencies, and hence part its
7022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// predecessors. So the uses of an instruction are (part of) its successors.
7122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// (Side-effect dependencies can yield predecessors or successors that are not
7222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// inputs or uses.)
7322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
7422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Here is a trivial example. For the Java code:
7522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
7622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    int a = 1 + 2;
7722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
7822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// we would have the instructions
7922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
8022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    i1 HIntConstant 1
8122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    i2 HIntConstant 2
8222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    i3 HAdd [i1,i2]
8322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
8422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `i1` and `i2` are predecessors of `i3`.
8522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `i3` is a successor of `i1` and a successor of `i2`.
8622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// In a scheduling graph for this code we would have three nodes `n1`, `n2`,
8722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// and `n3` (respectively for instructions `i1`, `i1`, and `i3`).
8822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Conceptually the program dependency graph for this would contain two edges
8922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
9022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    n1 -> n3
9122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    n2 -> n3
9222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
9322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Since we schedule backwards (starting from the last instruction in each basic
9422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// block), the implementation of nodes keeps a list of pointers their
9522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// predecessors. So `n3` would keep pointers to its predecessors `n1` and `n2`.
9622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
9722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Node dependencies are also referred to from the program dependency graph
9822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// point of view: we say that node `B` immediately depends on `A` if there is an
9922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// edge from `A` to `B` in the program dependency graph. `A` is a predecessor of
10022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `B`, `B` is a successor of `A`. In the example above `n3` depends on `n1` and
10122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `n2`.
10222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Since nodes in the scheduling graph keep a list of their predecessors, node
10322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `B` will have a pointer to its predecessor `A`.
10422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// As we schedule backwards, `B` will be selected for scheduling before `A` is.
10522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
10622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// So the scheduling for the example above could happen as follow
10722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
10822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    |---------------------------+------------------------|
10922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    | candidates for scheduling | instructions scheduled |
11022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    | --------------------------+------------------------|
11122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
11222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The only node without successors is `n3`, so it is the only initial
11322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// candidate.
11422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
11522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    | n3                        | (none)                 |
11622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
11722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// We schedule `n3` as the last (and only) instruction. All its predecessors
11822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// that do not have any unscheduled successors become candidate. That is, `n1`
11922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// and `n2` become candidates.
12022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
12122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    | n1, n2                    | n3                     |
12222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
12322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// One of the candidates is selected. In practice this is where scheduling
12422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// heuristics kick in, to decide which of the candidates should be selected.
12522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// In this example, let it be `n1`. It is scheduled before previously scheduled
12622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// nodes (in program order). There are no other nodes to add to the list of
12722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// candidates.
12822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
12922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    | n2                        | n1                     |
13022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    |                           | n3                     |
13122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
13222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The only candidate available for scheduling is `n2`. Schedule it before
13322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// (in program order) the previously scheduled nodes.
13422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
13522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    | (none)                    | n2                     |
13622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    |                           | n1                     |
13722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    |                           | n3                     |
13822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//    |---------------------------+------------------------|
13922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames//
14022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// So finally the instructions will be executed in the order `i2`, `i1`, and `i3`.
14122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// In this trivial example, it does not matter which of `i1` and `i2` is
14222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// scheduled first since they are constants. However the same process would
14322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// apply if `i1` and `i2` were actual operations (for example `HMul` and `HDiv`).
14422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
14522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Set to true to have instruction scheduling dump scheduling graphs to the file
14622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `scheduling_graphs.dot`. See `SchedulingGraph::DumpAsDotGraph()`.
14722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesstatic constexpr bool kDumpDotSchedulingGraphs = false;
14822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
14922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Typically used as a default instruction latency.
15022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesstatic constexpr uint32_t kGenericInstructionLatency = 1;
15122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
15222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass HScheduler;
15322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
15422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/**
15522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * A node representing an `HInstruction` in the `SchedulingGraph`.
15622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */
157ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Markoclass SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> {
15822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
159e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko  SchedulingNode(HInstruction* instr, ScopedArenaAllocator* allocator, bool is_scheduling_barrier)
16022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames      : latency_(0),
16122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        internal_latency_(0),
16222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        critical_path_(0),
16322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        instruction_(instr),
16422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        is_scheduling_barrier_(is_scheduling_barrier),
165e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko        data_predecessors_(allocator->Adapter(kArenaAllocScheduler)),
166e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko        other_predecessors_(allocator->Adapter(kArenaAllocScheduler)),
16722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        num_unscheduled_successors_(0) {
16822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    data_predecessors_.reserve(kPreallocatedPredecessors);
16922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
17022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
17122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void AddDataPredecessor(SchedulingNode* predecessor) {
17222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    data_predecessors_.push_back(predecessor);
17322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    predecessor->num_unscheduled_successors_++;
17422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
17522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
176ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  const ScopedArenaVector<SchedulingNode*>& GetDataPredecessors() const {
177ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko    return data_predecessors_;
178ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  }
179ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko
18022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void AddOtherPredecessor(SchedulingNode* predecessor) {
18122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    other_predecessors_.push_back(predecessor);
18222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    predecessor->num_unscheduled_successors_++;
18322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
18422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
185ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  const ScopedArenaVector<SchedulingNode*>& GetOtherPredecessors() const {
186ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko    return other_predecessors_;
187ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  }
188ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko
18922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void DecrementNumberOfUnscheduledSuccessors() {
19022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    num_unscheduled_successors_--;
19122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
19222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
19322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void MaybeUpdateCriticalPath(uint32_t other_critical_path) {
19422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    critical_path_ = std::max(critical_path_, other_critical_path);
19522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
19622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
19722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool HasUnscheduledSuccessors() const {
19822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    return num_unscheduled_successors_ != 0;
19922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
20022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
20122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  HInstruction* GetInstruction() const { return instruction_; }
20222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t GetLatency() const { return latency_; }
20322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void SetLatency(uint32_t latency) { latency_ = latency; }
20422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t GetInternalLatency() const { return internal_latency_; }
20522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; }
20622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t GetCriticalPath() const { return critical_path_; }
20722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool IsSchedulingBarrier() const { return is_scheduling_barrier_; }
20822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
20922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames private:
21022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // The latency of this node. It represents the latency between the moment the
21122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // last instruction for this node has executed to the moment the result
21222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // produced by this node is available to users.
21322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t latency_;
21422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // This represents the time spent *within* the generated code for this node.
21522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // It should be zero for nodes that only generate a single instruction.
21622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t internal_latency_;
21722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
21822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // The critical path from this instruction to the end of scheduling. It is
21922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // used by the scheduling heuristics to measure the priority of this instruction.
22022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // It is defined as
22122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  //     critical_path_ = latency_ + max((use.internal_latency_ + use.critical_path_) for all uses)
22222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // (Note that here 'uses' is equivalent to 'data successors'. Also see comments in
22322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // `HScheduler::Schedule(SchedulingNode* scheduling_node)`).
22422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t critical_path_;
22522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
22622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // The instruction that this node represents.
22722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  HInstruction* const instruction_;
22822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
22922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // If a node is scheduling barrier, other nodes cannot be scheduled before it.
23022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  const bool is_scheduling_barrier_;
23122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
23222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // The lists of predecessors. They cannot be scheduled before this node. Once
23322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // this node is scheduled, we check whether any of its predecessors has become a
23422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // valid candidate for scheduling.
23522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // Predecessors in `data_predecessors_` are data dependencies. Those in
23622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // `other_predecessors_` contain side-effect dependencies, environment
23722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // dependencies, and scheduling barrier dependencies.
238ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  ScopedArenaVector<SchedulingNode*> data_predecessors_;
239ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  ScopedArenaVector<SchedulingNode*> other_predecessors_;
24022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
24122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // The number of unscheduled successors for this node. This number is
24222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // decremented as successors are scheduled. When it reaches zero this node
24322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // becomes a valid candidate to schedule.
24422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t num_unscheduled_successors_;
24522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
24622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  static constexpr size_t kPreallocatedPredecessors = 4;
24722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
24822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
24922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/*
25022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Directed acyclic graph for scheduling.
25122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */
25222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass SchedulingGraph : public ValueObject {
25322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
254e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko  SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator)
25522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames      : scheduler_(scheduler),
25669d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko        allocator_(allocator),
25722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        contains_scheduling_barrier_(false),
25869d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko        nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
2592a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong        heap_location_collector_(nullptr) {}
26022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
26122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
262ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko    std::unique_ptr<SchedulingNode> node(
26369d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko        new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier));
264ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko    SchedulingNode* result = node.get();
265ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko    nodes_map_.Insert(std::make_pair(instr, std::move(node)));
26622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    contains_scheduling_barrier_ |= is_scheduling_barrier;
26722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    AddDependencies(instr, is_scheduling_barrier);
268ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko    return result;
26922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
27022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
27122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Clear() {
27222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    nodes_map_.Clear();
27322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    contains_scheduling_barrier_ = false;
27422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
27522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
2762a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) {
2772a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong    heap_location_collector_ = &heap_location_collector;
2782a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  }
2792a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong
28022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  SchedulingNode* GetNode(const HInstruction* instr) const {
28122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    auto it = nodes_map_.Find(instr);
28222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    if (it == nodes_map_.end()) {
28322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames      return nullptr;
28422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    } else {
285ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko      return it->second.get();
28622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    }
28722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
28822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
28922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool IsSchedulingBarrier(const HInstruction* instruction) const;
29022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
29122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool HasImmediateDataDependency(const SchedulingNode* node, const SchedulingNode* other) const;
29222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool HasImmediateDataDependency(const HInstruction* node, const HInstruction* other) const;
29322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool HasImmediateOtherDependency(const SchedulingNode* node, const SchedulingNode* other) const;
29422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const;
29522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
29622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  size_t Size() const {
29722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    return nodes_map_.Size();
29822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
29922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
30022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // Dump the scheduling graph, in dot file format, appending it to the file
30122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // `scheduling_graphs.dot`.
30222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void DumpAsDotGraph(const std::string& description,
303ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko                      const ScopedArenaVector<SchedulingNode*>& initial_candidates);
30422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
30522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected:
30622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency);
30722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void AddDataDependency(SchedulingNode* node, SchedulingNode* dependency) {
30822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    AddDependency(node, dependency, /*is_data_dependency*/true);
30922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
31022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
31122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    AddDependency(node, dependency, /*is_data_dependency*/false);
31222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
3132a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  bool HasMemoryDependency(const HInstruction* node, const HInstruction* other) const;
3142a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  bool HasExceptionDependency(const HInstruction* node, const HInstruction* other) const;
3152a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) const;
3162a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  bool ArrayAccessMayAlias(const HInstruction* node, const HInstruction* other) const;
3172a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  bool FieldAccessMayAlias(const HInstruction* node, const HInstruction* other) const;
3182a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  size_t ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const;
3192a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  size_t FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const;
32022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
32122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // Add dependencies nodes for the given `HInstruction`: inputs, environments, and side-effects.
32222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void AddDependencies(HInstruction* instruction, bool is_scheduling_barrier = false);
32322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
32422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  const HScheduler* const scheduler_;
32522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
32669d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko  ScopedArenaAllocator* const allocator_;
32722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
32822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool contains_scheduling_barrier_;
32922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
330ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_;
3312a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong
3322a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong  const HeapLocationCollector* heap_location_collector_;
33322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
33422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
33522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/*
33622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * The visitors derived from this base class are used by schedulers to evaluate
33722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * the latencies of `HInstruction`s.
33822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */
33922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass SchedulingLatencyVisitor : public HGraphDelegateVisitor {
34022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
34122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // This class and its sub-classes will never be used to drive a visit of an
34222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // `HGraph` but only to visit `HInstructions` one at a time, so we do not need
34322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // to pass a valid graph to `HGraphDelegateVisitor()`.
344d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe  SchedulingLatencyVisitor()
345d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe      : HGraphDelegateVisitor(nullptr),
346d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe        last_visited_latency_(0),
347d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe        last_visited_internal_latency_(0) {}
34822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
34922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void VisitInstruction(HInstruction* instruction) OVERRIDE {
35022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
35122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        "Architecture-specific scheduling latency visitors must handle all instructions"
35222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        " (potentially by overriding the generic `VisitInstruction()`.";
35322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    UNREACHABLE();
35422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
35522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
35622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Visit(HInstruction* instruction) {
35722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    instruction->Accept(this);
35822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
35922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
36022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void CalculateLatency(SchedulingNode* node) {
36122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    // By default nodes have no internal latency.
36222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    last_visited_internal_latency_ = 0;
36322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    Visit(node->GetInstruction());
36422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
36522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
36622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t GetLastVisitedLatency() const { return last_visited_latency_; }
36722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t GetLastVisitedInternalLatency() const { return last_visited_internal_latency_; }
36822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
36922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected:
37022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // The latency of the most recent visited SchedulingNode.
37122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // This is for reporting the latency value to the user of this visitor.
37222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t last_visited_latency_;
37322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // This represents the time spent *within* the generated code for the most recent visited
37422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // SchedulingNode. This is for reporting the internal latency value to the user of this visitor.
37522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t last_visited_internal_latency_;
37622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
37722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
37822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
37922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
380ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
38122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames                                                 const SchedulingGraph& graph) = 0;
38222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  virtual ~SchedulingNodeSelector() {}
38322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected:
384ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  static void DeleteNodeAtIndex(ScopedArenaVector<SchedulingNode*>* nodes, size_t index) {
38522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    (*nodes)[index] = nodes->back();
38622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    nodes->pop_back();
38722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
38822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
38922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
39022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/*
39122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Select a `SchedulingNode` at random within the candidates.
39222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */
39322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass RandomSchedulingNodeSelector : public SchedulingNodeSelector {
39422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
3952ffb703bf431d74326c88266b4ddaf225eb3c6adIgor Murashkin  RandomSchedulingNodeSelector() : seed_(0) {
39622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    seed_  = static_cast<uint32_t>(NanoTime());
39722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    srand(seed_);
39822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
39922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
400ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
40122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames                                         const SchedulingGraph& graph) OVERRIDE {
40222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    UNUSED(graph);
40322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    DCHECK(!nodes->empty());
40422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    size_t select = rand_r(&seed_) % nodes->size();
40522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    SchedulingNode* select_node = (*nodes)[select];
40622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    DeleteNodeAtIndex(nodes, select);
40722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    return select_node;
40822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
40922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
41022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  uint32_t seed_;
41122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
41222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
41322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/*
41422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Select a `SchedulingNode` according to critical path information,
41522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * with heuristics to favor certain instruction patterns like materialized condition.
41622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */
41722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
41822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
41922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
42022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
421ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
42222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames                                         const SchedulingGraph& graph) OVERRIDE;
42322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
42422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected:
42522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate,
42622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames                                                  SchedulingNode* check) const;
42722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
428ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  SchedulingNode* SelectMaterializedCondition(ScopedArenaVector<SchedulingNode*>* nodes,
429ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko                                              const SchedulingGraph& graph) const;
43022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
43122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames private:
43222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  const SchedulingNode* prev_select_;
43322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
43422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
43522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass HScheduler {
43622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
437e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko  HScheduler(ScopedArenaAllocator* allocator,
43822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames             SchedulingLatencyVisitor* latency_visitor,
43922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames             SchedulingNodeSelector* selector)
440e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko      : allocator_(allocator),
44122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        latency_visitor_(latency_visitor),
44222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        selector_(selector),
44322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        only_optimize_loop_blocks_(true),
444e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko        scheduling_graph_(this, allocator),
445d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe        cursor_(nullptr),
446e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko        candidates_(allocator_->Adapter(kArenaAllocScheduler)) {}
44722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  virtual ~HScheduler() {}
44822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
44922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Schedule(HGraph* graph);
45022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
45122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void SetOnlyOptimizeLoopBlocks(bool loop_only) { only_optimize_loop_blocks_ = loop_only; }
45222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
45322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // Instructions can not be rescheduled across a scheduling barrier.
45422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
45522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
45622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected:
45722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Schedule(HBasicBlock* block);
45822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Schedule(SchedulingNode* scheduling_node);
45922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Schedule(HInstruction* instruction);
46022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
46122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // Any instruction returning `false` via this method will prevent its
46222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // containing basic block from being scheduled.
46322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // This method is used to restrict scheduling to instructions that we know are
46422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // safe to handle.
46589ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov  //
46689ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov  // For newly introduced instructions by default HScheduler::IsSchedulable returns false.
46789ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov  // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see
46889ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov  // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
46989ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov  // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction.
47022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  virtual bool IsSchedulable(const HInstruction* instruction) const;
47122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool IsSchedulable(const HBasicBlock* block) const;
47222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
47322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void CalculateLatency(SchedulingNode* node) {
47422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    latency_visitor_->CalculateLatency(node);
47522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    node->SetLatency(latency_visitor_->GetLastVisitedLatency());
47622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
47722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
47822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
479e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko  ScopedArenaAllocator* const allocator_;
48022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  SchedulingLatencyVisitor* const latency_visitor_;
48122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  SchedulingNodeSelector* const selector_;
48222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  bool only_optimize_loop_blocks_;
48322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
48422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // We instantiate the members below as part of this class to avoid
48522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // instantiating them locally for every chunk scheduled.
48622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  SchedulingGraph scheduling_graph_;
48722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // A pointer indicating where the next instruction to be scheduled will be inserted.
48822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  HInstruction* cursor_;
48922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // The list of candidates for scheduling. A node becomes a candidate when all
49022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  // its predecessors have been scheduled.
491ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko  ScopedArenaVector<SchedulingNode*> candidates_;
49222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
49322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames private:
49422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  DISALLOW_COPY_AND_ASSIGN(HScheduler);
49522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
49622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
49722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesinline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction) const {
49822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  return scheduler_->IsSchedulingBarrier(instruction);
49922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}
50022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
50122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass HInstructionScheduling : public HOptimization {
50222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public:
5032ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik  HInstructionScheduling(HGraph* graph,
5042ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik                         InstructionSet instruction_set,
5052ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik                         CodeGenerator* cg = nullptr,
5062ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik                         const char* name = kInstructionSchedulingPassName)
5072ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik      : HOptimization(graph, name),
508f7caf682c6b4769b2a3dd2f2241532b98709c1a3xueliang.zhong        codegen_(cg),
50922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames        instruction_set_(instruction_set) {}
51022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
51122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Run() {
51222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames    Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false);
51322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  }
51422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  void Run(bool only_optimize_loop_blocks, bool schedule_randomly);
51522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
5162ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik  static constexpr const char* kInstructionSchedulingPassName = "scheduler";
51722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
5182a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong private:
519f7caf682c6b4769b2a3dd2f2241532b98709c1a3xueliang.zhong  CodeGenerator* const codegen_;
52022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  const InstructionSet instruction_set_;
52122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames  DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling);
52222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames};
52322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
52422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}  // namespace art
52522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames
52622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_H_
527