122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/* 222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Copyright (C) 2016 The Android Open Source Project 322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * 422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Licensed under the Apache License, Version 2.0 (the "License"); 522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * you may not use this file except in compliance with the License. 622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * You may obtain a copy of the License at 722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * 822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * http://www.apache.org/licenses/LICENSE-2.0 922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * 1022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Unless required by applicable law or agreed to in writing, software 1122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * distributed under the License is distributed on an "AS IS" BASIS, 1222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * See the License for the specific language governing permissions and 1422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * limitations under the License. 1522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */ 1622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 1722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_H_ 1822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#define ART_COMPILER_OPTIMIZING_SCHEDULER_H_ 1922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 2022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include <fstream> 2122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 22ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko#include "base/scoped_arena_allocator.h" 23ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko#include "base/scoped_arena_containers.h" 2422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "base/time_utils.h" 258cf9cb386cd9286d67e879f1ee501ec00d72a4e1Andreas Gampe#include "code_generator.h" 2622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "driver/compiler_driver.h" 272a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong#include "load_store_analysis.h" 2822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "nodes.h" 2922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#include "optimization.h" 3022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 3122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesnamespace art { 3222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 3322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// General description of instruction scheduling. 3422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 3522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// This pass tries to improve the quality of the generated code by reordering 3622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// instructions in the graph to avoid execution delays caused by execution 3722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// dependencies. 3822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Currently, scheduling is performed at the block level, so no `HInstruction` 3922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// ever leaves its block in this pass. 4022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 4122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The scheduling process iterates through blocks in the graph. For blocks that 4222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// we can and want to schedule: 4322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 1) Build a dependency graph for instructions. 4422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// It includes data dependencies (inputs/uses), but also environment 4522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// dependencies and side-effect dependencies. 4622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 2) Schedule the dependency graph. 4722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// This is a topological sort of the dependency graph, using heuristics to 4822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// decide what node to scheduler first when there are multiple candidates. 4922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 5022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// A few factors impacting the quality of the scheduling are: 5122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - The heuristics used to decide what node to schedule in the topological sort 5222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// when there are multiple valid candidates. There is a wide range of 5322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// complexity possible here, going from a simple model only considering 5422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// latencies, to a super detailed CPU pipeline model. 5522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - Fewer dependencies in the dependency graph give more freedom for the 5622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// scheduling heuristics. For example de-aliasing can allow possibilities for 5722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// reordering of memory accesses. 5822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - The level of abstraction of the IR. It is easier to evaluate scheduling for 5922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// IRs that translate to a single assembly instruction than for IRs 6022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// that generate multiple assembly instructions or generate different code 6122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// depending on properties of the IR. 6222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// - Scheduling is performed before register allocation, it is not aware of the 6322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// impact of moving instructions on register allocation. 6422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 6522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 6622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The scheduling code uses the terms predecessors, successors, and dependencies. 6722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// This can be confusing at times, so here are clarifications. 6822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// These terms are used from the point of view of the program dependency graph. So 6922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// the inputs of an instruction are part of its dependencies, and hence part its 7022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// predecessors. So the uses of an instruction are (part of) its successors. 7122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// (Side-effect dependencies can yield predecessors or successors that are not 7222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// inputs or uses.) 7322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 7422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Here is a trivial example. For the Java code: 7522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 7622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// int a = 1 + 2; 7722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 7822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// we would have the instructions 7922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 8022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// i1 HIntConstant 1 8122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// i2 HIntConstant 2 8222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// i3 HAdd [i1,i2] 8322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 8422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `i1` and `i2` are predecessors of `i3`. 8522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `i3` is a successor of `i1` and a successor of `i2`. 8622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// In a scheduling graph for this code we would have three nodes `n1`, `n2`, 8722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// and `n3` (respectively for instructions `i1`, `i1`, and `i3`). 8822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Conceptually the program dependency graph for this would contain two edges 8922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 9022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// n1 -> n3 9122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// n2 -> n3 9222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 9322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Since we schedule backwards (starting from the last instruction in each basic 9422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// block), the implementation of nodes keeps a list of pointers their 9522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// predecessors. So `n3` would keep pointers to its predecessors `n1` and `n2`. 9622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 9722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Node dependencies are also referred to from the program dependency graph 9822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// point of view: we say that node `B` immediately depends on `A` if there is an 9922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// edge from `A` to `B` in the program dependency graph. `A` is a predecessor of 10022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `B`, `B` is a successor of `A`. In the example above `n3` depends on `n1` and 10122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `n2`. 10222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Since nodes in the scheduling graph keep a list of their predecessors, node 10322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `B` will have a pointer to its predecessor `A`. 10422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// As we schedule backwards, `B` will be selected for scheduling before `A` is. 10522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 10622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// So the scheduling for the example above could happen as follow 10722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 10822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// |---------------------------+------------------------| 10922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | candidates for scheduling | instructions scheduled | 11022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | --------------------------+------------------------| 11122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 11222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The only node without successors is `n3`, so it is the only initial 11322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// candidate. 11422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 11522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | n3 | (none) | 11622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 11722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// We schedule `n3` as the last (and only) instruction. All its predecessors 11822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// that do not have any unscheduled successors become candidate. That is, `n1` 11922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// and `n2` become candidates. 12022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 12122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | n1, n2 | n3 | 12222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 12322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// One of the candidates is selected. In practice this is where scheduling 12422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// heuristics kick in, to decide which of the candidates should be selected. 12522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// In this example, let it be `n1`. It is scheduled before previously scheduled 12622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// nodes (in program order). There are no other nodes to add to the list of 12722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// candidates. 12822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 12922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | n2 | n1 | 13022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | | n3 | 13122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 13222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// The only candidate available for scheduling is `n2`. Schedule it before 13322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// (in program order) the previously scheduled nodes. 13422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 13522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | (none) | n2 | 13622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | | n1 | 13722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// | | n3 | 13822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// |---------------------------+------------------------| 13922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// 14022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// So finally the instructions will be executed in the order `i2`, `i1`, and `i3`. 14122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// In this trivial example, it does not matter which of `i1` and `i2` is 14222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// scheduled first since they are constants. However the same process would 14322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// apply if `i1` and `i2` were actual operations (for example `HMul` and `HDiv`). 14422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 14522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Set to true to have instruction scheduling dump scheduling graphs to the file 14622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// `scheduling_graphs.dot`. See `SchedulingGraph::DumpAsDotGraph()`. 14722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesstatic constexpr bool kDumpDotSchedulingGraphs = false; 14822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 14922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames// Typically used as a default instruction latency. 15022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesstatic constexpr uint32_t kGenericInstructionLatency = 1; 15122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 15222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass HScheduler; 15322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 15422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/** 15522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * A node representing an `HInstruction` in the `SchedulingGraph`. 15622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */ 157ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Markoclass SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { 15822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 159e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko SchedulingNode(HInstruction* instr, ScopedArenaAllocator* allocator, bool is_scheduling_barrier) 16022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames : latency_(0), 16122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames internal_latency_(0), 16222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames critical_path_(0), 16322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames instruction_(instr), 16422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames is_scheduling_barrier_(is_scheduling_barrier), 165e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko data_predecessors_(allocator->Adapter(kArenaAllocScheduler)), 166e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko other_predecessors_(allocator->Adapter(kArenaAllocScheduler)), 16722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames num_unscheduled_successors_(0) { 16822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames data_predecessors_.reserve(kPreallocatedPredecessors); 16922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 17022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 17122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void AddDataPredecessor(SchedulingNode* predecessor) { 17222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames data_predecessors_.push_back(predecessor); 17322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames predecessor->num_unscheduled_successors_++; 17422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 17522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 176ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko const ScopedArenaVector<SchedulingNode*>& GetDataPredecessors() const { 177ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko return data_predecessors_; 178ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko } 179ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko 18022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void AddOtherPredecessor(SchedulingNode* predecessor) { 18122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames other_predecessors_.push_back(predecessor); 18222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames predecessor->num_unscheduled_successors_++; 18322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 18422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 185ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko const ScopedArenaVector<SchedulingNode*>& GetOtherPredecessors() const { 186ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko return other_predecessors_; 187ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko } 188ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko 18922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void DecrementNumberOfUnscheduledSuccessors() { 19022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames num_unscheduled_successors_--; 19122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 19222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 19322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void MaybeUpdateCriticalPath(uint32_t other_critical_path) { 19422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames critical_path_ = std::max(critical_path_, other_critical_path); 19522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 19622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 19722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool HasUnscheduledSuccessors() const { 19822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames return num_unscheduled_successors_ != 0; 19922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 20022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 20122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames HInstruction* GetInstruction() const { return instruction_; } 20222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t GetLatency() const { return latency_; } 20322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void SetLatency(uint32_t latency) { latency_ = latency; } 20422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t GetInternalLatency() const { return internal_latency_; } 20522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; } 20622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t GetCriticalPath() const { return critical_path_; } 20722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool IsSchedulingBarrier() const { return is_scheduling_barrier_; } 20822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 20922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames private: 21022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // The latency of this node. It represents the latency between the moment the 21122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // last instruction for this node has executed to the moment the result 21222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // produced by this node is available to users. 21322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t latency_; 21422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // This represents the time spent *within* the generated code for this node. 21522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // It should be zero for nodes that only generate a single instruction. 21622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t internal_latency_; 21722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 21822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // The critical path from this instruction to the end of scheduling. It is 21922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // used by the scheduling heuristics to measure the priority of this instruction. 22022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // It is defined as 22122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // critical_path_ = latency_ + max((use.internal_latency_ + use.critical_path_) for all uses) 22222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // (Note that here 'uses' is equivalent to 'data successors'. Also see comments in 22322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // `HScheduler::Schedule(SchedulingNode* scheduling_node)`). 22422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t critical_path_; 22522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 22622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // The instruction that this node represents. 22722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames HInstruction* const instruction_; 22822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 22922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // If a node is scheduling barrier, other nodes cannot be scheduled before it. 23022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames const bool is_scheduling_barrier_; 23122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 23222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // The lists of predecessors. They cannot be scheduled before this node. Once 23322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // this node is scheduled, we check whether any of its predecessors has become a 23422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // valid candidate for scheduling. 23522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // Predecessors in `data_predecessors_` are data dependencies. Those in 23622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // `other_predecessors_` contain side-effect dependencies, environment 23722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // dependencies, and scheduling barrier dependencies. 238ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko ScopedArenaVector<SchedulingNode*> data_predecessors_; 239ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko ScopedArenaVector<SchedulingNode*> other_predecessors_; 24022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 24122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // The number of unscheduled successors for this node. This number is 24222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // decremented as successors are scheduled. When it reaches zero this node 24322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // becomes a valid candidate to schedule. 24422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t num_unscheduled_successors_; 24522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 24622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames static constexpr size_t kPreallocatedPredecessors = 4; 24722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 24822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 24922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/* 25022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Directed acyclic graph for scheduling. 25122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */ 25222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass SchedulingGraph : public ValueObject { 25322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 254e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator) 25522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames : scheduler_(scheduler), 25669d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko allocator_(allocator), 25722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames contains_scheduling_barrier_(false), 25869d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko nodes_map_(allocator_->Adapter(kArenaAllocScheduler)), 2592a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong heap_location_collector_(nullptr) {} 26022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 26122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) { 262ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko std::unique_ptr<SchedulingNode> node( 26369d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier)); 264ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko SchedulingNode* result = node.get(); 265ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko nodes_map_.Insert(std::make_pair(instr, std::move(node))); 26622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames contains_scheduling_barrier_ |= is_scheduling_barrier; 26722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames AddDependencies(instr, is_scheduling_barrier); 268ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko return result; 26922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 27022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 27122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Clear() { 27222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames nodes_map_.Clear(); 27322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames contains_scheduling_barrier_ = false; 27422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 27522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 2762a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) { 2772a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong heap_location_collector_ = &heap_location_collector; 2782a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong } 2792a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong 28022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingNode* GetNode(const HInstruction* instr) const { 28122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames auto it = nodes_map_.Find(instr); 28222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames if (it == nodes_map_.end()) { 28322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames return nullptr; 28422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } else { 285ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko return it->second.get(); 28622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 28722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 28822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 28922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool IsSchedulingBarrier(const HInstruction* instruction) const; 29022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 29122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool HasImmediateDataDependency(const SchedulingNode* node, const SchedulingNode* other) const; 29222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool HasImmediateDataDependency(const HInstruction* node, const HInstruction* other) const; 29322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool HasImmediateOtherDependency(const SchedulingNode* node, const SchedulingNode* other) const; 29422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const; 29522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 29622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames size_t Size() const { 29722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames return nodes_map_.Size(); 29822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 29922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 30022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // Dump the scheduling graph, in dot file format, appending it to the file 30122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // `scheduling_graphs.dot`. 30222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void DumpAsDotGraph(const std::string& description, 303ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko const ScopedArenaVector<SchedulingNode*>& initial_candidates); 30422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 30522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected: 30622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency); 30722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void AddDataDependency(SchedulingNode* node, SchedulingNode* dependency) { 30822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames AddDependency(node, dependency, /*is_data_dependency*/true); 30922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 31022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) { 31122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames AddDependency(node, dependency, /*is_data_dependency*/false); 31222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 3132a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong bool HasMemoryDependency(const HInstruction* node, const HInstruction* other) const; 3142a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong bool HasExceptionDependency(const HInstruction* node, const HInstruction* other) const; 3152a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) const; 3162a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong bool ArrayAccessMayAlias(const HInstruction* node, const HInstruction* other) const; 3172a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong bool FieldAccessMayAlias(const HInstruction* node, const HInstruction* other) const; 3182a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong size_t ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const; 3192a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong size_t FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const; 32022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 32122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // Add dependencies nodes for the given `HInstruction`: inputs, environments, and side-effects. 32222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void AddDependencies(HInstruction* instruction, bool is_scheduling_barrier = false); 32322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 32422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames const HScheduler* const scheduler_; 32522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 32669d310e0317e2fce97bf8c9c133c5c2c0332e61dVladimir Marko ScopedArenaAllocator* const allocator_; 32722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 32822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool contains_scheduling_barrier_; 32922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 330ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_; 3312a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong 3322a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong const HeapLocationCollector* heap_location_collector_; 33322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 33422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 33522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/* 33622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * The visitors derived from this base class are used by schedulers to evaluate 33722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * the latencies of `HInstruction`s. 33822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */ 33922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass SchedulingLatencyVisitor : public HGraphDelegateVisitor { 34022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 34122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // This class and its sub-classes will never be used to drive a visit of an 34222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // `HGraph` but only to visit `HInstructions` one at a time, so we do not need 34322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // to pass a valid graph to `HGraphDelegateVisitor()`. 344d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe SchedulingLatencyVisitor() 345d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe : HGraphDelegateVisitor(nullptr), 346d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe last_visited_latency_(0), 347d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe last_visited_internal_latency_(0) {} 34822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 34922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void VisitInstruction(HInstruction* instruction) OVERRIDE { 35022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". " 35122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames "Architecture-specific scheduling latency visitors must handle all instructions" 35222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames " (potentially by overriding the generic `VisitInstruction()`."; 35322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames UNREACHABLE(); 35422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 35522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 35622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Visit(HInstruction* instruction) { 35722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames instruction->Accept(this); 35822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 35922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 36022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void CalculateLatency(SchedulingNode* node) { 36122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // By default nodes have no internal latency. 36222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames last_visited_internal_latency_ = 0; 36322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames Visit(node->GetInstruction()); 36422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 36522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 36622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t GetLastVisitedLatency() const { return last_visited_latency_; } 36722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t GetLastVisitedInternalLatency() const { return last_visited_internal_latency_; } 36822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 36922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected: 37022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // The latency of the most recent visited SchedulingNode. 37122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // This is for reporting the latency value to the user of this visitor. 37222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t last_visited_latency_; 37322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // This represents the time spent *within* the generated code for the most recent visited 37422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // SchedulingNode. This is for reporting the internal latency value to the user of this visitor. 37522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t last_visited_internal_latency_; 37622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 37722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 37822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> { 37922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 380ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, 38122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames const SchedulingGraph& graph) = 0; 38222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames virtual ~SchedulingNodeSelector() {} 38322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected: 384ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko static void DeleteNodeAtIndex(ScopedArenaVector<SchedulingNode*>* nodes, size_t index) { 38522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames (*nodes)[index] = nodes->back(); 38622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames nodes->pop_back(); 38722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 38822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 38922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 39022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/* 39122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Select a `SchedulingNode` at random within the candidates. 39222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */ 39322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass RandomSchedulingNodeSelector : public SchedulingNodeSelector { 39422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 3952ffb703bf431d74326c88266b4ddaf225eb3c6adIgor Murashkin RandomSchedulingNodeSelector() : seed_(0) { 39622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames seed_ = static_cast<uint32_t>(NanoTime()); 39722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames srand(seed_); 39822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 39922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 400ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, 40122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames const SchedulingGraph& graph) OVERRIDE { 40222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames UNUSED(graph); 40322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames DCHECK(!nodes->empty()); 40422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames size_t select = rand_r(&seed_) % nodes->size(); 40522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingNode* select_node = (*nodes)[select]; 40622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames DeleteNodeAtIndex(nodes, select); 40722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames return select_node; 40822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 40922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 41022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames uint32_t seed_; 41122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 41222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 41322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames/* 41422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * Select a `SchedulingNode` according to critical path information, 41522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames * with heuristics to favor certain instruction patterns like materialized condition. 41622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames */ 41722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { 41822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 41922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {} 42022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 421ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, 42222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames const SchedulingGraph& graph) OVERRIDE; 42322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 42422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected: 42522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate, 42622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingNode* check) const; 42722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 428ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko SchedulingNode* SelectMaterializedCondition(ScopedArenaVector<SchedulingNode*>* nodes, 429ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko const SchedulingGraph& graph) const; 43022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 43122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames private: 43222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames const SchedulingNode* prev_select_; 43322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 43422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 43522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass HScheduler { 43622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 437e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko HScheduler(ScopedArenaAllocator* allocator, 43822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingLatencyVisitor* latency_visitor, 43922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingNodeSelector* selector) 440e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko : allocator_(allocator), 44122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames latency_visitor_(latency_visitor), 44222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames selector_(selector), 44322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames only_optimize_loop_blocks_(true), 444e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko scheduling_graph_(this, allocator), 445d9911eeca13f609c885e0f6a5ce81af9b6340bfaAndreas Gampe cursor_(nullptr), 446e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko candidates_(allocator_->Adapter(kArenaAllocScheduler)) {} 44722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames virtual ~HScheduler() {} 44822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 44922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Schedule(HGraph* graph); 45022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 45122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void SetOnlyOptimizeLoopBlocks(bool loop_only) { only_optimize_loop_blocks_ = loop_only; } 45222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 45322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // Instructions can not be rescheduled across a scheduling barrier. 45422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames virtual bool IsSchedulingBarrier(const HInstruction* instruction) const; 45522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 45622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames protected: 45722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Schedule(HBasicBlock* block); 45822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Schedule(SchedulingNode* scheduling_node); 45922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Schedule(HInstruction* instruction); 46022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 46122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // Any instruction returning `false` via this method will prevent its 46222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // containing basic block from being scheduled. 46322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // This method is used to restrict scheduling to instructions that we know are 46422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // safe to handle. 46589ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov // 46689ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov // For newly introduced instructions by default HScheduler::IsSchedulable returns false. 46789ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see 46889ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also 46989ff8b23f7c4189ba82407d95c3100c2f397cf19Artem Serov // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction. 47022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames virtual bool IsSchedulable(const HInstruction* instruction) const; 47122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool IsSchedulable(const HBasicBlock* block) const; 47222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 47322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void CalculateLatency(SchedulingNode* node) { 47422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames latency_visitor_->CalculateLatency(node); 47522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames node->SetLatency(latency_visitor_->GetLastVisitedLatency()); 47622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency()); 47722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 47822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 479e764d2e50c544c2cb98ee61a15d613161ac6bd17Vladimir Marko ScopedArenaAllocator* const allocator_; 48022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingLatencyVisitor* const latency_visitor_; 48122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingNodeSelector* const selector_; 48222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames bool only_optimize_loop_blocks_; 48322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 48422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // We instantiate the members below as part of this class to avoid 48522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // instantiating them locally for every chunk scheduled. 48622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames SchedulingGraph scheduling_graph_; 48722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // A pointer indicating where the next instruction to be scheduled will be inserted. 48822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames HInstruction* cursor_; 48922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // The list of candidates for scheduling. A node becomes a candidate when all 49022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames // its predecessors have been scheduled. 491ca6fff898afcb62491458ae8bcd428bfb3043da1Vladimir Marko ScopedArenaVector<SchedulingNode*> candidates_; 49222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 49322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames private: 49422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames DISALLOW_COPY_AND_ASSIGN(HScheduler); 49522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 49622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 49722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesinline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction) const { 49822aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames return scheduler_->IsSchedulingBarrier(instruction); 49922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames} 50022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 50122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Ramesclass HInstructionScheduling : public HOptimization { 50222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames public: 5032ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik HInstructionScheduling(HGraph* graph, 5042ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik InstructionSet instruction_set, 5052ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik CodeGenerator* cg = nullptr, 5062ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik const char* name = kInstructionSchedulingPassName) 5072ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik : HOptimization(graph, name), 508f7caf682c6b4769b2a3dd2f2241532b98709c1a3xueliang.zhong codegen_(cg), 50922aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames instruction_set_(instruction_set) {} 51022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 51122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Run() { 51222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false); 51322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames } 51422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames void Run(bool only_optimize_loop_blocks, bool schedule_randomly); 51522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 5162ca10eb3f47ef3c2535c137853f7a63d10bb908bAart Bik static constexpr const char* kInstructionSchedulingPassName = "scheduler"; 51722aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 5182a3471fc83383bfe3e060799482e372420ba6150xueliang.zhong private: 519f7caf682c6b4769b2a3dd2f2241532b98709c1a3xueliang.zhong CodeGenerator* const codegen_; 52022aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames const InstructionSet instruction_set_; 52122aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling); 52222aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames}; 52322aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 52422aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames} // namespace art 52522aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames 52622aa54bf8469689c7c6c33f15ff4df2ffba8fa15Alexandre Rames#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_H_ 527