optimizing_compiler.cc revision 827eedbfa882496407375f22b08243a38a5bd53b
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "optimizing_compiler.h"

#include <fstream>
#include <stdint.h>

#include "bounds_check_elimination.h"
#include "builder.h"
#include "code_generator.h"
#include "compiler.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "elf_writer_quick.h"
#include "graph_visualizer.h"
#include "gvn.h"
#include "inliner.h"
#include "instruction_simplifier.h"
#include "intrinsics.h"
#include "jni/quick/jni_compiler.h"
#include "mirror/art_method-inl.h"
#include "nodes.h"
#include "prepare_for_register_allocation.h"
#include "register_allocator.h"
#include "side_effects_analysis.h"
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
#include "utils/arena_allocator.h"

namespace art {

/**
 * Used by the code generator to allocate the generated code in a vector.
 */
class CodeVectorAllocator FINAL : public CodeAllocator {
 public:
  CodeVectorAllocator() : size_(0) {}

  uint8_t* Allocate(size_t size) OVERRIDE {
    size_ = size;
    memory_.resize(size);
    return &memory_[0];
  }

  size_t GetSize() const { return size_; }
  const std::vector<uint8_t>& GetMemory() const { return memory_; }

 private:
  std::vector<uint8_t> memory_;
  size_t size_;

  DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
};
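
// Usage sketch: the code generator is expected to call Allocate() with the
// final code size and emit the instructions into the returned buffer;
// GetMemory() is then wrapped in an ArrayRef when the CompiledMethod is built
// (see CompileBaseline and CompileOptimized below).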

/**
 * Filter to apply to the visualizer. Methods whose name contains the filter
 * will be dumped.
 */
static const char* kStringFilter = "";
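// For example, setting kStringFilter to "$opt$" would restrict dumping to the
// methods carrying the test marker used in Compile() below; left empty, it
// matches every method (assuming the visualizer matches by substring).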

class OptimizingCompiler FINAL : public Compiler {
 public:
  explicit OptimizingCompiler(CompilerDriver* driver);
  ~OptimizingCompiler();

  bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const
      OVERRIDE;

  CompiledMethod* Compile(const DexFile::CodeItem* code_item,
                          uint32_t access_flags,
                          InvokeType invoke_type,
                          uint16_t class_def_idx,
                          uint32_t method_idx,
                          jobject class_loader,
                          const DexFile& dex_file) const OVERRIDE;

  CompiledMethod* JniCompile(uint32_t access_flags,
                             uint32_t method_idx,
                             const DexFile& dex_file) const OVERRIDE;

  uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

  bool WriteElf(art::File* file,
                OatWriter* oat_writer,
                const std::vector<const art::DexFile*>& dex_files,
                const std::string& android_root,
                bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);

  Backend* GetCodeGenerator(CompilationUnit* cu ATTRIBUTE_UNUSED,
                            void* compilation_unit ATTRIBUTE_UNUSED) const OVERRIDE {
    return nullptr;
  }

  void InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const OVERRIDE {}

  void Init() OVERRIDE;

  void UnInit() const OVERRIDE {}

 private:
  // Whether we should run any optimization or register allocation. If false, we
  // just run code generation after the graph has been built. This is the case
  // for the kTime compiler filter (see the constructor below).
  const bool run_optimizations_;

  // Optimize and compile `graph`.
  CompiledMethod* CompileOptimized(HGraph* graph,
                                   CodeGenerator* codegen,
                                   CompilerDriver* driver,
                                   const DexCompilationUnit& dex_compilation_unit,
                                   const HGraphVisualizer& visualizer) const;

  // Just compile without doing optimizations.
  CompiledMethod* CompileBaseline(CodeGenerator* codegen,
                                  CompilerDriver* driver,
                                  const DexCompilationUnit& dex_compilation_unit) const;

  mutable OptimizingCompilerStats compilation_stats_;

  std::unique_ptr<std::ostream> visualizer_output_;

  DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
};

static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */

OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
    : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
      run_optimizations_(
          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime),
      compilation_stats_() {}

void OptimizingCompiler::Init() {
  // Enable C1visualizer output. Must be done in Init() because the compiler
  // driver is not fully initialized when passed to the compiler's constructor.
  CompilerDriver* driver = GetCompilerDriver();
  const std::string cfg_file_name = driver->GetDumpCfgFileName();
  if (!cfg_file_name.empty()) {
    CHECK_EQ(driver->GetThreadCount(), 1U)
      << "Graph visualizer requires the compiler to run single-threaded. "
      << "Invoke the compiler with '-j1'.";
    visualizer_output_.reset(new std::ofstream(cfg_file_name));
  }
}

OptimizingCompiler::~OptimizingCompiler() {
  compilation_stats_.Log();
}

bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
                                          const DexFile& dex_file ATTRIBUTE_UNUSED,
                                          CompilationUnit* cu ATTRIBUTE_UNUSED) const {
  return true;
}

CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
                                               uint32_t method_idx,
                                               const DexFile& dex_file) const {
  return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
}

uintptr_t OptimizingCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
  return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
      InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
}

bool OptimizingCompiler::WriteElf(art::File* file, OatWriter* oat_writer,
                                  const std::vector<const art::DexFile*>& dex_files,
                                  const std::string& android_root, bool is_host) const {
  return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
                                       *GetCompilerDriver());
}

static bool IsInstructionSetSupported(InstructionSet instruction_set) {
  return instruction_set == kArm64
      || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat)
      || instruction_set == kX86
      || instruction_set == kX86_64;
}

static bool CanOptimize(const DexFile::CodeItem& code_item) {
  // TODO: We currently cannot optimize methods with try/catch.
  return code_item.tries_size_ == 0;
}

static void RunOptimizations(HGraph* graph,
                             CompilerDriver* driver,
                             OptimizingCompilerStats* stats,
                             const DexCompilationUnit& dex_compilation_unit,
                             const HGraphVisualizer& visualizer) {
  SsaRedundantPhiElimination redundant_phi(graph);
  SsaDeadPhiElimination dead_phi(graph);
  HDeadCodeElimination dce(graph);
  HConstantFolding fold1(graph);
  InstructionSimplifier simplify1(graph);

  HInliner inliner(graph, dex_compilation_unit, driver, stats);

  HConstantFolding fold2(graph);
  SideEffectsAnalysis side_effects(graph);
  GVNOptimization gvn(graph, side_effects);
  BoundsCheckElimination bce(graph);
  InstructionSimplifier simplify2(graph);

  IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);

  HOptimization* optimizations[] = {
    &redundant_phi,
    &dead_phi,
    &intrinsics,
    &dce,
    &fold1,
    &simplify1,
    &inliner,
    &fold2,
    &side_effects,
    &gvn,
    &bce,
    &simplify2
  };
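
  // The order of this array is the order in which the passes run. Note that
  // side_effects must come before gvn, whose constructor above takes the
  // analysis as input.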
  for (size_t i = 0; i < arraysize(optimizations); ++i) {
    HOptimization* optimization = optimizations[i];
    visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/false);
    optimization->Run();
    visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/true);
    optimization->Check();
  }
}

// The stack map we generate must be 4-byte aligned on ARM. Since existing
// maps are generated alongside these stack maps, we must also align them.
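// For example, a 6-byte mapping table gets two zero bytes appended, for an
// aligned size of 8.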
static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
  size_t size = vector.size();
  size_t aligned_size = RoundUp(size, 4);
  for (; size < aligned_size; ++size) {
    vector.push_back(0);
  }
  return ArrayRef<const uint8_t>(vector);
}

CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
                                                     CodeGenerator* codegen,
                                                     CompilerDriver* compiler_driver,
                                                     const DexCompilationUnit& dex_compilation_unit,
                                                     const HGraphVisualizer& visualizer) const {
  RunOptimizations(
      graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer);

  PrepareForRegisterAllocation(graph).Run();
  SsaLivenessAnalysis liveness(*graph, codegen);
  liveness.Analyze();
  visualizer.DumpGraph(kLivenessPassName);

  RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
  register_allocator.AllocateRegisters();
  visualizer.DumpGraph(kRegisterAllocatorPassName);

  CodeVectorAllocator allocator;
  codegen->CompileOptimized(&allocator);

  std::vector<uint8_t> stack_map;
  codegen->BuildStackMaps(&stack_map);

  compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);

  return CompiledMethod::SwapAllocCompiledMethodStackMap(
      compiler_driver,
      codegen->GetInstructionSet(),
      ArrayRef<const uint8_t>(allocator.GetMemory()),
      codegen->GetFrameSize(),
      codegen->GetCoreSpillMask(),
      codegen->GetFpuSpillMask(),
      ArrayRef<const uint8_t>(stack_map));
}

CompiledMethod* OptimizingCompiler::CompileBaseline(
    CodeGenerator* codegen,
    CompilerDriver* compiler_driver,
    const DexCompilationUnit& dex_compilation_unit) const {
  CodeVectorAllocator allocator;
  codegen->CompileBaseline(&allocator);

  std::vector<uint8_t> mapping_table;
  DefaultSrcMap src_mapping_table;
  bool include_debug_symbols = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
  codegen->BuildMappingTable(&mapping_table, include_debug_symbols ? &src_mapping_table : nullptr);
  std::vector<uint8_t> vmap_table;
  codegen->BuildVMapTable(&vmap_table);
  std::vector<uint8_t> gc_map;
  codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);

  compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
  return CompiledMethod::SwapAllocCompiledMethod(compiler_driver,
                                                 codegen->GetInstructionSet(),
                                                 ArrayRef<const uint8_t>(allocator.GetMemory()),
                                                 codegen->GetFrameSize(),
                                                 codegen->GetCoreSpillMask(),
                                                 codegen->GetFpuSpillMask(),
                                                 &src_mapping_table,
                                                 AlignVectorSize(mapping_table),
                                                 AlignVectorSize(vmap_table),
                                                 AlignVectorSize(gc_map),
                                                 ArrayRef<const uint8_t>());
}

CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                            uint32_t access_flags,
                                            InvokeType invoke_type,
                                            uint16_t class_def_idx,
                                            uint32_t method_idx,
                                            jobject class_loader,
                                            const DexFile& dex_file) const {
  UNUSED(invoke_type);
  compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation);
  CompilerDriver* compiler_driver = GetCompilerDriver();
  InstructionSet instruction_set = compiler_driver->GetInstructionSet();
  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
  // overflow checks) assumes thumb2.
  if (instruction_set == kArm) {
    instruction_set = kThumb2;
  }

  // Do not attempt to compile on architectures we do not support.
  if (!IsInstructionSetSupported(instruction_set)) {
    compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa);
    return nullptr;
  }

  if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
    compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledPathological);
    return nullptr;
  }

  DexCompilationUnit dex_compilation_unit(
    nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
    class_def_idx, method_idx, access_flags,
    compiler_driver->GetVerifiedMethod(&dex_file, method_idx));

  std::string method_name = PrettyMethod(method_idx, dex_file);

  // For testing purposes, we put a special marker on method names that should be compiled
  // with this compiler. This makes sure we're not regressing.
  bool shouldCompile = method_name.find("$opt$") != std::string::npos;
  bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos;
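  // For example, a test method named "Main.$opt$testAdd" must compile with this
  // compiler (see the CHECKs on shouldCompile below), and "Main.$opt$reg$testAdd"
  // must in addition go through register allocation.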

  ArenaPool pool;
  ArenaAllocator arena(&pool);
  HGraphBuilder builder(&arena,
                        &dex_compilation_unit,
                        &dex_compilation_unit,
                        &dex_file,
                        compiler_driver,
                        &compilation_stats_);

  VLOG(compiler) << "Building " << PrettyMethod(method_idx, dex_file);
  HGraph* graph = builder.BuildGraph(*code_item);
  if (graph == nullptr) {
    CHECK(!shouldCompile) << "Could not build graph in optimizing compiler";
    return nullptr;
  }

  std::unique_ptr<CodeGenerator> codegen(
      CodeGenerator::Create(graph,
                            instruction_set,
                            *compiler_driver->GetInstructionSetFeatures(),
                            compiler_driver->GetCompilerOptions()));
  if (codegen.get() == nullptr) {
    CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
    compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
    return nullptr;
  }

  HGraphVisualizer visualizer(
      visualizer_output_.get(), graph, kStringFilter, *codegen.get(), method_name.c_str());
  visualizer.DumpGraph("builder");

  bool can_optimize = CanOptimize(*code_item);
  bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
  CompiledMethod* result = nullptr;
  if (run_optimizations_ && can_optimize && can_allocate_registers) {
    VLOG(compiler) << "Optimizing " << PrettyMethod(method_idx, dex_file);
    if (!graph->TryBuildingSsa()) {
      LOG(INFO) << "Skipping compilation of "
                << PrettyMethod(method_idx, dex_file)
                << ": it contains a non-natural loop";
      // We could not transform the graph to SSA; bail out.
      compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
    } else {
      result = CompileOptimized(graph,
                                codegen.get(),
                                compiler_driver,
                                dex_compilation_unit,
                                visualizer);
    }
  } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
    LOG(FATAL) << "Could not allocate registers in optimizing compiler";
    UNREACHABLE();
  } else {
    VLOG(compiler) << "Compile baseline " << PrettyMethod(method_idx, dex_file);

    if (!run_optimizations_) {
      compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedDisabled);
    } else if (!can_optimize) {
      compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedTryCatch);
    } else if (!can_allocate_registers) {
      compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
    }

    result = CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit);
  }
  return result;
}

Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
  return new OptimizingCompiler(driver);
}

}  // namespace art