call_arm64.cc revision 3c12c512faf6837844d5465b23b9410889e5eb11
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Arm64 ISA. */

#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "gc/accounting/card_table.h"
#include "entrypoints/quick/quick_entrypoints.h"

namespace art {

bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
                                  const InlineMethod& special) {
  // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
  //   (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
  // return Mir2Lir::GenSpecialCase(bb, mir, special);
  return false;
}

/*
 * The sparse table in the literal pool is an array of <key,displacement>
 * pairs.  For each entry, we load the key/displacement pair with a single ldp.
 * The test loop will look something like:
 *
 *   adr   r_base, <table>
 *   ldr   r_val, [rA64_SP, v_reg_off]
 *   mov   r_idx, #table_size
 * loop:
 *   cbz   r_idx, quit
 *   ldp   r_key, r_disp, [r_base], #8
 *   sub   r_idx, #1
 *   cmp   r_val, r_key
 *   b.ne  loop
 *   adr   r_base, #0        ; This is the instruction from which we compute displacements
 *   add   r_base, r_disp
 *   br    r_base
 * quit:
 */
void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
                                   RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpSparseSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage r_base = AllocTempWide();
  // Allocate key and disp temps.
  RegStorage r_key = AllocTemp();
  RegStorage r_disp = AllocTemp();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
  // Set up r_idx
  RegStorage r_idx = AllocTemp();
  LoadConstant(r_idx, size);

  // Entry of loop.
  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);

  // Load next key/disp.
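  // The immediate of 2 encodes the 8-byte post-increment shown as "#8" in the sketch above
  // (the ldp post-index immediate is scaled by the 4-byte register size).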
  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
  OpRegRegImm(kOpSub, r_idx, r_idx, 1);

  // Go to next case, if key does not match.
  OpRegReg(kOpCmp, r_key, rl_src.reg);
  OpCondBranch(kCondNe, loop_entry);

  // Key does match: branch to case label.
  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  OpRegRegRegExtend(kOpAdd, r_base, r_base, As64BitReg(r_disp), kA64Sxtw, 0U);
  NewLIR1(kA64Br1x, r_base.GetReg());

  // Loop exit label.
  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
  branch_out->target = loop_exit;
}


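/*
 * The packed switch data is an array of 32-bit displacements indexed by (key - low_key).
 * The generated code looks roughly like this (register names are illustrative):
 *
 *   adr   r_table, <table>
 *   sub   r_key, r_val, #low_key      ; omitted when low_key == 0
 *   cmp   r_key, #(size - 1)
 *   b.hi  done
 *   ldr   w_disp, [r_table, x_key, lsl #2]
 *   adr   r_branch, #0                ; This is the instruction from which we compute displacements
 *   add   r_branch, r_branch, w_disp, sxtw
 *   br    r_branch
 * done:
 */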
void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
                                   RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpPackedSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets =
      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage table_base = AllocTempWide();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
  int low_key = s4FromSwitchData(&table[2]);
  RegStorage key_reg;
  // Remove the bias, if necessary
  if (low_key == 0) {
    key_reg = rl_src.reg;
  } else {
    key_reg = AllocTemp();
    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
  }
  // Bounds check - if < 0 or >= size continue following switch
  OpRegImm(kOpCmp, key_reg, size - 1);
  LIR* branch_over = OpCondBranch(kCondHi, NULL);

  // Load the displacement from the switch table
  RegStorage disp_reg = AllocTemp();
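  // Each table entry is a 32-bit displacement; the shift amount of 2 scales the key by 4 bytes.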
  LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32);

  // Get base branch address.
  RegStorage branch_reg = AllocTempWide();
  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  OpRegRegRegExtend(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), kA64Sxtw, 0U);
  NewLIR1(kA64Br1x, branch_reg.GetReg());

  // branch_over target here
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
}

/*
 * Array data table format:
 *  ushort ident = 0x0300   magic value
 *  ushort width            width of each element in the table
 *  uint   size             number of elements in the table
 *  ubyte  data[size*width] table of data values (may contain a single-byte
 *                          padding at the end)
 *
 * Total size is 4+(width * size + 1)/2 16-bit code units.
 */
void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  // Add the table to the list - we'll process it later
  FillArrayData *tab_rec =
      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint16_t width = tab_rec->table[1];
  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
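  // Total size in bytes: the 8-byte header (ident, width, size) plus size * width bytes of data.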
  tab_rec->size = (size * width) + 8;

  fill_array_data_.Insert(tab_rec);

  // Making a call - use explicit registers
  FlushAllRegs();   /* Everything to home location */
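  // The runtime entrypoint is called with the array reference in x0 and a pointer to the fill
  // data (materialized below) in x1.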
  LoadValueDirectFixed(rl_src, rs_x0);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData).Int32Value(),
               rs_rA64_LR);
  // Materialize a pointer to the fill data image
  NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);
}

/*
 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.
 */
void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // x2    = address of lock word
  // w3    = lock word / store failure
  // TUNING: How much performance do we gain by inlining this, given that we have already
  // flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_w0);
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null check fails, it is handled by the slow path to reduce exception-related meta-data.
    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
    }
  }
  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
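  // Exclusive-load the lock word; if it is 0 (unlocked), try to exclusively store our thread id.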
  NewLIR2(kA64Ldxr2rX, rw3, rx2);
  MarkPossibleNullPointerException(opt_flags);
  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w3, 0, NULL);
  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, NULL);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  not_unlocked_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artLockObjectFromCode(obj);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  lock_success_branch->target = success_target;
  GenMemBarrier(kLoadLoad);
}

/*
 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc. Note the code below doesn't use ldxr/stxr as the code holds the lock
 * and can only give away ownership if it is suspended.
 */
void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // w2    = lock word
  // TUNING: How much performance do we gain by inlining this, given that we have already
  // flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_w0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null check fails, it is handled by the slow path to reduce exception-related meta-data.
    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
    }
  }
  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
  MarkPossibleNullPointerException(opt_flags);
  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
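  // The thin lock is owned by this thread: issue a memory barrier, then clear the lock word to
  // release it.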
  GenMemBarrier(kStoreLoad);
  Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_wzr);
  LIR* unlock_success_branch = OpUnconditionalBranch(NULL);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  slow_unlock_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artUnlockObjectFromCode(obj);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  unlock_success_branch->target = success_target;
}

void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
  int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
  LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg, kNotVolatile);
  StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr, kNotVolatile);
  StoreValue(rl_dest, rl_result);
}

/*
 * Mark garbage collection card. Skip if the value we're storing is null.
 */
void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
  RegStorage reg_card_base = AllocTempWide();
  RegStorage reg_card_no = AllocTemp();
  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
  LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
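  // Dirty the card at card_table_base + (tgt_addr >> kCardShift); the byte stored is the low
  // byte of the card table base register.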
  // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
  StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base),
                   0, kUnsignedByte);
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
  FreeTemp(reg_card_base);
  FreeTemp(reg_card_no);
}

void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
  /*
   * On entry, x0 to x7 are live.  Let the register allocation
   * mechanism know so it doesn't try to use any of them when
   * expanding the frame or flushing.
   * Reserve x8 & x9 for temporaries.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);
  LockTemp(rs_x2);
  LockTemp(rs_x3);
  LockTemp(rs_x4);
  LockTemp(rs_x5);
  LockTemp(rs_x6);
  LockTemp(rs_x7);
  LockTemp(rs_x8);
  LockTemp(rs_x9);

  /*
   * We can safely skip the stack overflow check if we're
   * a leaf *and* our frame size < fudge factor.
   */
  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);

  NewLIR0(kPseudoMethodEntry);

  constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
        Thread::kStackOverflowSignalReservedBytes;
  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
  const int spill_count = num_core_spills_ + num_fp_spills_;
  const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
  const int frame_size_without_spills = frame_size_ - spill_size;
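  // With explicit overflow checks, a small frame drops SP first and then compares it against the
  // stack limit loaded into x9; a large frame computes the prospective SP in x9, compares it
  // against the limit in x8, and only then commits it to SP (see below).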

  if (!skip_overflow_check) {
    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
      if (!large_frame) {
        // Load stack limit
        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
      }
    } else {
      // TODO(Arm64) Implement implicit checks.
      // Implicit stack overflow check.
      // Generate a load from [sp, #-framesize].  If this is in the stack
      // redzone we will get a segmentation fault.
      // Load32Disp(rs_rA64_SP, -Thread::kStackOverflowReservedBytes, rs_wzr);
      // MarkPossibleStackOverflowException();
      LOG(FATAL) << "Implicit stack overflow checks not implemented.";
    }
  }

  if (frame_size_ > 0) {
    OpRegImm64(kOpSub, rs_rA64_SP, spill_size);
  }

  /* Need to spill any FP regs? */
  if (fp_spill_mask_) {
    int spill_offset = spill_size - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
    SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
  }

  /* Spill core callee saves. */
  if (core_spill_mask_) {
    int spill_offset = spill_size - kArm64PointerSize*num_core_spills_;
    SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
  }

  if (!skip_overflow_check) {
    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
      class StackOverflowSlowPath: public LIRSlowPath {
      public:
        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
              LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr),
              sp_displace_(sp_displace) {
        }
        void Compile() OVERRIDE {
          m2l_->ResetRegPool();
          m2l_->ResetDefTracking();
          GenerateTargetLabel(kPseudoThrowTarget);
          // Unwinds stack.
          m2l_->OpRegImm(kOpAdd, rs_rA64_SP, sp_displace_);
          m2l_->ClobberCallerSave();
          ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
          m2l_->LockTemp(rs_x8);
          m2l_->LoadWordDisp(rs_rA64_SELF, func_offset.Int32Value(), rs_x8);
          m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg());
          m2l_->FreeTemp(rs_x8);
        }

      private:
        const size_t sp_displace_;
      };

      if (large_frame) {
        // Compare Expected SP against bottom of stack.
        // Branch to throw target if there is not enough room.
        OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
        LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
        OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
      } else {
        /*
         * If the frame is small enough we are guaranteed to have enough space that remains to
         * handle signals on the user stack.
         * Establishes stack before checks.
         */
        OpRegRegImm(kOpSub, rs_rA64_SP, rs_rA64_SP, frame_size_without_spills);
        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x9, nullptr);
        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_));
      }
    } else {
      OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
    }
  } else {
    OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
  }

  FlushIns(ArgLocs, rl_method);

  FreeTemp(rs_x0);
  FreeTemp(rs_x1);
  FreeTemp(rs_x2);
  FreeTemp(rs_x3);
  FreeTemp(rs_x4);
  FreeTemp(rs_x5);
  FreeTemp(rs_x6);
  FreeTemp(rs_x7);
  FreeTemp(rs_x8);
  FreeTemp(rs_x9);
}

void Arm64Mir2Lir::GenExitSequence() {
  /*
   * In the exit path, x0/x1 are live - make sure they aren't
   * allocated by the register utilities as temps.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);

  NewLIR0(kPseudoMethodExit);

  /* Need to restore any FP callee saves? */
  if (fp_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
    UnSpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
  }
  if (core_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
    UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
  }

  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_);
  NewLIR0(kA64Ret);
}

void Arm64Mir2Lir::GenSpecialExitSequence() {
  NewLIR0(kA64Ret);
}

}  // namespace art
