call_arm.cc revision dd7624d2b9e599d57762d12031b10b89defc9807
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Thumb2 ISA. */
18
19#include "arm_lir.h"
20#include "codegen_arm.h"
21#include "dex/quick/mir_to_lir-inl.h"
22#include "entrypoints/quick/quick_entrypoints.h"
23
24namespace art {
25
26/*
27 * The sparse table in the literal pool is an array of <key,displacement>
28 * pairs.  For each set, we'll load them as a pair using ldmia.
29 * This means that the register number of the temp we use for the key
30 * must be lower than the reg for the displacement.
31 *
32 * The test loop will look something like:
33 *
34 *   adr   r_base, <table>
35 *   ldr   r_val, [rARM_SP, v_reg_off]
36 *   mov   r_idx, #table_size
37 * lp:
38 *   ldmia r_base!, {r_key, r_disp}
39 *   sub   r_idx, #1
40 *   cmp   r_val, r_key
41 *   ifeq
42 *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
43 *   cbnz  r_idx, lp
44 */
45void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
46                                 RegLocation rl_src) {
47  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
48  if (cu_->verbose) {
49    DumpSparseSwitchTable(table);
50  }
51  // Add the table to the list - we'll process it later
52  SwitchTable *tab_rec =
53      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
54  tab_rec->table = table;
55  tab_rec->vaddr = current_dalvik_offset_;
56  uint32_t size = table[1];
57  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
58                                                     kArenaAllocLIR));
59  switch_tables_.Insert(tab_rec);
60
61  // Get the switch value
62  rl_src = LoadValue(rl_src, kCoreReg);
63  RegStorage r_base = AllocTemp();
64  /* Allocate key and disp temps */
65  RegStorage r_key = AllocTemp();
66  RegStorage r_disp = AllocTemp();
67  // Make sure r_key's register number is less than r_disp's number for ldmia
68  if (r_key.GetReg() > r_disp.GetReg()) {
69    RegStorage tmp = r_disp;
70    r_disp = r_key;
71    r_key = tmp;
72  }
73  // Materialize a pointer to the switch table
74  NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
75  // Set up r_idx
76  RegStorage r_idx = AllocTemp();
77  LoadConstant(r_idx, size);
78  // Establish loop branch target
79  LIR* target = NewLIR0(kPseudoTargetLabel);
80  // Load next key/disp
81  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetReg()) | (1 << r_disp.GetReg()));
82  OpRegReg(kOpCmp, r_key, rl_src.reg);
83  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
84  OpIT(kCondEq, "");
85  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
86  tab_rec->anchor = switch_branch;
87  // Needs to use setflags encoding here
88  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
89  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
90  OpCondBranch(kCondNe, target);
91}
92
93
94void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
95                                 RegLocation rl_src) {
96  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
97  if (cu_->verbose) {
98    DumpPackedSwitchTable(table);
99  }
100  // Add the table to the list - we'll process it later
101  SwitchTable *tab_rec =
102      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  kArenaAllocData));
103  tab_rec->table = table;
104  tab_rec->vaddr = current_dalvik_offset_;
105  uint32_t size = table[1];
106  tab_rec->targets =
107      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
108  switch_tables_.Insert(tab_rec);
109
110  // Get the switch value
111  rl_src = LoadValue(rl_src, kCoreReg);
112  RegStorage table_base = AllocTemp();
113  // Materialize a pointer to the switch table
114  NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
115  int low_key = s4FromSwitchData(&table[2]);
116  RegStorage keyReg;
117  // Remove the bias, if necessary
118  if (low_key == 0) {
119    keyReg = rl_src.reg;
120  } else {
121    keyReg = AllocTemp();
122    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
123  }
124  // Bounds check - if < 0 or >= size continue following switch
125  OpRegImm(kOpCmp, keyReg, size-1);
126  LIR* branch_over = OpCondBranch(kCondHi, NULL);
127
128  // Load the displacement from the switch table
129  RegStorage disp_reg = AllocTemp();
130  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);
131
132  // ..and go! NOTE: No instruction set switch here - must stay Thumb2
133  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
134  tab_rec->anchor = switch_branch;
135
136  /* branch_over target here */
137  LIR* target = NewLIR0(kPseudoTargetLabel);
138  branch_over->target = target;
139}
140
141/*
142 * Array data table format:
143 *  ushort ident = 0x0300   magic value
144 *  ushort width            width of each element in the table
145 *  uint   size             number of elements in the table
146 *  ubyte  data[size*width] table of data values (may contain a single-byte
147 *                          padding at the end)
148 *
149 * Total size is 4+(width * size + 1)/2 16-bit code units.
150 */
151void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
152  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
153  // Add the table to the list - we'll process it later
154  FillArrayData *tab_rec =
155      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
156  tab_rec->table = table;
157  tab_rec->vaddr = current_dalvik_offset_;
158  uint16_t width = tab_rec->table[1];
159  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
160  tab_rec->size = (size * width) + 8;
161
162  fill_array_data_.Insert(tab_rec);
163
164  // Making a call - use explicit registers
165  FlushAllRegs();   /* Everything to home location */
166  LoadValueDirectFixed(rl_src, rs_r0);
167  LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
168               rs_rARM_LR);
169  // Materialize a pointer to the fill data image
170  NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec));
171  ClobberCallerSave();
172  LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
173  MarkSafepointPC(call_inst);
174}
175
176/*
177 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
178 * details see monitor.cc.
179 */
180void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
181  FlushAllRegs();
182  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
183  LockCallTemps();  // Prepare for explicit register usage
184  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
185  if (kArchVariantHasGoodBranchPredictor) {
186    LIR* null_check_branch = nullptr;
187    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
188      null_check_branch = nullptr;  // No null check.
189    } else {
190      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
191      if (Runtime::Current()->ExplicitNullChecks()) {
192        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
193      }
194    }
195    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
196    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
197    MarkPossibleNullPointerException(opt_flags);
198    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
199    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
200    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
201
202
203    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
204    not_unlocked_branch->target = slow_path_target;
205    if (null_check_branch != nullptr) {
206      null_check_branch->target = slow_path_target;
207    }
208    // TODO: move to a slow path.
209    // Go expensive route - artLockObjectFromCode(obj);
210    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
211    ClobberCallerSave();
212    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
213    MarkSafepointPC(call_inst);
214
215    LIR* success_target = NewLIR0(kPseudoTargetLabel);
216    lock_success_branch->target = success_target;
217    GenMemBarrier(kLoadLoad);
218  } else {
219    // Explicit null-check as slow-path is entered using an IT.
220    GenNullCheck(rs_r0, opt_flags);
221    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
222    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
223    MarkPossibleNullPointerException(opt_flags);
224    OpRegImm(kOpCmp, rs_r1, 0);
225    OpIT(kCondEq, "");
226    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
227    OpRegImm(kOpCmp, rs_r1, 0);
228    OpIT(kCondNe, "T");
229    // Go expensive route - artLockObjectFromCode(self, obj);
230    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
231    ClobberCallerSave();
232    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
233    MarkSafepointPC(call_inst);
234    GenMemBarrier(kLoadLoad);
235  }
236}
237
238/*
239 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
240 * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
241 * and can only give away ownership if its suspended.
242 */
243void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
244  FlushAllRegs();
245  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
246  LockCallTemps();  // Prepare for explicit register usage
247  LIR* null_check_branch = nullptr;
248  LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
249  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
250  if (kArchVariantHasGoodBranchPredictor) {
251    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
252      null_check_branch = nullptr;  // No null check.
253    } else {
254      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
255      if (Runtime::Current()->ExplicitNullChecks()) {
256        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
257      }
258    }
259    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
260    MarkPossibleNullPointerException(opt_flags);
261    LoadConstantNoClobber(rs_r3, 0);
262    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
263    StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
264    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
265
266    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
267    slow_unlock_branch->target = slow_path_target;
268    if (null_check_branch != nullptr) {
269      null_check_branch->target = slow_path_target;
270    }
271    // TODO: move to a slow path.
272    // Go expensive route - artUnlockObjectFromCode(obj);
273    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
274    ClobberCallerSave();
275    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
276    MarkSafepointPC(call_inst);
277
278    LIR* success_target = NewLIR0(kPseudoTargetLabel);
279    unlock_success_branch->target = success_target;
280    GenMemBarrier(kStoreLoad);
281  } else {
282    // Explicit null-check as slow-path is entered using an IT.
283    GenNullCheck(rs_r0, opt_flags);
284    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
285    MarkPossibleNullPointerException(opt_flags);
286    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
287    LoadConstantNoClobber(rs_r3, 0);
288    // Is lock unheld on lock or held by us (==thread_id) on unlock?
289    OpRegReg(kOpCmp, rs_r1, rs_r2);
290    OpIT(kCondEq, "EE");
291    StoreWordDisp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
292    // Go expensive route - UnlockObjectFromCode(obj);
293    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
294                       rs_rARM_LR);
295    ClobberCallerSave();
296    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
297    MarkSafepointPC(call_inst);
298    GenMemBarrier(kStoreLoad);
299  }
300}
301
302void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
303  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
304  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
305  RegStorage reset_reg = AllocTemp();
306  LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
307  LoadConstant(reset_reg, 0);
308  StoreWordDisp(rs_rARM_SELF, ex_offset, reset_reg);
309  FreeTemp(reset_reg);
310  StoreValue(rl_dest, rl_result);
311}
312
313/*
314 * Mark garbage collection card. Skip if the value we're storing is null.
315 */
316void ArmMir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
317  RegStorage reg_card_base = AllocTemp();
318  RegStorage reg_card_no = AllocTemp();
319  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
320  LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
321  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
322  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
323  LIR* target = NewLIR0(kPseudoTargetLabel);
324  branch_over->target = target;
325  FreeTemp(reg_card_base);
326  FreeTemp(reg_card_no);
327}
328
329void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
330  int spill_count = num_core_spills_ + num_fp_spills_;
331  /*
332   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
333   * mechanism know so it doesn't try to use any of them when
334   * expanding the frame or flushing.  This leaves the utility
335   * code with a single temp: r12.  This should be enough.
336   */
337  LockTemp(r0);
338  LockTemp(r1);
339  LockTemp(r2);
340  LockTemp(r3);
341
342  /*
343   * We can safely skip the stack overflow check if we're
344   * a leaf *and* our frame size < fudge factor.
345   */
346  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
347                            (static_cast<size_t>(frame_size_) <
348                            Thread::kStackOverflowReservedBytes));
349  NewLIR0(kPseudoMethodEntry);
350  if (!skip_overflow_check) {
351    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
352      /* Load stack limit */
353      LoadWordDisp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
354    }
355  }
356  /* Spill core callee saves */
357  NewLIR1(kThumb2Push, core_spill_mask_);
358  /* Need to spill any FP regs? */
359  if (num_fp_spills_) {
360    /*
361     * NOTE: fp spills are a little different from core spills in that
362     * they are pushed as a contiguous block.  When promoting from
363     * the fp set, we must allocate all singles from s16..highest-promoted
364     */
365    NewLIR1(kThumb2VPushCS, num_fp_spills_);
366  }
367
368  // TODO: 64 bit will be different code.
369  const int frame_size_without_spills = frame_size_ - spill_count * 4;
370  if (!skip_overflow_check) {
371    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
372      class StackOverflowSlowPath : public LIRSlowPath {
373       public:
374        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
375            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
376              sp_displace_(sp_displace) {
377        }
378        void Compile() OVERRIDE {
379          m2l_->ResetRegPool();
380          m2l_->ResetDefTracking();
381          GenerateTargetLabel();
382          if (restore_lr_) {
383            m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
384          }
385          m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
386          m2l_->ClobberCallerSave();
387          ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
388          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
389          // codegen and target are in thumb2 mode.
390          m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
391        }
392
393       private:
394        const bool restore_lr_;
395        const size_t sp_displace_;
396      };
397      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
398        OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
399        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
400        // Need to restore LR since we used it as a temp.
401        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true,
402                                                     frame_size_without_spills));
403        OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
404      } else {
405        // If the frame is small enough we are guaranteed to have enough space that remains to
406        // handle signals on the user stack.
407        OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
408        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
409        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
410      }
411    } else {
412      // Implicit stack overflow check.
413      // Generate a load from [sp, #-overflowsize].  If this is in the stack
414      // redzone we will get a segmentation fault.
415      //
416      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
417      // we need to make sure that it's loadable in an immediate field of
418      // a sub instruction.  Otherwise we will get a temp allocation and the
419      // code size will increase.
420      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
421      LoadWordDisp(rs_r12, 0, rs_r12);
422      MarkPossibleStackOverflowException();
423      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
424    }
425  } else {
426    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
427  }
428
429  FlushIns(ArgLocs, rl_method);
430
431  FreeTemp(r0);
432  FreeTemp(r1);
433  FreeTemp(r2);
434  FreeTemp(r3);
435}
436
437void ArmMir2Lir::GenExitSequence() {
438  int spill_count = num_core_spills_ + num_fp_spills_;
439  /*
440   * In the exit path, r0/r1 are live - make sure they aren't
441   * allocated by the register utilities as temps.
442   */
443  LockTemp(r0);
444  LockTemp(r1);
445
446  NewLIR0(kPseudoMethodExit);
447  OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
448  /* Need to restore any FP callee saves? */
449  if (num_fp_spills_) {
450    NewLIR1(kThumb2VPopCS, num_fp_spills_);
451  }
452  if (core_spill_mask_ & (1 << rARM_LR)) {
453    /* Unspill rARM_LR to rARM_PC */
454    core_spill_mask_ &= ~(1 << rARM_LR);
455    core_spill_mask_ |= (1 << rARM_PC);
456  }
457  NewLIR1(kThumb2Pop, core_spill_mask_);
458  if (!(core_spill_mask_ & (1 << rARM_PC))) {
459    /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
460    NewLIR1(kThumbBx, rARM_LR);
461  }
462}
463
464void ArmMir2Lir::GenSpecialExitSequence() {
465  NewLIR1(kThumbBx, rARM_LR);
466}
467
468}  // namespace art
469