call_arm.cc revision d6ed642458c8820e1beca72f3d7b5f0be4a4b64b
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Thumb2 ISA. */
18
19#include "arm_lir.h"
20#include "codegen_arm.h"
21#include "dex/quick/mir_to_lir-inl.h"
22#include "entrypoints/quick/quick_entrypoints.h"
23
24namespace art {
25
26/*
27 * The sparse table in the literal pool is an array of <key,displacement>
28 * pairs.  For each set, we'll load them as a pair using ldmia.
29 * This means that the register number of the temp we use for the key
30 * must be lower than the reg for the displacement.
31 *
32 * The test loop will look something like:
33 *
 *   ldr   r_val, [rARM_SP, v_reg_off]
 *   adr   r_base, <table>
 *   mov   r_idx, #table_size
 * lp:
 *   ldmia r_base!, {r_key, r_disp}
 *   cmp   r_key, r_val
 *   it    eq
 *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
 *   subs  r_idx, #1         ; Must be the flag-setting encoding
 *   bne   lp
44 */
45void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
46                                 RegLocation rl_src) {
47  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
48  if (cu_->verbose) {
49    DumpSparseSwitchTable(table);
50  }
51  // Add the table to the list - we'll process it later
52  SwitchTable *tab_rec =
53      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
54  tab_rec->table = table;
55  tab_rec->vaddr = current_dalvik_offset_;
56  uint32_t size = table[1];
57  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
58                                                     kArenaAllocLIR));
59  switch_tables_.Insert(tab_rec);
60
61  // Get the switch value
62  rl_src = LoadValue(rl_src, kCoreReg);
63  RegStorage r_base = AllocTemp();
64  /* Allocate key and disp temps */
65  RegStorage r_key = AllocTemp();
66  RegStorage r_disp = AllocTemp();
67  // Make sure r_key's register number is less than r_disp's number for ldmia
68  if (r_key.GetReg() > r_disp.GetReg()) {
69    RegStorage tmp = r_disp;
70    r_disp = r_key;
71    r_key = tmp;
72  }
73  // Materialize a pointer to the switch table
74  NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
75  // Set up r_idx
76  RegStorage r_idx = AllocTemp();
77  LoadConstant(r_idx, size);
78  // Establish loop branch target
79  LIR* target = NewLIR0(kPseudoTargetLabel);
80  // Load next key/disp
81  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetReg()) | (1 << r_disp.GetReg()));
82  OpRegReg(kOpCmp, r_key, rl_src.reg);
83  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
84  LIR* it = OpIT(kCondEq, "");
85  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
86  OpEndIT(it);
87  tab_rec->anchor = switch_branch;
88  // Needs to use setflags encoding here
89  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
90  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
91  OpCondBranch(kCondNe, target);
92}
93
94
95void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
96                                 RegLocation rl_src) {
97  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
98  if (cu_->verbose) {
99    DumpPackedSwitchTable(table);
100  }
101  // Add the table to the list - we'll process it later
102  SwitchTable *tab_rec =
103      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  kArenaAllocData));
104  tab_rec->table = table;
105  tab_rec->vaddr = current_dalvik_offset_;
106  uint32_t size = table[1];
107  tab_rec->targets =
108      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
109  switch_tables_.Insert(tab_rec);
110
111  // Get the switch value
112  rl_src = LoadValue(rl_src, kCoreReg);
113  RegStorage table_base = AllocTemp();
114  // Materialize a pointer to the switch table
115  NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
116  int low_key = s4FromSwitchData(&table[2]);
117  RegStorage keyReg;
118  // Remove the bias, if necessary
119  if (low_key == 0) {
120    keyReg = rl_src.reg;
121  } else {
122    keyReg = AllocTemp();
123    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
124  }
125  // Bounds check - if < 0 or >= size continue following switch
126  OpRegImm(kOpCmp, keyReg, size-1);
127  LIR* branch_over = OpCondBranch(kCondHi, NULL);
128
129  // Load the displacement from the switch table
130  RegStorage disp_reg = AllocTemp();
131  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);
132
133  // ..and go! NOTE: No instruction set switch here - must stay Thumb2
134  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
135  tab_rec->anchor = switch_branch;
136
137  /* branch_over target here */
138  LIR* target = NewLIR0(kPseudoTargetLabel);
139  branch_over->target = target;
140}
141
142/*
143 * Array data table format:
144 *  ushort ident = 0x0300   magic value
145 *  ushort width            width of each element in the table
146 *  uint   size             number of elements in the table
147 *  ubyte  data[size*width] table of data values (may contain a single-byte
148 *                          padding at the end)
149 *
150 * Total size is 4+(width * size + 1)/2 16-bit code units.
151 */
152void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
153  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
154  // Add the table to the list - we'll process it later
155  FillArrayData *tab_rec =
156      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
157  tab_rec->table = table;
158  tab_rec->vaddr = current_dalvik_offset_;
159  uint16_t width = tab_rec->table[1];
160  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
161  tab_rec->size = (size * width) + 8;
162
163  fill_array_data_.Insert(tab_rec);
164
165  // Making a call - use explicit registers
166  FlushAllRegs();   /* Everything to home location */
167  LoadValueDirectFixed(rl_src, rs_r0);
168  LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
169               rs_rARM_LR);
170  // Materialize a pointer to the fill data image
171  NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec));
172  ClobberCallerSave();
173  LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
174  MarkSafepointPC(call_inst);
175}
176
177/*
178 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
179 * details see monitor.cc.
180 */
181void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
182  FlushAllRegs();
183  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
184  LockCallTemps();  // Prepare for explicit register usage
185  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
186  if (kArchVariantHasGoodBranchPredictor) {
187    LIR* null_check_branch = nullptr;
188    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
189      null_check_branch = nullptr;  // No null check.
190    } else {
191      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
192      if (Runtime::Current()->ExplicitNullChecks()) {
193        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
194      }
195    }
196    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
197    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
198    MarkPossibleNullPointerException(opt_flags);
199    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
200    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
201    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
202
203
204    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
205    not_unlocked_branch->target = slow_path_target;
206    if (null_check_branch != nullptr) {
207      null_check_branch->target = slow_path_target;
208    }
209    // TODO: move to a slow path.
210    // Go expensive route - artLockObjectFromCode(obj);
211    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
212    ClobberCallerSave();
213    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
214    MarkSafepointPC(call_inst);
215
216    LIR* success_target = NewLIR0(kPseudoTargetLabel);
217    lock_success_branch->target = success_target;
218    GenMemBarrier(kLoadLoad);
219  } else {
220    // Explicit null-check as slow-path is entered using an IT.
221    GenNullCheck(rs_r0, opt_flags);
222    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
223    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
224    MarkPossibleNullPointerException(opt_flags);
225    OpRegImm(kOpCmp, rs_r1, 0);
226    LIR* it = OpIT(kCondEq, "");
227    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
228    OpEndIT(it);
229    OpRegImm(kOpCmp, rs_r1, 0);
230    it = OpIT(kCondNe, "T");
231    // Go expensive route - artLockObjectFromCode(self, obj);
232    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
233    ClobberCallerSave();
234    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
235    OpEndIT(it);
236    MarkSafepointPC(call_inst);
237    GenMemBarrier(kLoadLoad);
238  }
239}
240
241/*
242 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
243 * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
244 * and can only give away ownership if its suspended.
245 */
246void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
247  FlushAllRegs();
248  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
249  LockCallTemps();  // Prepare for explicit register usage
250  LIR* null_check_branch = nullptr;
251  LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
252  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
253  if (kArchVariantHasGoodBranchPredictor) {
254    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
255      null_check_branch = nullptr;  // No null check.
256    } else {
257      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
258      if (Runtime::Current()->ExplicitNullChecks()) {
259        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
260      }
261    }
262    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
263    MarkPossibleNullPointerException(opt_flags);
264    LoadConstantNoClobber(rs_r3, 0);
265    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
266    StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
267    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
268
269    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
270    slow_unlock_branch->target = slow_path_target;
271    if (null_check_branch != nullptr) {
272      null_check_branch->target = slow_path_target;
273    }
274    // TODO: move to a slow path.
275    // Go expensive route - artUnlockObjectFromCode(obj);
276    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
277    ClobberCallerSave();
278    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
279    MarkSafepointPC(call_inst);
280
281    LIR* success_target = NewLIR0(kPseudoTargetLabel);
282    unlock_success_branch->target = success_target;
283    GenMemBarrier(kStoreLoad);
284  } else {
285    // Explicit null-check as slow-path is entered using an IT.
286    GenNullCheck(rs_r0, opt_flags);
287    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
288    MarkPossibleNullPointerException(opt_flags);
289    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
290    LoadConstantNoClobber(rs_r3, 0);
291    // Is lock unheld on lock or held by us (==thread_id) on unlock?
292    OpRegReg(kOpCmp, rs_r1, rs_r2);
293    LIR* it = OpIT(kCondEq, "EE");
294    StoreWordDisp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
295    // Go expensive route - UnlockObjectFromCode(obj);
296    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
297                       rs_rARM_LR);
298    ClobberCallerSave();
299    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
300    OpEndIT(it);
301    MarkSafepointPC(call_inst);
302    GenMemBarrier(kStoreLoad);
303  }
304}
305
306void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
307  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
308  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
309  RegStorage reset_reg = AllocTemp();
310  LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
311  LoadConstant(reset_reg, 0);
312  StoreWordDisp(rs_rARM_SELF, ex_offset, reset_reg);
313  FreeTemp(reset_reg);
314  StoreValue(rl_dest, rl_result);
315}
316
317/*
318 * Mark garbage collection card. Skip if the value we're storing is null.
319 */
320void ArmMir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
321  RegStorage reg_card_base = AllocTemp();
322  RegStorage reg_card_no = AllocTemp();
323  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
324  LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
325  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
326  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
327  LIR* target = NewLIR0(kPseudoTargetLabel);
328  branch_over->target = target;
329  FreeTemp(reg_card_base);
330  FreeTemp(reg_card_no);
331}
332
333void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
334  int spill_count = num_core_spills_ + num_fp_spills_;
335  /*
336   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
337   * mechanism know so it doesn't try to use any of them when
338   * expanding the frame or flushing.  This leaves the utility
339   * code with a single temp: r12.  This should be enough.
340   */
341  LockTemp(r0);
342  LockTemp(r1);
343  LockTemp(r2);
344  LockTemp(r3);
345
346  /*
347   * We can safely skip the stack overflow check if we're
348   * a leaf *and* our frame size < fudge factor.
349   */
350  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
351                            (static_cast<size_t>(frame_size_) <
352                            Thread::kStackOverflowReservedBytes));
353  NewLIR0(kPseudoMethodEntry);
354  if (!skip_overflow_check) {
355    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
356      /* Load stack limit */
357      LoadWordDisp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
358    }
359  }
360  /* Spill core callee saves */
361  NewLIR1(kThumb2Push, core_spill_mask_);
362  /* Need to spill any FP regs? */
363  if (num_fp_spills_) {
364    /*
365     * NOTE: fp spills are a little different from core spills in that
366     * they are pushed as a contiguous block.  When promoting from
367     * the fp set, we must allocate all singles from s16..highest-promoted
368     */
369    NewLIR1(kThumb2VPushCS, num_fp_spills_);
370  }
371
372  const int spill_size = spill_count * 4;
373  const int frame_size_without_spills = frame_size_ - spill_size;
374  if (!skip_overflow_check) {
375    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
376      class StackOverflowSlowPath : public LIRSlowPath {
377       public:
378        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
379            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
380              sp_displace_(sp_displace) {
381        }
382        void Compile() OVERRIDE {
383          m2l_->ResetRegPool();
384          m2l_->ResetDefTracking();
385          GenerateTargetLabel();
386          if (restore_lr_) {
387            m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
388          }
389          m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
390          m2l_->ClobberCallerSave();
391          ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
392          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
393          // codegen and target are in thumb2 mode.
394          m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
395        }
396
397       private:
398        const bool restore_lr_;
399        const size_t sp_displace_;
400      };
401      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
402        OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
403        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
404        // Need to restore LR since we used it as a temp.
405        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
406        OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
407      } else {
408        // If the frame is small enough we are guaranteed to have enough space that remains to
409        // handle signals on the user stack.
410        OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
411        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
412        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
413      }
414    } else {
415      // Implicit stack overflow check.
416      // Generate a load from [sp, #-overflowsize].  If this is in the stack
417      // redzone we will get a segmentation fault.
418      //
419      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
420      // we need to make sure that it's loadable in an immediate field of
421      // a sub instruction.  Otherwise we will get a temp allocation and the
422      // code size will increase.
423      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
424      LoadWordDisp(rs_r12, 0, rs_r12);
425      MarkPossibleStackOverflowException();
426      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
427    }
428  } else {
429    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
430  }
431
432  FlushIns(ArgLocs, rl_method);
433
434  FreeTemp(r0);
435  FreeTemp(r1);
436  FreeTemp(r2);
437  FreeTemp(r3);
438}
439
440void ArmMir2Lir::GenExitSequence() {
441  int spill_count = num_core_spills_ + num_fp_spills_;
442  /*
443   * In the exit path, r0/r1 are live - make sure they aren't
444   * allocated by the register utilities as temps.
445   */
446  LockTemp(r0);
447  LockTemp(r1);
448
449  NewLIR0(kPseudoMethodExit);
450  OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
451  /* Need to restore any FP callee saves? */
452  if (num_fp_spills_) {
453    NewLIR1(kThumb2VPopCS, num_fp_spills_);
454  }
455  if (core_spill_mask_ & (1 << rARM_LR)) {
456    /* Unspill rARM_LR to rARM_PC */
457    core_spill_mask_ &= ~(1 << rARM_LR);
458    core_spill_mask_ |= (1 << rARM_PC);
459  }
460  NewLIR1(kThumb2Pop, core_spill_mask_);
461  if (!(core_spill_mask_ & (1 << rARM_PC))) {
462    /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
463    NewLIR1(kThumbBx, rARM_LR);
464  }
465}
466
467void ArmMir2Lir::GenSpecialExitSequence() {
468  NewLIR1(kThumbBx, rARM_LR);
469}
470
471}  // namespace art
472