call_arm.cc revision dbb8c49d540edd2a39076093163c7218f03aa502
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Thumb2 ISA. */
18
19#include "arm_lir.h"
20#include "codegen_arm.h"
21#include "dex/quick/mir_to_lir-inl.h"
22#include "entrypoints/quick/quick_entrypoints.h"
23
24namespace art {
25
26/*
27 * The sparse table in the literal pool is an array of <key,displacement>
28 * pairs.  For each set, we'll load them as a pair using ldmia.
29 * This means that the register number of the temp we use for the key
30 * must be lower than the reg for the displacement.
31 *
32 * The test loop will look something like:
33 *
34 *   adr   rBase, <table>
35 *   ldr   r_val, [rARM_SP, v_reg_off]
36 *   mov   r_idx, #table_size
37 * lp:
38 *   ldmia rBase!, {r_key, r_disp}
39 *   sub   r_idx, #1
40 *   cmp   r_val, r_key
41 *   ifeq
42 *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
43 *   cbnz  r_idx, lp
44 */
45void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
46                                 RegLocation rl_src) {
47  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
48  if (cu_->verbose) {
49    DumpSparseSwitchTable(table);
50  }
51  // Add the table to the list - we'll process it later
52  SwitchTable *tab_rec =
53      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData));
54  tab_rec->table = table;
55  tab_rec->vaddr = current_dalvik_offset_;
56  uint32_t size = table[1];
57  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
58                                                     ArenaAllocator::kAllocLIR));
59  switch_tables_.Insert(tab_rec);
60
61  // Get the switch value
62  rl_src = LoadValue(rl_src, kCoreReg);
63  int rBase = AllocTemp();
64  /* Allocate key and disp temps */
65  int r_key = AllocTemp();
66  int r_disp = AllocTemp();
67  // Make sure r_key's register number is less than r_disp's number for ldmia
68  if (r_key > r_disp) {
69    int tmp = r_disp;
70    r_disp = r_key;
71    r_key = tmp;
72  }
73  // Materialize a pointer to the switch table
74  NewLIR3(kThumb2Adr, rBase, 0, WrapPointer(tab_rec));
75  // Set up r_idx
76  int r_idx = AllocTemp();
77  LoadConstant(r_idx, size);
78  // Establish loop branch target
79  LIR* target = NewLIR0(kPseudoTargetLabel);
80  // Load next key/disp
81  NewLIR2(kThumb2LdmiaWB, rBase, (1 << r_key) | (1 << r_disp));
82  OpRegReg(kOpCmp, r_key, rl_src.low_reg);
83  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
84  OpIT(kCondEq, "");
85  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp);
86  tab_rec->anchor = switch_branch;
87  // Needs to use setflags encoding here
88  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
89  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
90  OpCondBranch(kCondNe, target);
91}
92
93
94void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
95                                 RegLocation rl_src) {
96  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
97  if (cu_->verbose) {
98    DumpPackedSwitchTable(table);
99  }
100  // Add the table to the list - we'll process it later
101  SwitchTable *tab_rec =
102      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  ArenaAllocator::kAllocData));
103  tab_rec->table = table;
104  tab_rec->vaddr = current_dalvik_offset_;
105  uint32_t size = table[1];
106  tab_rec->targets =
107      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), ArenaAllocator::kAllocLIR));
108  switch_tables_.Insert(tab_rec);
109
110  // Get the switch value
111  rl_src = LoadValue(rl_src, kCoreReg);
112  int table_base = AllocTemp();
113  // Materialize a pointer to the switch table
114  NewLIR3(kThumb2Adr, table_base, 0, WrapPointer(tab_rec));
115  int low_key = s4FromSwitchData(&table[2]);
116  int keyReg;
117  // Remove the bias, if necessary
118  if (low_key == 0) {
119    keyReg = rl_src.low_reg;
120  } else {
121    keyReg = AllocTemp();
122    OpRegRegImm(kOpSub, keyReg, rl_src.low_reg, low_key);
123  }
124  // Bounds check - if < 0 or >= size continue following switch
125  OpRegImm(kOpCmp, keyReg, size-1);
126  LIR* branch_over = OpCondBranch(kCondHi, NULL);
127
128  // Load the displacement from the switch table
129  int disp_reg = AllocTemp();
130  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);
131
132  // ..and go! NOTE: No instruction set switch here - must stay Thumb2
133  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg);
134  tab_rec->anchor = switch_branch;
135
136  /* branch_over target here */
137  LIR* target = NewLIR0(kPseudoTargetLabel);
138  branch_over->target = target;
139}
140
141/*
142 * Array data table format:
143 *  ushort ident = 0x0300   magic value
144 *  ushort width            width of each element in the table
145 *  uint   size             number of elements in the table
146 *  ubyte  data[size*width] table of data values (may contain a single-byte
147 *                          padding at the end)
148 *
149 * Total size is 4+(width * size + 1)/2 16-bit code units.
150 */
151void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
152  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
153  // Add the table to the list - we'll process it later
154  FillArrayData *tab_rec =
155      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData));
156  tab_rec->table = table;
157  tab_rec->vaddr = current_dalvik_offset_;
158  uint16_t width = tab_rec->table[1];
159  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
160  tab_rec->size = (size * width) + 8;
161
162  fill_array_data_.Insert(tab_rec);
163
164  // Making a call - use explicit registers
165  FlushAllRegs();   /* Everything to home location */
166  LoadValueDirectFixed(rl_src, r0);
167  LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData).Int32Value(),
168               rARM_LR);
169  // Materialize a pointer to the fill data image
170  NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec));
171  ClobberCallerSave();
172  LIR* call_inst = OpReg(kOpBlx, rARM_LR);
173  MarkSafepointPC(call_inst);
174}
175
176/*
177 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
178 * details see monitor.cc.
179 */
180void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
181  FlushAllRegs();
182  LoadValueDirectFixed(rl_src, r0);  // Get obj
183  LockCallTemps();  // Prepare for explicit register usage
184  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
185  if (kArchVariantHasGoodBranchPredictor) {
186    LIR* null_check_branch;
187    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
188      null_check_branch = nullptr;  // No null check.
189    } else {
190      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
191      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
192    }
193    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
194    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
195    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, r1, 0, NULL);
196    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
197    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, r1, 0, NULL);
198
199
200    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
201    not_unlocked_branch->target = slow_path_target;
202    if (null_check_branch != nullptr) {
203      null_check_branch->target = slow_path_target;
204    }
205    // TODO: move to a slow path.
206    // Go expensive route - artLockObjectFromCode(obj);
207    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
208    ClobberCallerSave();
209    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
210    MarkSafepointPC(call_inst);
211
212    LIR* success_target = NewLIR0(kPseudoTargetLabel);
213    lock_success_branch->target = success_target;
214    GenMemBarrier(kLoadLoad);
215  } else {
216    // Explicit null-check as slow-path is entered using an IT.
217    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
218    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
219    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
220    OpRegImm(kOpCmp, r1, 0);
221    OpIT(kCondEq, "");
222    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
223    OpRegImm(kOpCmp, r1, 0);
224    OpIT(kCondNe, "T");
225    // Go expensive route - artLockObjectFromCode(self, obj);
226    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
227    ClobberCallerSave();
228    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
229    MarkSafepointPC(call_inst);
230    GenMemBarrier(kLoadLoad);
231  }
232}
233
234/*
235 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
236 * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
237 * and can only give away ownership if its suspended.
238 */
239void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
240  FlushAllRegs();
241  LoadValueDirectFixed(rl_src, r0);  // Get obj
242  LockCallTemps();  // Prepare for explicit register usage
243  LIR* null_check_branch;
244  LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
245  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
246  if (kArchVariantHasGoodBranchPredictor) {
247    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
248      null_check_branch = nullptr;  // No null check.
249    } else {
250      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
251      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
252    }
253    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);
254    LoadConstantNoClobber(r3, 0);
255    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, r1, r2, NULL);
256    StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
257    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
258
259    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
260    slow_unlock_branch->target = slow_path_target;
261    if (null_check_branch != nullptr) {
262      null_check_branch->target = slow_path_target;
263    }
264    // TODO: move to a slow path.
265    // Go expensive route - artUnlockObjectFromCode(obj);
266    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
267    ClobberCallerSave();
268    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
269    MarkSafepointPC(call_inst);
270
271    LIR* success_target = NewLIR0(kPseudoTargetLabel);
272    unlock_success_branch->target = success_target;
273    GenMemBarrier(kStoreLoad);
274  } else {
275    // Explicit null-check as slow-path is entered using an IT.
276    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
277    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);  // Get lock
278    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
279    LoadConstantNoClobber(r3, 0);
280    // Is lock unheld on lock or held by us (==thread_id) on unlock?
281    OpRegReg(kOpCmp, r1, r2);
282    OpIT(kCondEq, "EE");
283    StoreWordDisp/*eq*/(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
284    // Go expensive route - UnlockObjectFromCode(obj);
285    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
286    ClobberCallerSave();
287    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
288    MarkSafepointPC(call_inst);
289    GenMemBarrier(kStoreLoad);
290  }
291}
292
293void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
294  int ex_offset = Thread::ExceptionOffset().Int32Value();
295  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
296  int reset_reg = AllocTemp();
297  LoadWordDisp(rARM_SELF, ex_offset, rl_result.low_reg);
298  LoadConstant(reset_reg, 0);
299  StoreWordDisp(rARM_SELF, ex_offset, reset_reg);
300  FreeTemp(reset_reg);
301  StoreValue(rl_dest, rl_result);
302}
303
304/*
305 * Mark garbage collection card. Skip if the value we're storing is null.
306 */
307void ArmMir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) {
308  int reg_card_base = AllocTemp();
309  int reg_card_no = AllocTemp();
310  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
311  LoadWordDisp(rARM_SELF, Thread::CardTableOffset().Int32Value(), reg_card_base);
312  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
313  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0,
314                   kUnsignedByte);
315  LIR* target = NewLIR0(kPseudoTargetLabel);
316  branch_over->target = target;
317  FreeTemp(reg_card_base);
318  FreeTemp(reg_card_no);
319}
320
321void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
322  int spill_count = num_core_spills_ + num_fp_spills_;
323  /*
324   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
325   * mechanism know so it doesn't try to use any of them when
326   * expanding the frame or flushing.  This leaves the utility
327   * code with a single temp: r12.  This should be enough.
328   */
329  LockTemp(r0);
330  LockTemp(r1);
331  LockTemp(r2);
332  LockTemp(r3);
333
334  /*
335   * We can safely skip the stack overflow check if we're
336   * a leaf *and* our frame size < fudge factor.
337   */
338  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
339                            (static_cast<size_t>(frame_size_) <
340                            Thread::kStackOverflowReservedBytes));
341  NewLIR0(kPseudoMethodEntry);
342  if (!skip_overflow_check) {
343    /* Load stack limit */
344    LoadWordDisp(rARM_SELF, Thread::StackEndOffset().Int32Value(), r12);
345  }
346  /* Spill core callee saves */
347  NewLIR1(kThumb2Push, core_spill_mask_);
348  /* Need to spill any FP regs? */
349  if (num_fp_spills_) {
350    /*
351     * NOTE: fp spills are a little different from core spills in that
352     * they are pushed as a contiguous block.  When promoting from
353     * the fp set, we must allocate all singles from s16..highest-promoted
354     */
355    NewLIR1(kThumb2VPushCS, num_fp_spills_);
356  }
357  if (!skip_overflow_check) {
358    OpRegRegImm(kOpSub, rARM_LR, rARM_SP, frame_size_ - (spill_count * 4));
359    GenRegRegCheck(kCondUlt, rARM_LR, r12, kThrowStackOverflow);
360    OpRegCopy(rARM_SP, rARM_LR);     // Establish stack
361  } else {
362    OpRegImm(kOpSub, rARM_SP, frame_size_ - (spill_count * 4));
363  }
364
365  FlushIns(ArgLocs, rl_method);
366
367  FreeTemp(r0);
368  FreeTemp(r1);
369  FreeTemp(r2);
370  FreeTemp(r3);
371}
372
373void ArmMir2Lir::GenExitSequence() {
374  int spill_count = num_core_spills_ + num_fp_spills_;
375  /*
376   * In the exit path, r0/r1 are live - make sure they aren't
377   * allocated by the register utilities as temps.
378   */
379  LockTemp(r0);
380  LockTemp(r1);
381
382  NewLIR0(kPseudoMethodExit);
383  OpRegImm(kOpAdd, rARM_SP, frame_size_ - (spill_count * 4));
384  /* Need to restore any FP callee saves? */
385  if (num_fp_spills_) {
386    NewLIR1(kThumb2VPopCS, num_fp_spills_);
387  }
388  if (core_spill_mask_ & (1 << rARM_LR)) {
389    /* Unspill rARM_LR to rARM_PC */
390    core_spill_mask_ &= ~(1 << rARM_LR);
391    core_spill_mask_ |= (1 << rARM_PC);
392  }
393  NewLIR1(kThumb2Pop, core_spill_mask_);
394  if (!(core_spill_mask_ & (1 << rARM_PC))) {
395    /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
396    NewLIR1(kThumbBx, rARM_LR);
397  }
398}
399
400void ArmMir2Lir::GenSpecialExitSequence() {
401  NewLIR1(kThumbBx, rARM_LR);
402}
403
404}  // namespace art
405