call_arm.cc revision 2c1ed456dcdb027d097825dd98dbe48c71599b6c
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Thumb2 ISA. */
18
19#include "arm_lir.h"
20#include "codegen_arm.h"
21#include "dex/quick/mir_to_lir-inl.h"
22#include "entrypoints/quick/quick_entrypoints.h"
23
24namespace art {
25
26/*
27 * The sparse table in the literal pool is an array of <key,displacement>
28 * pairs.  For each set, we'll load them as a pair using ldmia.
29 * This means that the register number of the temp we use for the key
30 * must be lower than the reg for the displacement.
31 *
32 * The test loop will look something like:
33 *
34 *   adr   rBase, <table>
35 *   ldr   r_val, [rARM_SP, v_reg_off]
36 *   mov   r_idx, #table_size
37 * lp:
38 *   ldmia rBase!, {r_key, r_disp}
39 *   sub   r_idx, #1
40 *   cmp   r_val, r_key
41 *   ifeq
42 *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
43 *   cbnz  r_idx, lp
44 */
45void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
46                                 RegLocation rl_src) {
47  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
48  if (cu_->verbose) {
49    DumpSparseSwitchTable(table);
50  }
51  // Add the table to the list - we'll process it later
52  SwitchTable *tab_rec =
53      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData));
54  tab_rec->table = table;
55  tab_rec->vaddr = current_dalvik_offset_;
56  uint32_t size = table[1];
57  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
58                                                     ArenaAllocator::kAllocLIR));
59  switch_tables_.Insert(tab_rec);
60
61  // Get the switch value
62  rl_src = LoadValue(rl_src, kCoreReg);
63  int rBase = AllocTemp();
64  /* Allocate key and disp temps */
65  int r_key = AllocTemp();
66  int r_disp = AllocTemp();
67  // Make sure r_key's register number is less than r_disp's number for ldmia
68  if (r_key > r_disp) {
69    int tmp = r_disp;
70    r_disp = r_key;
71    r_key = tmp;
72  }
73  // Materialize a pointer to the switch table
74  NewLIR3(kThumb2Adr, rBase, 0, WrapPointer(tab_rec));
75  // Set up r_idx
76  int r_idx = AllocTemp();
77  LoadConstant(r_idx, size);
78  // Establish loop branch target
79  LIR* target = NewLIR0(kPseudoTargetLabel);
80  // Load next key/disp
81  NewLIR2(kThumb2LdmiaWB, rBase, (1 << r_key) | (1 << r_disp));
82  OpRegReg(kOpCmp, r_key, rl_src.reg.GetReg());
83  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
84  OpIT(kCondEq, "");
85  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp);
86  tab_rec->anchor = switch_branch;
87  // Needs to use setflags encoding here
88  NewLIR3(kThumb2SubsRRI12, r_idx, r_idx, 1);
89  OpCondBranch(kCondNe, target);
90}
91
92
93void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
94                                 RegLocation rl_src) {
95  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
96  if (cu_->verbose) {
97    DumpPackedSwitchTable(table);
98  }
99  // Add the table to the list - we'll process it later
100  SwitchTable *tab_rec =
101      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  ArenaAllocator::kAllocData));
102  tab_rec->table = table;
103  tab_rec->vaddr = current_dalvik_offset_;
104  uint32_t size = table[1];
105  tab_rec->targets =
106      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), ArenaAllocator::kAllocLIR));
107  switch_tables_.Insert(tab_rec);
108
109  // Get the switch value
110  rl_src = LoadValue(rl_src, kCoreReg);
111  int table_base = AllocTemp();
112  // Materialize a pointer to the switch table
113  NewLIR3(kThumb2Adr, table_base, 0, WrapPointer(tab_rec));
114  int low_key = s4FromSwitchData(&table[2]);
115  int keyReg;
116  // Remove the bias, if necessary
117  if (low_key == 0) {
118    keyReg = rl_src.reg.GetReg();
119  } else {
120    keyReg = AllocTemp();
121    OpRegRegImm(kOpSub, keyReg, rl_src.reg.GetReg(), low_key);
122  }
123  // Bounds check - if < 0 or >= size continue following switch
124  OpRegImm(kOpCmp, keyReg, size-1);
125  LIR* branch_over = OpCondBranch(kCondHi, NULL);
126
127  // Load the displacement from the switch table
128  int disp_reg = AllocTemp();
129  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);
130
131  // ..and go! NOTE: No instruction set switch here - must stay Thumb2
132  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg);
133  tab_rec->anchor = switch_branch;
134
135  /* branch_over target here */
136  LIR* target = NewLIR0(kPseudoTargetLabel);
137  branch_over->target = target;
138}
139
140/*
141 * Array data table format:
142 *  ushort ident = 0x0300   magic value
143 *  ushort width            width of each element in the table
144 *  uint   size             number of elements in the table
145 *  ubyte  data[size*width] table of data values (may contain a single-byte
146 *                          padding at the end)
147 *
148 * Total size is 4+(width * size + 1)/2 16-bit code units.
149 */
150void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
151  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
152  // Add the table to the list - we'll process it later
153  FillArrayData *tab_rec =
154      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData));
155  tab_rec->table = table;
156  tab_rec->vaddr = current_dalvik_offset_;
157  uint16_t width = tab_rec->table[1];
158  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
159  tab_rec->size = (size * width) + 8;
160
161  fill_array_data_.Insert(tab_rec);
162
163  // Making a call - use explicit registers
164  FlushAllRegs();   /* Everything to home location */
165  LoadValueDirectFixed(rl_src, r0);
166  LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData).Int32Value(),
167               rARM_LR);
168  // Materialize a pointer to the fill data image
169  NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec));
170  ClobberCallerSave();
171  LIR* call_inst = OpReg(kOpBlx, rARM_LR);
172  MarkSafepointPC(call_inst);
173}
174
175/*
176 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
177 * details see monitor.cc.
178 */
179void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
180  FlushAllRegs();
181  LoadValueDirectFixed(rl_src, r0);  // Get obj
182  LockCallTemps();  // Prepare for explicit register usage
183  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
184  if (kArchVariantHasGoodBranchPredictor) {
185    LIR* null_check_branch;
186    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
187      null_check_branch = nullptr;  // No null check.
188    } else {
189      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
190      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
191    }
192    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
193    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
194    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, r1, 0, NULL);
195    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
196    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, r1, 0, NULL);
197
198
199    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
200    not_unlocked_branch->target = slow_path_target;
201    if (null_check_branch != nullptr) {
202      null_check_branch->target = slow_path_target;
203    }
204    // TODO: move to a slow path.
205    // Go expensive route - artLockObjectFromCode(obj);
206    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
207    ClobberCallerSave();
208    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
209    MarkSafepointPC(call_inst);
210
211    LIR* success_target = NewLIR0(kPseudoTargetLabel);
212    lock_success_branch->target = success_target;
213    GenMemBarrier(kLoadLoad);
214  } else {
215    // Explicit null-check as slow-path is entered using an IT.
216    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
217    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
218    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
219    OpRegImm(kOpCmp, r1, 0);
220    OpIT(kCondEq, "");
221    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
222    OpRegImm(kOpCmp, r1, 0);
223    OpIT(kCondNe, "T");
224    // Go expensive route - artLockObjectFromCode(self, obj);
225    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
226    ClobberCallerSave();
227    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
228    MarkSafepointPC(call_inst);
229    GenMemBarrier(kLoadLoad);
230  }
231}
232
233/*
234 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
235 * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
236 * and can only give away ownership if its suspended.
237 */
238void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
239  FlushAllRegs();
240  LoadValueDirectFixed(rl_src, r0);  // Get obj
241  LockCallTemps();  // Prepare for explicit register usage
242  LIR* null_check_branch;
243  LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
244  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
245  if (kArchVariantHasGoodBranchPredictor) {
246    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
247      null_check_branch = nullptr;  // No null check.
248    } else {
249      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
250      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
251    }
252    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);
253    LoadConstantNoClobber(r3, 0);
254    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, r1, r2, NULL);
255    StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
256    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
257
258    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
259    slow_unlock_branch->target = slow_path_target;
260    if (null_check_branch != nullptr) {
261      null_check_branch->target = slow_path_target;
262    }
263    // TODO: move to a slow path.
264    // Go expensive route - artUnlockObjectFromCode(obj);
265    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
266    ClobberCallerSave();
267    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
268    MarkSafepointPC(call_inst);
269
270    LIR* success_target = NewLIR0(kPseudoTargetLabel);
271    unlock_success_branch->target = success_target;
272    GenMemBarrier(kStoreLoad);
273  } else {
274    // Explicit null-check as slow-path is entered using an IT.
275    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
276    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);  // Get lock
277    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
278    LoadConstantNoClobber(r3, 0);
279    // Is lock unheld on lock or held by us (==thread_id) on unlock?
280    OpRegReg(kOpCmp, r1, r2);
281    OpIT(kCondEq, "EE");
282    StoreWordDisp/*eq*/(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
283    // Go expensive route - UnlockObjectFromCode(obj);
284    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
285    ClobberCallerSave();
286    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
287    MarkSafepointPC(call_inst);
288    GenMemBarrier(kStoreLoad);
289  }
290}
291
292void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
293  int ex_offset = Thread::ExceptionOffset().Int32Value();
294  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
295  int reset_reg = AllocTemp();
296  LoadWordDisp(rARM_SELF, ex_offset, rl_result.reg.GetReg());
297  LoadConstant(reset_reg, 0);
298  StoreWordDisp(rARM_SELF, ex_offset, reset_reg);
299  FreeTemp(reset_reg);
300  StoreValue(rl_dest, rl_result);
301}
302
303/*
304 * Mark garbage collection card. Skip if the value we're storing is null.
305 */
306void ArmMir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) {
307  int reg_card_base = AllocTemp();
308  int reg_card_no = AllocTemp();
309  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
310  LoadWordDisp(rARM_SELF, Thread::CardTableOffset().Int32Value(), reg_card_base);
311  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
312  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0,
313                   kUnsignedByte);
314  LIR* target = NewLIR0(kPseudoTargetLabel);
315  branch_over->target = target;
316  FreeTemp(reg_card_base);
317  FreeTemp(reg_card_no);
318}
319
320void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
321  int spill_count = num_core_spills_ + num_fp_spills_;
322  /*
323   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
324   * mechanism know so it doesn't try to use any of them when
325   * expanding the frame or flushing.  This leaves the utility
326   * code with a single temp: r12.  This should be enough.
327   */
328  LockTemp(r0);
329  LockTemp(r1);
330  LockTemp(r2);
331  LockTemp(r3);
332
333  /*
334   * We can safely skip the stack overflow check if we're
335   * a leaf *and* our frame size < fudge factor.
336   */
337  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
338                            (static_cast<size_t>(frame_size_) <
339                            Thread::kStackOverflowReservedBytes));
340  NewLIR0(kPseudoMethodEntry);
341  if (!skip_overflow_check) {
342    /* Load stack limit */
343    LoadWordDisp(rARM_SELF, Thread::StackEndOffset().Int32Value(), r12);
344  }
345  /* Spill core callee saves */
346  NewLIR1(kThumb2Push, core_spill_mask_);
347  /* Need to spill any FP regs? */
348  if (num_fp_spills_) {
349    /*
350     * NOTE: fp spills are a little different from core spills in that
351     * they are pushed as a contiguous block.  When promoting from
352     * the fp set, we must allocate all singles from s16..highest-promoted
353     */
354    NewLIR1(kThumb2VPushCS, num_fp_spills_);
355  }
356  if (!skip_overflow_check) {
357    OpRegRegImm(kOpSub, rARM_LR, rARM_SP, frame_size_ - (spill_count * 4));
358    GenRegRegCheck(kCondUlt, rARM_LR, r12, kThrowStackOverflow);
359    OpRegCopy(rARM_SP, rARM_LR);     // Establish stack
360  } else {
361    OpRegImm(kOpSub, rARM_SP, frame_size_ - (spill_count * 4));
362  }
363
364  FlushIns(ArgLocs, rl_method);
365
366  FreeTemp(r0);
367  FreeTemp(r1);
368  FreeTemp(r2);
369  FreeTemp(r3);
370}
371
372void ArmMir2Lir::GenExitSequence() {
373  int spill_count = num_core_spills_ + num_fp_spills_;
374  /*
375   * In the exit path, r0/r1 are live - make sure they aren't
376   * allocated by the register utilities as temps.
377   */
378  LockTemp(r0);
379  LockTemp(r1);
380
381  NewLIR0(kPseudoMethodExit);
382  OpRegImm(kOpAdd, rARM_SP, frame_size_ - (spill_count * 4));
383  /* Need to restore any FP callee saves? */
384  if (num_fp_spills_) {
385    NewLIR1(kThumb2VPopCS, num_fp_spills_);
386  }
387  if (core_spill_mask_ & (1 << rARM_LR)) {
388    /* Unspill rARM_LR to rARM_PC */
389    core_spill_mask_ &= ~(1 << rARM_LR);
390    core_spill_mask_ |= (1 << rARM_PC);
391  }
392  NewLIR1(kThumb2Pop, core_spill_mask_);
393  if (!(core_spill_mask_ & (1 << rARM_PC))) {
394    /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
395    NewLIR1(kThumbBx, rARM_LR);
396  }
397}
398
399void ArmMir2Lir::GenSpecialExitSequence() {
400  NewLIR1(kThumbBx, rARM_LR);
401}
402
403}  // namespace art
404