call_arm64.cc revision fd2e291297463a3d5bdb18adc2a1eacbe2759152
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Arm64 ISA. */

#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "gc/accounting/card_table.h"
#include "entrypoints/quick/quick_entrypoints.h"

namespace art {

bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
                                  const InlineMethod& special) {
  // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
  //   (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
  // return Mir2Lir::GenSpecialCase(bb, mir, special);
  return false;
}

/*
 * The sparse table in the literal pool is an array of <key,displacement>
 * pairs.  For each entry, we load the key and its displacement together with a single ldp.
 * The test loop will look something like:
 *
 *   adr   r_base, <table>
 *   ldr   r_val, [rA64_SP, v_reg_off]
 *   mov   r_idx, #table_size
 * loop:
 *   cbz   r_idx, quit
 *   ldp   r_key, r_disp, [r_base], #8
 *   sub   r_idx, #1
 *   cmp   r_val, r_key
 *   b.ne  loop
 *   adr   r_base, #0        ; This is the instruction from which we compute displacements
 *   add   r_base, r_disp
 *   br    r_base
 * quit:
 */
void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
                                   RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpSparseSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
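  // Note: per the Dalvik bytecode spec, the raw sparse-switch payload is laid out as
  // ushort ident (0x0200), ushort size, 'size' int32 keys, then 'size' int32 targets,
  // so table[1] is the number of cases.  The <key, displacement> pairs described in
  // the comment above are emitted later from this data.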
  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage r_base = AllocTempWide();
  // Allocate key and disp temps.
  RegStorage r_key = AllocTemp();
  RegStorage r_disp = AllocTemp();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
  // Set up r_idx
  RegStorage r_idx = AllocTemp();
  LoadConstant(r_idx, size);

  // Entry of loop.
  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);

  // Load next key/disp.
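  // The post-index immediate of 2 below is scaled by the 4-byte register size,
  // so r_base advances by 8 bytes - one <key, displacement> pair - matching the
  // "ldp ..., #8" in the sketch above.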
  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
  OpRegRegImm(kOpSub, r_idx, r_idx, 1);

  // Go to next case if key does not match.
  OpRegReg(kOpCmp, r_key, rl_src.reg);
  OpCondBranch(kCondNe, loop_entry);

  // Key does match: branch to case label.
  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;
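  // This adr is the "adr r_base, #0" of the sketch above: it materializes its own
  // address, and the per-case displacements stored in the emitted table are taken
  // relative to this anchor.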

  // Add displacement to base branch address and go!
  // TODO(Arm64): generate "add x1, x1, w3, sxtw" rather than "add x1, x1, x3"?
  OpRegRegRegShift(kOpAdd, r_base, r_base, As64BitReg(r_disp), ENCODE_NO_SHIFT);
  NewLIR1(kA64Br1x, r_base.GetReg());

  // Loop exit label.
  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
  branch_out->target = loop_exit;
}

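/*
 * Code pattern for a packed switch (a sketch of the sequence generated below;
 * actual registers are assigned by the allocator):
 *
 *   adr   table_base, <table>
 *   sub   key_reg, r_val, #low_key   ; only if low_key != 0
 *   cmp   key_reg, #(size - 1)
 *   b.hi  done                       ; out-of-range keys fall through the switch
 *   ldr   w_disp, [table_base, key_reg, lsl #2]
 *   adr   branch_reg, #0             ; anchor for displacements
 *   add   branch_reg, branch_reg, w_disp
 *   br    branch_reg
 * done:
 */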
void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
                                   RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpPackedSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets =
      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage table_base = AllocTempWide();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
  int low_key = s4FromSwitchData(&table[2]);
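  // Note: per the Dalvik bytecode spec, a packed-switch payload is laid out as
  // ushort ident (0x0100), ushort size, int first_key, then 'size' int32 targets;
  // low_key (first_key) is the bias that maps a switch key onto a table index.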
  RegStorage key_reg;
  // Remove the bias, if necessary
  if (low_key == 0) {
    key_reg = rl_src.reg;
  } else {
    key_reg = AllocTemp();
    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
  }
  // Bounds check: if the key is < 0 or >= size, fall through to the code after the switch.
  OpRegImm(kOpCmp, key_reg, size - 1);
  LIR* branch_over = OpCondBranch(kCondHi, NULL);

  // Load the displacement from the switch table
  RegStorage disp_reg = AllocTemp();
  // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
  LoadBaseIndexed(table_base, key_reg, As64BitReg(disp_reg), 2, k32);

  // Get base branch address.
  RegStorage branch_reg = AllocTempWide();
  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  // TODO(Arm64): generate "add x4, x4, w3, sxtw" rather than "add x4, x4, x3"?
  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), ENCODE_NO_SHIFT);
  NewLIR1(kA64Br1x, branch_reg.GetReg());

  // branch_over target here
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
}

/*
 * Array data table format:
 *  ushort ident = 0x0300   magic value
 *  ushort width            width of each element in the table
 *  uint   size             number of elements in the table
 *  ubyte  data[size*width] table of data values (may contain a single-byte
 *                          padding at the end)
 *
 * Total size is 4+(width * size + 1)/2 16-bit code units.
 */
void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  // Add the table to the list - we'll process it later
  FillArrayData *tab_rec =
      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint16_t width = tab_rec->table[1];
  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
  tab_rec->size = (size * width) + 8;
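  // The extra 8 bytes cover the payload header (ident, width and size) described
  // in the format comment above.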

  fill_array_data_.Insert(tab_rec);

  // Making a call - use explicit registers
  FlushAllRegs();   /* Everything to home location */
  LoadValueDirectFixed(rl_src, rs_x0);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData).Int32Value(),
               rs_rA64_LR);
  // Materialize a pointer to the fill data image
  NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);
}

/*
 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.
 */
void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // x2    = address of lock word
  // w3    = lock word / store failure
  // TUNING: how much performance do we gain by inlining this, given that we
  // have already flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_w0);
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null-check fails, it is handled by the slow path, to reduce exception-related meta-data.
    if (Runtime::Current()->ExplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
    }
  }
  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
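  // Fast path (per the register plan above): load-exclusive the lock word into w3;
  // if it is non-zero the object is already locked and we take the slow path.
  // Otherwise try to store-exclusive our thread id, with w3 receiving the store
  // status (0 on success).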
  NewLIR2(kA64Ldxr2rX, rw3, rx2);
  MarkPossibleNullPointerException(opt_flags);
  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w3, 0, NULL);
  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, NULL);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  not_unlocked_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artLockObjectFromCode(obj);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  lock_success_branch->target = success_target;
  GenMemBarrier(kLoadLoad);
}

/*
 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc. Note that the code below doesn't need ldxr/stxr: the thread holds the
 * lock and can only give away ownership while it is suspended.
 */
void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // w2    = lock word
  // TUNING: how much performance do we gain by inlining this, given that we
  // have already flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_w0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null-check fails, it is handled by the slow path, to reduce exception-related meta-data.
    if (Runtime::Current()->ExplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
    }
  }
  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
  MarkPossibleNullPointerException(opt_flags);
  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
  GenMemBarrier(kStoreLoad);
  Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_xzr);
  LIR* unlock_success_branch = OpUnconditionalBranch(NULL);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  slow_unlock_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artUnlockObjectFromCode(obj);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  unlock_success_branch->target = success_target;
}

void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
  int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
  LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg);
  StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr);
  StoreValue(rl_dest, rl_result);
}

/*
 * Mark garbage collection card. Skip if the value we're storing is null.
 */
void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
  RegStorage reg_card_base = AllocTempWide();
  RegStorage reg_card_no = AllocTemp();
  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
  LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
  // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
  StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base),
                   0, kUnsignedByte);
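  // The byte written is the low byte of the card-table base register itself; the
  // runtime biases the card-table base so that this byte equals the dirty-card
  // value (see gc/accounting/card_table.h).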
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
  FreeTemp(reg_card_base);
  FreeTemp(reg_card_no);
}

void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
  /*
   * On entry, x0, x1, x2 & x3 are live.  Let the register allocation
   * mechanism know so it doesn't try to use any of them when
   * expanding the frame or flushing.  This leaves the utility
   * code with a single temp: x12.  This should be enough.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);
  LockTemp(rs_x2);
  LockTemp(rs_x3);

  /*
   * We can safely skip the stack overflow check if we're
   * a leaf *and* our frame size < fudge factor.
   */
  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
                            (static_cast<size_t>(frame_size_) <
                            Thread::kStackOverflowReservedBytes));
  NewLIR0(kPseudoMethodEntry);

  if (!skip_overflow_check) {
    LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12);
    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
      /* Load stack limit */
      // TODO(Arm64): fix the line below:
      // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
    } else {
      // Implicit stack overflow check.
      // Generate a load from [sp, #-framesize].  If this is in the stack
      // redzone we will get a segmentation fault.
      // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
      Load32Disp(rs_rA64_SP, 0, rs_wzr);
      MarkPossibleStackOverflowException();
    }
  } else if (frame_size_ > 0) {
    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
  }

  /* Need to spill any FP regs? */
  if (fp_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
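    // Core spills occupy the top of the frame; FP spills go immediately below
    // them, hence the offset leaves room for both sets.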
    SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
  }

  /* Spill core callee saves. */
  if (core_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
    SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
  }

  FlushIns(ArgLocs, rl_method);

  FreeTemp(rs_x0);
  FreeTemp(rs_x1);
  FreeTemp(rs_x2);
  FreeTemp(rs_x3);
}

void Arm64Mir2Lir::GenExitSequence() {
  /*
   * In the exit path, x0/x1 are live - make sure they aren't
   * allocated by the register utilities as temps.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);

  NewLIR0(kPseudoMethodExit);

  /* Need to restore any FP callee saves? */
  if (fp_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
    UnSpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
  }
  if (core_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
    UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
  }

  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_);
  NewLIR0(kA64Ret);
}

void Arm64Mir2Lir::GenSpecialExitSequence() {
  NewLIR0(kA64Ret);
}

}  // namespace art