call_arm64.cc revision bc6d197cdb02eeac0c98ec4ed37f530b003a4e7a
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Arm64 ISA. */

#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"

namespace art {

bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
                                  const InlineMethod& special) {
  // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
  //   (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
  // return Mir2Lir::GenSpecialCase(bb, mir, special);
  return false;
}

/*
 * The sparse table in the literal pool is an array of <key,displacement>
 * pairs.  For each entry, we load the key and displacement as a pair using ldp.
 * The test loop will look something like:
 *
 *   adr   r_base, <table>
 *   ldr   r_val, [rA64_SP, v_reg_off]
 *   mov   r_idx, #table_size
 * loop:
 *   cbz   r_idx, quit
 *   ldp   r_key, r_disp, [r_base], #8
 *   sub   r_idx, #1
 *   cmp   r_val, r_key
 *   b.ne  loop
 *   adr   r_base, #0        ; This is the instruction from which we compute displacements
 *   add   r_base, r_disp
 *   br    r_base
 * quit:
 */
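// Note that the lookup generated below is a simple linear scan over the <key,displacement>
// pairs rather than a binary search.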
void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
                                   RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpSparseSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage r_base = AllocTemp();
  // Allocate key and disp temps.
  RegStorage r_key = AllocTemp();
  RegStorage r_disp = AllocTemp();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
  // Set up r_idx
  RegStorage r_idx = AllocTemp();
  LoadConstant(r_idx, size);

  // Entry of loop.
  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);

  // Load next key/disp.
  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
  OpRegRegImm(kOpSub, r_idx, r_idx, 1);

  // Go to the next case if the key does not match.
  OpRegReg(kOpCmp, r_key, rl_src.reg);
  OpCondBranch(kCondNe, loop_entry);

  // Key does match: branch to case label.
  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;
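  // Displacements emitted in the switch table are relative to this anchor instruction
  // (the "adr r_base, #0" in the sketch above), so adding r_disp below yields the
  // absolute address of the case label.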

  // Add displacement to base branch address and go!
  OpRegRegRegShift(kOpAdd, r_base.GetReg(), r_base.GetReg(), r_disp.GetReg(),
                   ENCODE_NO_SHIFT, true);
  NewLIR1(kA64Br1x, r_base.GetReg());

  // Loop exit label.
  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
  branch_out->target = loop_exit;
}


void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
                                 RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpPackedSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable),  kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets =
      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage table_base = AllocTemp();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
  int low_key = s4FromSwitchData(&table[2]);
  RegStorage key_reg;
  // Remove the bias, if necessary
  if (low_key == 0) {
    key_reg = rl_src.reg;
  } else {
    key_reg = AllocTemp();
    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
  }
  // Bounds check: if the key is < 0 or >= size, branch over the switch and continue with the
  // following code.
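  // A single unsigned compare against (size - 1) handles both cases: a negative biased key
  // shows up as a large unsigned value, so the kCondHi branch below covers key < 0 as well
  // as key >= size.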
  OpRegImm(kOpCmp, key_reg, size - 1);
  LIR* branch_over = OpCondBranch(kCondHi, NULL);

  // Load the displacement from the switch table
  RegStorage disp_reg = AllocTemp();
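  // Each table entry is a 32-bit displacement, so the scale of 2 below indexes the key into
  // 4-byte entries.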
  LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);

  // Get base branch address.
  RegStorage branch_reg = AllocTemp();
  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  OpRegRegRegShift(kOpAdd, branch_reg.GetReg(), branch_reg.GetReg(), disp_reg.GetReg(),
                   ENCODE_NO_SHIFT, true);
  NewLIR1(kA64Br1x, branch_reg.GetReg());

  // branch_over target here
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
}

/*
 * Array data table format:
 *  ushort ident = 0x0300   magic value
 *  ushort width            width of each element in the table
 *  uint   size             number of elements in the table
 *  ubyte  data[size*width] table of data values (may contain a single-byte
 *                          padding at the end)
 *
 * Total size is 4+(width * size + 1)/2 16-bit code units.
 */
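/*
 * For example, width = 4 and size = 3 gives 4 + (4 * 3 + 1) / 2 = 10 code units, i.e.
 * 20 bytes: an 8-byte header (ident, width, size) followed by 12 bytes of data. This is
 * what the "(size * width) + 8" computation below accounts for.
 */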
void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  // Add the table to the list - we'll process it later
  FillArrayData *tab_rec =
      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint16_t width = tab_rec->table[1];
  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
  tab_rec->size = (size * width) + 8;

  fill_array_data_.Insert(tab_rec);

  // Making a call - use explicit registers
  FlushAllRegs();   /* Everything to home location */
  LoadValueDirectFixed(rl_src, rs_x0);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData).Int32Value(),
               rs_rA64_LR);
  // Materialize a pointer to the fill data image
  NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);
}

/*
 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.
 */
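// Fast path sketch: load the lock word with ldxr; if it is zero (unlocked), try to store the
// current thread id with stxr, and on success we hold the lock. A non-zero lock word or a
// failed store-exclusive takes the slow path through the pLockObject entrypoint below.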
void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // x2    = address of lock word
  // w3    = lock word / store failure
  // TUNING: how much performance do we gain by inlining this, given that we have already
  // flushed all registers to their home locations?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_w0);
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null check fails, it is handled by the slow path to reduce exception-related meta-data.
    if (Runtime::Current()->ExplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
    }
  }
  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
  NewLIR2(kA64Ldxr2rX, rw3, rx2);
  MarkPossibleNullPointerException(opt_flags);
  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w3, 0, NULL);
  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, NULL);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  not_unlocked_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artLockObjectFromCode(obj);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  lock_success_branch->target = success_target;
  GenMemBarrier(kLoadLoad);
}

/*
 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc. Note the code below doesn't use ldxr/stxr as the code holds the lock
 * and can only give away ownership if it is suspended.
 */
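// Fast path sketch: if the lock word equals the current thread id (i.e. we hold a thin lock),
// issue a StoreLoad barrier and clear the lock word with a plain store; any other value takes
// the slow path through the pUnlockObject entrypoint below.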
void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // w2    = lock word
  // TUNING: how much performance do we gain by inlining this, given that we have already
  // flushed all registers to their home locations?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_w0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null check fails, it is handled by the slow path to reduce exception-related meta-data.
    if (Runtime::Current()->ExplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
    }
  }
  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
  MarkPossibleNullPointerException(opt_flags);
  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
  GenMemBarrier(kStoreLoad);
  Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_xzr);
  LIR* unlock_success_branch = OpUnconditionalBranch(NULL);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  slow_unlock_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artUnlockObjectFromCode(obj);
  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  unlock_success_branch->target = success_target;
}

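// Fetch the pending exception reference from the current Thread and clear the slot so the
// exception is not delivered twice.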
void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
  int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
  Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr);
  StoreValue(rl_dest, rl_result);
}

/*
 * Mark garbage collection card. Skip if the value we're storing is null.
 */
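// The card index is the target address shifted right by kCardShift; the byte stored is the
// low byte of the card table base, which serves as the dirty marker in ART's card table
// scheme (see gc/accounting/card_table for the authoritative description).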
void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
  RegStorage reg_card_base = AllocTemp();
  RegStorage reg_card_no = AllocTemp();
  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
  LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
  FreeTemp(reg_card_base);
  FreeTemp(reg_card_no);
}

void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
  /*
   * On entry, x0, x1, x2 & x3 are live.  Let the register allocation
   * mechanism know so it doesn't try to use any of them when
   * expanding the frame or flushing.  This leaves the utility
   * code with a single temp: x12.  This should be enough.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);
  LockTemp(rs_x2);
  LockTemp(rs_x3);

  /*
   * We can safely skip the stack overflow check if we're
   * a leaf *and* our frame size < fudge factor.
   */
  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
                            (static_cast<size_t>(frame_size_) <
                            Thread::kStackOverflowReservedBytes));
  NewLIR0(kPseudoMethodEntry);

  if (!skip_overflow_check) {
    LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12);
    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
      /* Load stack limit */
      // TODO(Arm64): fix the line below:
      // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
    } else {
      // Implicit stack overflow check.
      // Generate a load from [sp, #-framesize].  If this is in the stack
      // redzone we will get a segmentation fault.
      // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
      Load32Disp(rs_rA64_SP, 0, rs_wzr);
      MarkPossibleStackOverflowException();
    }
  } else if (frame_size_ > 0) {
    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
  }

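  /*
   * Frame layout note: core callee saves are spilled at the top of the new frame, with any
   * FP spills immediately below them; both offsets below are computed down from frame_size_.
   */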
  /* Need to spill any FP regs? */
  if (fp_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
    SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
  }

  /* Spill core callee saves. */
  if (core_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
    SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
  }

  FlushIns(ArgLocs, rl_method);

  FreeTemp(rs_x0);
  FreeTemp(rs_x1);
  FreeTemp(rs_x2);
  FreeTemp(rs_x3);
}

void Arm64Mir2Lir::GenExitSequence() {
  /*
   * In the exit path, x0/x1 are live - make sure they aren't
   * allocated by the register utilities as temps.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);

  NewLIR0(kPseudoMethodExit);

  /* Need to restore any FP callee saves? */
  if (fp_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
    UnSpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
  }
  if (core_spill_mask_) {
    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
    UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
  }

  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true);
  NewLIR0(kA64Ret);
}

void Arm64Mir2Lir::GenSpecialExitSequence() {
  NewLIR0(kA64Ret);
}

}  // namespace art