int_arm64.cc revision c763e350da562b0c6bebf10599588d4901140e45
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Arm64 ISA. */
18
19#include "arm64_lir.h"
20#include "codegen_arm64.h"
21#include "dex/quick/mir_to_lir-inl.h"
22#include "dex/reg_storage_eq.h"
23#include "entrypoints/quick/quick_entrypoints.h"
24#include "mirror/array.h"
25#include "utils.h"
26
27namespace art {
28
29LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
30  OpRegReg(kOpCmp, src1, src2);
31  return OpCondBranch(cond, target);
32}
33
34LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
35  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
36  return NULL;
37}
38
39void Arm64Mir2Lir::OpEndIT(LIR* it) {
40  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
41}
42
43/*
44 * 64-bit 3way compare function.
45 *     cmp   xA, xB
46 *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
47 *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
48 */
49void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
50                              RegLocation rl_src2) {
51  RegLocation rl_result;
52  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
53  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
54  rl_result = EvalLoc(rl_dest, kCoreReg, true);
55
56  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
57  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
58  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
59          rl_result.reg.GetReg(), kArmCondGe);
60  StoreValue(rl_dest, rl_result);
61}
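// A rough host-side sketch of what the csinc/csneg pair above computes; the
// helper name is illustrative only and is not part of this backend.
static inline int32_t CmpLongReference(int64_t a, int64_t b) {
  int32_t magnitude = (a == b) ? 0 : 1;      // csinc wC, wzr, wzr, eq
  return (a >= b) ? magnitude : -magnitude;  // csneg wC, wC, wC, ge  =>  -1, 0 or 1
}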
62
63void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
64                             RegLocation rl_src1, RegLocation rl_shift) {
65  OpKind op = kOpBkpt;
66  switch (opcode) {
67  case Instruction::SHL_LONG:
68  case Instruction::SHL_LONG_2ADDR:
69    op = kOpLsl;
70    break;
71  case Instruction::SHR_LONG:
72  case Instruction::SHR_LONG_2ADDR:
73    op = kOpAsr;
74    break;
75  case Instruction::USHR_LONG:
76  case Instruction::USHR_LONG_2ADDR:
77    op = kOpLsr;
78    break;
79  default:
80    LOG(FATAL) << "Unexpected case: " << opcode;
81  }
82  rl_shift = LoadValue(rl_shift, kCoreReg);
83  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
84  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
85  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
86  StoreValueWide(rl_dest, rl_result);
87}
88
89static constexpr bool kUseDeltaEncodingInGenSelect = false;
90
91void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
92                             RegStorage rs_dest, int result_reg_class) {
93  if (false_val == 0 ||               // 0 is better as first operand.
94      true_val == 1 ||                // Potentially Csinc.
95      true_val == -1 ||               // Potentially Csinv.
96      true_val == false_val + 1) {    // Potentially Csinc.
97    ccode = NegateComparison(ccode);
98    std::swap(true_val, false_val);
99  }
100
101  ArmConditionCode code = ArmConditionEncoding(ccode);
102
103  int opcode;                                      // The opcode.
104  RegStorage left_op = RegStorage::InvalidReg();   // The operands.
105  RegStorage right_op = RegStorage::InvalidReg();  // The operands.
106
107  bool is_wide = rs_dest.Is64Bit();
108
109  RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;
110
111  if (true_val == 0) {
112    left_op = zero_reg;
113  } else {
114    left_op = rs_dest;
115    LoadConstantNoClobber(rs_dest, true_val);
116  }
117  if (false_val == 1) {
118    right_op = zero_reg;
119    opcode = kA64Csinc4rrrc;
120  } else if (false_val == -1) {
121    right_op = zero_reg;
122    opcode = kA64Csinv4rrrc;
123  } else if (false_val == true_val + 1) {
124    right_op = left_op;
125    opcode = kA64Csinc4rrrc;
126  } else if (false_val == -true_val) {
127    right_op = left_op;
128    opcode = kA64Csneg4rrrc;
129  } else if (false_val == ~true_val) {
130    right_op = left_op;
131    opcode = kA64Csinv4rrrc;
132  } else if (true_val == 0) {
133    // left_op is zero_reg.
134    right_op = rs_dest;
135    LoadConstantNoClobber(rs_dest, false_val);
136    opcode = kA64Csel4rrrc;
137  } else {
138    // Generic case.
139    RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
140    if (is_wide) {
141      if (t_reg2.Is32Bit()) {
142        t_reg2 = As64BitReg(t_reg2);
143      }
144    } else {
145      if (t_reg2.Is64Bit()) {
146        t_reg2 = As32BitReg(t_reg2);
147      }
148    }
149
150    if (kUseDeltaEncodingInGenSelect) {
151      int32_t delta = false_val - true_val;
152      uint32_t abs_val = delta < 0 ? -delta : delta;
153
154      if (abs_val < 0x1000) {  // TODO: Replace with InexpensiveConstant with opcode.
155        // Can encode as immediate to an add.
156        right_op = t_reg2;
157        OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
158      }
159    }
160
161    // Load as constant.
162    if (!right_op.Valid()) {
163      LoadConstantNoClobber(t_reg2, false_val);
164      right_op = t_reg2;
165    }
166
167    opcode = kA64Csel4rrrc;
168  }
169
170  DCHECK(left_op.Valid() && right_op.Valid());
171  NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
172      code);
173}
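// Reference semantics of the A64 conditional-select family chosen above
// (sketch; "cond" is the possibly negated comparison):
//   csel  d, a, b, cond  =>  d = cond ? a : b
//   csinc d, a, b, cond  =>  d = cond ? a : b + 1
//   csinv d, a, b, cond  =>  d = cond ? a : ~b
//   csneg d, a, b, cond  =>  d = cond ? a : -b
// For example, (true_val, false_val) == (0, 1) is emitted as
//   csinc wd, wzr, wzr, cond
// which yields 0 when the condition holds and 1 otherwise.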
174
175void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
176                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
177                                    int dest_reg_class) {
178  DCHECK(rs_dest.Valid());
179  OpRegReg(kOpCmp, left_op, right_op);
180  GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
181}
182
183void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
184  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
185  rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
186  // rl_src may be aliased with rl_result/rl_dest, so do compare early.
187  OpRegImm(kOpCmp, rl_src.reg, 0);
188
189  RegLocation rl_dest = mir_graph_->GetDest(mir);
190
191  // The kMirOpSelect has two variants, one for constants and one for moves.
192  if (mir->ssa_rep->num_uses == 1) {
193    RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
194    GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
195              rl_dest.ref ? kRefReg : kCoreReg);
196    StoreValue(rl_dest, rl_result);
197  } else {
198    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
199    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
200
201    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
202    rl_true = LoadValue(rl_true, result_reg_class);
203    rl_false = LoadValue(rl_false, result_reg_class);
204    RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);
205
206    bool is_wide = rl_dest.ref || rl_dest.wide;
207    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
208    NewLIR4(opcode, rl_result.reg.GetReg(),
209            rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
210    StoreValue(rl_dest, rl_result);
211  }
212}
213
214void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
215  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
216  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
217  LIR* taken = &block_label_list_[bb->taken];
218  LIR* not_taken = &block_label_list_[bb->fall_through];
219  // Normalize such that if either operand is constant, src2 will be constant.
220  ConditionCode ccode = mir->meta.ccode;
221  if (rl_src1.is_const) {
222    std::swap(rl_src1, rl_src2);
223    ccode = FlipComparisonOrder(ccode);
224  }
225
226  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
227
228  if (rl_src2.is_const) {
229    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
230
231    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
232    // Special handling using cbz & cbnz.
233    if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
234      OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
235      OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
236      return;
237    }
238
239    // Only handle Imm if src2 is not already in a register.
240    rl_src2 = UpdateLocWide(rl_src2);
241    if (rl_src2.location != kLocPhysReg) {
242      OpRegImm64(kOpCmp, rl_src1.reg, val);
243      OpCondBranch(ccode, taken);
244      OpCondBranch(NegateComparison(ccode), not_taken);
245      return;
246    }
247  }
248
249  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
250  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
251  OpCondBranch(ccode, taken);
252  OpCondBranch(NegateComparison(ccode), not_taken);
253}
254
255/*
256 * Generate a register comparison to an immediate and branch.  Caller
257 * is responsible for setting branch target field.
258 */
259LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
260                                  LIR* target) {
261  LIR* branch = nullptr;
262  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
263  if (check_value == 0) {
264    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
265      ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
266      ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
267      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
268    } else if (arm_cond == kArmCondLs) {
269      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
270      // This case happens for a bounds check of array[0].
271      ArmOpcode opcode = kA64Cbz2rt;
272      ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
273      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
274    }
275  }
276
277  if (branch == nullptr) {
278    OpRegImm(kOpCmp, reg, check_value);
279    branch = NewLIR2(kA64B2ct, arm_cond, 0);
280  }
281
282  branch->target = target;
283  return branch;
284}
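// Sketch of the zero-compare fast paths above (register names illustrative):
//   x == 0  ->  cbz  w0, target          x != 0  ->  cbnz w0, target
//   unsigned x <= 0 can only hold when x == 0, so it also lowers to cbz;
//   anything else falls back to  cmp w0, #imm ; b.<cond> target.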
285
286LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
287                                     RegStorage base_reg, int offset, int check_value,
288                                     LIR* target, LIR** compare) {
289  DCHECK(compare == nullptr);
290  // It is possible that the temp register is 64-bit (ArgReg or RefReg).
291  // Always compare the 32-bit value, no matter what temp_reg is.
292  if (temp_reg.Is64Bit()) {
293    temp_reg = As32BitReg(temp_reg);
294  }
295  Load32Disp(base_reg, offset, temp_reg);
296  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
297  return branch;
298}
299
300LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
301  bool dest_is_fp = r_dest.IsFloat();
302  bool src_is_fp = r_src.IsFloat();
303  ArmOpcode opcode = kA64Brk1d;
304  LIR* res;
305
306  if (LIKELY(dest_is_fp == src_is_fp)) {
307    if (LIKELY(!dest_is_fp)) {
308      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
309
310      // Core/core copy.
311      // Copies involving the sp register require a different instruction.
312      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
313
314      // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2 args instruction.
315      //   This currently works because the other arguments are set to 0 by default. We should
316      //   rather introduce an alias kA64Mov2RR.
317
318      // core/core copy. Do a x/x copy only if both registers are x.
319      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
320        opcode = WIDE(opcode);
321      }
322    } else {
323      // Float/float copy.
324      bool dest_is_double = r_dest.IsDouble();
325      bool src_is_double = r_src.IsDouble();
326
327      // We do not do float/double or double/float casts here.
328      DCHECK_EQ(dest_is_double, src_is_double);
329
330      // Homogeneous float/float copy.
331      opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
332    }
333  } else {
334    // Inhomogeneous register copy.
335    if (dest_is_fp) {
336      if (r_dest.IsDouble()) {
337        opcode = kA64Fmov2Sx;
338      } else {
339        r_src = Check32BitReg(r_src);
340        opcode = kA64Fmov2sw;
341      }
342    } else {
343      if (r_src.IsDouble()) {
344        opcode = kA64Fmov2xS;
345      } else {
346        r_dest = Check32BitReg(r_dest);
347        opcode = kA64Fmov2ws;
348      }
349    }
350  }
351
352  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
353
354  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
355    res->flags.is_nop = true;
356  }
357
358  return res;
359}
360
361void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
362  if (r_dest != r_src) {
363    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
364    AppendLIR(res);
365  }
366}
367
368void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
369  OpRegCopy(r_dest, r_src);
370}
371
372// Table of magic divisors
373struct MagicTable {
374  int magic64_base;
375  int magic64_eor;
376  uint64_t magic64;
377  uint32_t magic32;
378  uint32_t shift;
379  DividePattern pattern;
380};
381
382static const MagicTable magic_table[] = {
383  {   0,      0,                  0,          0, 0, DivideNone},  // 0
384  {   0,      0,                  0,          0, 0, DivideNone},  // 1
385  {   0,      0,                  0,          0, 0, DivideNone},  // 2
386  {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
387  {   0,      0,                  0,          0, 0, DivideNone},  // 4
388  {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
389  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
390  {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
391  {   0,      0,                  0,          0, 0, DivideNone},  // 8
392  {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
393  {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
394  {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
395  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
396  {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
397  {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
398  {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
399};
400
401// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
402bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
403                                      RegLocation rl_src, RegLocation rl_dest, int lit) {
404  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
405    return false;
406  }
407  DividePattern pattern = magic_table[lit].pattern;
408  if (pattern == DivideNone) {
409    return false;
410  }
411  // Tuning: add rem patterns
412  if (!is_div) {
413    return false;
414  }
415
416  RegStorage r_magic = AllocTemp();
417  LoadConstant(r_magic, magic_table[lit].magic32);
418  rl_src = LoadValue(rl_src, kCoreReg);
419  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
420  RegStorage r_long_mul = AllocTemp();
421  NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
422          r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
423  switch (pattern) {
424    case Divide3:
425      OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
426      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
427      break;
428    case Divide5:
429      OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
430                  32 + magic_table[lit].shift);
431      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
432      break;
433    case Divide7:
434      OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
435                       As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
436      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
437      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
438      break;
439    default:
440      LOG(FATAL) << "Unexpected pattern: " << pattern;
441  }
442  StoreValue(rl_dest, rl_result);
443  return true;
444}
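// Host-side sketch of the reciprocal-multiply pattern above for the 32-bit
// Divide3 case (Hacker's Delight 10-4); the helper name is illustrative only.
// Divide5 shifts the product right by 32 + shift instead, and Divide7 first
// adds the dividend back into the high half before shifting.
static inline int32_t DivideBy3Reference(int32_t x) {
  int64_t product = 0x55555556LL * x;                   // smaddl with magic32
  int32_t high = static_cast<int32_t>(product >> 32);   // take the high word
  return high - (x >> 31);                              // sub ..., asr #31
}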
445
446bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
447                                        RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
448  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
449    return false;
450  }
451  DividePattern pattern = magic_table[lit].pattern;
452  if (pattern == DivideNone) {
453    return false;
454  }
455  // Tuning: add rem patterns
456  if (!is_div) {
457    return false;
458  }
459
460  RegStorage r_magic = AllocTempWide();
461  rl_src = LoadValueWide(rl_src, kCoreReg);
462  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
463  RegStorage r_long_mul = AllocTempWide();
464
465  if (magic_table[lit].magic64_base >= 0) {
466    // Check that the entry in the table is correct.
467    if (kIsDebugBuild) {
468      uint64_t reconstructed_imm;
469      uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
470      if (magic_table[lit].magic64_eor >= 0) {
471        uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
472        reconstructed_imm = base ^ eor;
473      } else {
474        reconstructed_imm = base + 1;
475      }
476      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
477    }
478
479    // Load the magic constant in two instructions.
480    NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
481    if (magic_table[lit].magic64_eor >= 0) {
482      NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
483              magic_table[lit].magic64_eor);
484    } else {
485      NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
486    }
487  } else {
488    LoadConstantWide(r_magic, magic_table[lit].magic64);
489  }
490
491  NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
492  switch (pattern) {
493    case Divide3:
494      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
495      break;
496    case Divide5:
497      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
498      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
499      break;
500    case Divide7:
501      OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
502      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
503      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
504      break;
505    default:
506      LOG(FATAL) << "Unexpected pattern: " << pattern;
507  }
508  StoreValueWide(rl_dest, rl_result);
509  return true;
510}
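// 64-bit analogue of the pattern above, modelling smulh with __int128 (a
// host-side sketch only; assumes a compiler that provides __int128):
static inline int64_t DivideBy3Reference64(int64_t x) {
  const int64_t kMagic = 0x5555555555555556LL;          // magic_table[3].magic64
  __int128 product = static_cast<__int128>(kMagic) * x;
  int64_t high = static_cast<int64_t>(product >> 64);   // smulh
  return high - (x >> 63);                              // sub ..., asr #63
}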
511
512// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
513// and store the result in 'rl_dest'.
514bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
515                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
516  return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int64_t>(lit));
517}
518
519// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
520// and store the result in 'rl_dest'.
521bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
522                                      RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
523  const bool is_64bit = rl_dest.wide;
524  const int nbits = (is_64bit) ? 64 : 32;
525
526  if (lit < 2) {
527    return false;
528  }
529  if (!IsPowerOfTwo(lit)) {
530    if (is_64bit) {
531      return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
532    } else {
533      return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
534    }
535  }
536  int k = LowestSetBit(lit);
537  if (k >= nbits - 2) {
538    // Avoid special cases.
539    return false;
540  }
541
542  RegLocation rl_result;
543  RegStorage t_reg;
544  if (is_64bit) {
545    rl_src = LoadValueWide(rl_src, kCoreReg);
546    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
547    t_reg = AllocTempWide();
548  } else {
549    rl_src = LoadValue(rl_src, kCoreReg);
550    rl_result = EvalLoc(rl_dest, kCoreReg, true);
551    t_reg = AllocTemp();
552  }
553
554  int shift = EncodeShift(kA64Lsr, nbits - k);
555  if (is_div) {
556    if (lit == 2) {
557      // Division by 2 is by far the most common division by constant.
558      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
559      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
560    } else {
561      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
562      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
563      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
564    }
565  } else {
566    if (lit == 2) {
567      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
568      OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
569      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
570    } else {
571      RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
572      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
573      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
574      OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
575      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
576    }
577  }
578
579  if (is_64bit) {
580    StoreValueWide(rl_dest, rl_result);
581  } else {
582    StoreValue(rl_dest, rl_result);
583  }
584  return true;
585}
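// Sketch of the general power-of-two division path above for the 32-bit case,
// with k = LowestSetBit(lit); the lit == 2 fast path folds the bias differently
// but computes the same value. The helper name is illustrative only.
static inline int32_t DivideByPowerOfTwoReference(int32_t x, int k) {
  uint32_t sign_mask = static_cast<uint32_t>(x >> 31);               // asr #31
  int32_t biased = x + static_cast<int32_t>(sign_mask >> (32 - k));  // add ..., lsr #(32 - k)
  return biased >> k;                                                // asr #k
}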
586
587bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
588  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
589  return false;
590}
591
592RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
593  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
594  return rl_dest;
595}
596
597RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
598  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
599
600  // Put the literal in a temp.
601  RegStorage lit_temp = AllocTemp();
602  LoadConstant(lit_temp, lit);
603  // Use the generic case for div/rem with arg2 in a register.
604  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
605  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
606  FreeTemp(lit_temp);
607
608  return rl_result;
609}
610
611RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
612                                    RegLocation rl_src2, bool is_div, bool check_zero) {
613  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
614  return rl_dest;
615}
616
617RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
618                                    bool is_div) {
619  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
620
621  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
622  if (is_div) {
623    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
624  } else {
625    // temp = r_src1 / r_src2
626    // dest = r_src1 - temp * r_src2
627    RegStorage temp;
628    ArmOpcode wide;
629    if (rl_result.reg.Is64Bit()) {
630      temp = AllocTempWide();
631      wide = WIDE(0);
632    } else {
633      temp = AllocTemp();
634      wide = UNWIDE(0);
635    }
636    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
637    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
638            r_src1.GetReg(), r_src2.GetReg());
639    FreeTemp(temp);
640  }
641  return rl_result;
642}
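// The remainder path relies on the identity r = n - (n / d) * d; roughly the
// emitted pair is (register names illustrative):
//   sdiv  t, n, d      // t = n / d, truncated toward zero
//   msub  r, t, d, n   // r = n - t * d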
643
644bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
645  RegLocation rl_src = info->args[0];
646  rl_src = LoadValueWide(rl_src, kCoreReg);
647  RegLocation rl_dest = InlineTargetWide(info);
648  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
649  RegStorage sign_reg = AllocTempWide();
650  // abs(x) = y<=x>>63, (x+y)^y.
651  OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63);
652  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg);
653  OpRegReg(kOpXor, rl_result.reg, sign_reg);
654  StoreValueWide(rl_dest, rl_result);
655  return true;
656}
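// The branch-free abs above, spelled out as a host-side sketch (unsigned
// arithmetic is used here only to avoid signed overflow for the most negative
// value, which wraps to itself just as it does in the generated code):
static inline int64_t AbsLongReference(int64_t x) {
  int64_t sign = x >> 63;                                                  // 0 or -1 (asr #63)
  uint64_t sum = static_cast<uint64_t>(x) + static_cast<uint64_t>(sign);   // add
  return static_cast<int64_t>(sum ^ static_cast<uint64_t>(sign));          // eor
}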
657
658bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
659  DCHECK_EQ(cu_->instruction_set, kArm64);
660  RegLocation rl_src1 = info->args[0];
661  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
662  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
663  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
664  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
665  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
666  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
667  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
668          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
669  (is_long) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
670  return true;
671}
672
673bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
674  RegLocation rl_src_address = info->args[0];  // long address
675  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
676  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
677  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
678
679  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
680  if (size == k64) {
681    StoreValueWide(rl_dest, rl_result);
682  } else {
683    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
684    StoreValue(rl_dest, rl_result);
685  }
686  return true;
687}
688
689bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
690  RegLocation rl_src_address = info->args[0];  // long address
691  RegLocation rl_src_value = info->args[2];  // [size] value
692  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
693
694  RegLocation rl_value;
695  if (size == k64) {
696    rl_value = LoadValueWide(rl_src_value, kCoreReg);
697  } else {
698    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
699    rl_value = LoadValue(rl_src_value, kCoreReg);
700  }
701  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
702  return true;
703}
704
705bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
706  DCHECK_EQ(cu_->instruction_set, kArm64);
707  // Unused - RegLocation rl_src_unsafe = info->args[0];
708  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
709  RegLocation rl_src_offset = info->args[2];  // long low
710  RegLocation rl_src_expected = info->args[4];  // int, long or Object
711  // If is_long, high half is in info->args[5]
712  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
713  // If is_long, high half is in info->args[7]
714  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
715
716  // Load Object and offset
717  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
718  RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
719
720  RegLocation rl_new_value;
721  RegLocation rl_expected;
722  if (is_long) {
723    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
724    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
725  } else {
726    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
727    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
728  }
729
730  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
731    // Mark card for object assuming new value is stored.
732    MarkGCCard(rl_new_value.reg, rl_object.reg);
733  }
734
735  RegStorage r_ptr = AllocTempRef();
736  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
737
738  // Free now unneeded rl_object and rl_offset to give more temps.
739  ClobberSReg(rl_object.s_reg_low);
740  FreeTemp(rl_object.reg);
741  ClobberSReg(rl_offset.s_reg_low);
742  FreeTemp(rl_offset.reg);
743
744  // do {
745  //   tmp = [r_ptr] - expected;
746  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
747  // result = tmp != 0;
748
749  RegStorage r_tmp;
750  RegStorage r_tmp_stored;
751  RegStorage rl_new_value_stored = rl_new_value.reg;
752  ArmOpcode wide = UNWIDE(0);
753  if (is_long) {
754    r_tmp_stored = r_tmp = AllocTempWide();
755    wide = WIDE(0);
756  } else if (is_object) {
757    // References use 64-bit registers, but are stored as compressed 32-bit values.
758    // This means r_tmp_stored != r_tmp.
759    r_tmp = AllocTempRef();
760    r_tmp_stored = As32BitReg(r_tmp);
761    rl_new_value_stored = As32BitReg(rl_new_value_stored);
762  } else {
763    r_tmp_stored = r_tmp = AllocTemp();
764  }
765
766  RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
767  LIR* loop = NewLIR0(kPseudoTargetLabel);
768  NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
769  OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
770  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
771  LIR* early_exit = OpCondBranch(kCondNe, NULL);
772  NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
773  NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
774  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
775  OpCondBranch(kCondNe, loop);
776
777  LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
778  early_exit->target = exit_loop;
779
780  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
781  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
782
783  FreeTemp(r_tmp);  // Now unneeded.
784  FreeTemp(r_ptr);  // Now unneeded.
785
786  StoreValue(rl_dest, rl_result);
787
788  return true;
789}
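// Rough shape of the loop emitted above (32-bit, non-object case; register
// names illustrative). The status check uses cmp rather than cbnz so that the
// condition flags are still valid at the join, where one csinc builds the result:
//   retry:
//     ldaxr  w_tmp, [x_ptr]             // load-acquire exclusive
//     cmp    w_tmp, w_expected
//     b.ne   done                       // value differs: fail with flags = ne
//     stlxr  w_status, w_new, [x_ptr]   // store-release exclusive
//     cmp    w_status, #0
//     b.ne   retry                      // lost the exclusive monitor: retry
//   done:
//     csinc  w_result, wzr, wzr, ne     // 0 on failure, 1 on success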
790
791LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
792  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
793  return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
794}
795
796LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
797  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
798  return NULL;
799}
800
801LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
802  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
803  return NULL;
804}
805
806void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
807                                               RegLocation rl_result, int lit,
808                                               int first_bit, int second_bit) {
809  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit));
810  if (first_bit != 0) {
811    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
812  }
813}
814
815void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
816  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
817}
818
819// Test suspend flag, return target of taken suspend branch
820LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
821  NewLIR3(kA64Subs3rRd, rwSUSPEND, rwSUSPEND, 1);
822  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
823}
824
825// Decrement register and branch on condition
826LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
827  // Combine sub & test using sub setflags encoding here.  We need to make sure a
828  // subtract form that sets carry is used, so generate explicitly.
829  // TODO: might be best to add a new op, kOpSubs, and handle it generically.
830  ArmOpcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
831  NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
832  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
833  return OpCondBranch(c_code, target);
834}
835
836bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
837#if ANDROID_SMP != 0
838  // Start with the last LIR as the barrier; if it is not sufficient, generate a new one.
839  LIR* barrier = last_lir_insn_;
840
841  int dmb_flavor;
842  // TODO: revisit Arm barrier kinds
843  switch (barrier_kind) {
844    case kAnyStore: dmb_flavor = kISH; break;
845    case kLoadAny: dmb_flavor = kISH; break;
846        // We conjecture that kISHLD is insufficient.  It is documented
847        // to provide LoadLoad | StoreStore ordering.  But if this were used
848        // to implement volatile loads, we suspect that the lack of store
849        // atomicity on ARM would cause us to allow incorrect results for
850        // the canonical IRIW example.  But we're not sure.
851        // We should be using acquire loads instead.
852    case kStoreStore: dmb_flavor = kISHST; break;
853    case kAnyAny: dmb_flavor = kISH; break;
854    default:
855      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
856      dmb_flavor = kSY;  // quiet gcc.
857      break;
858  }
859
860  bool ret = false;
861
862  // If the same barrier already exists, don't generate another.
863  if (barrier == nullptr
864      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
865    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
866    ret = true;
867  }
868
869  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
870  DCHECK(!barrier->flags.use_def_invalid);
871  barrier->u.m.def_mask = &kEncodeAll;
872  return ret;
873#else
874  return false;
875#endif
876}
877
878void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
879  RegLocation rl_result;
880
881  rl_src = LoadValue(rl_src, kCoreReg);
882  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
883  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
884  StoreValueWide(rl_dest, rl_result);
885}
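// The sbfm above with immr = 0, imms = 31 is the canonical encoding of sxtw,
// i.e. a plain sign extension (host-side sketch, helper name illustrative):
static inline int64_t IntToLongReference(int32_t x) {
  return static_cast<int64_t>(x);   // sxtw xd, wn
}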
886
887void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
888                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
889  if (rl_src2.is_const) {
890    DCHECK(rl_src2.wide);
891    int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
892    if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
893      return;
894    }
895  }
896
897  RegLocation rl_result;
898  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
899  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
900  GenDivZeroCheck(rl_src2.reg);
901  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
902  StoreValueWide(rl_dest, rl_result);
903}
904
905void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
906                             RegLocation rl_src2) {
907  RegLocation rl_result;
908
909  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
910  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
911  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
912  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
913  StoreValueWide(rl_dest, rl_result);
914}
915
916void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
917  RegLocation rl_result;
918
919  rl_src = LoadValueWide(rl_src, kCoreReg);
920  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
921  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
922  StoreValueWide(rl_dest, rl_result);
923}
924
925void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
926  RegLocation rl_result;
927
928  rl_src = LoadValueWide(rl_src, kCoreReg);
929  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
930  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
931  StoreValueWide(rl_dest, rl_result);
932}
933
934void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
935                              RegLocation rl_src1, RegLocation rl_src2) {
936  GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
937}
938
939void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
940                              RegLocation rl_src2) {
941  GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
942}
943
944void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
945                            RegLocation rl_src2) {
946  GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
947}
948
949void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
950                            RegLocation rl_src2) {
951  GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
952}
953
954void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
955                           RegLocation rl_src2) {
956  GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
957}
958
959void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
960                            RegLocation rl_src2) {
961  GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
962}
963
964/*
965 * Generate array load
966 */
967void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
968                             RegLocation rl_index, RegLocation rl_dest, int scale) {
969  RegisterClass reg_class = RegClassBySize(size);
970  int len_offset = mirror::Array::LengthOffset().Int32Value();
971  int data_offset;
972  RegLocation rl_result;
973  bool constant_index = rl_index.is_const;
974  rl_array = LoadValue(rl_array, kRefReg);
975  if (!constant_index) {
976    rl_index = LoadValue(rl_index, kCoreReg);
977  }
978
979  if (rl_dest.wide) {
980    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
981  } else {
982    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
983  }
984
985  // If index is constant, just fold it into the data offset
986  if (constant_index) {
987    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
988  }
989
990  /* null object? */
991  GenNullCheck(rl_array.reg, opt_flags);
992
993  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
994  RegStorage reg_len;
995  if (needs_range_check) {
996    reg_len = AllocTemp();
997    /* Get len */
998    Load32Disp(rl_array.reg, len_offset, reg_len);
999    MarkPossibleNullPointerException(opt_flags);
1000  } else {
1001    ForceImplicitNullCheck(rl_array.reg, opt_flags);
1002  }
1003  if (rl_dest.wide || rl_dest.fp || constant_index) {
1004    RegStorage reg_ptr;
1005    if (constant_index) {
1006      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
1007    } else {
1008      // No special indexed operation, lea + load w/ displacement
1009      reg_ptr = AllocTempRef();
1010      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
1011                       EncodeShift(kA64Lsl, scale));
1012      FreeTemp(rl_index.reg);
1013    }
1014    rl_result = EvalLoc(rl_dest, reg_class, true);
1015
1016    if (needs_range_check) {
1017      if (constant_index) {
1018        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1019      } else {
1020        GenArrayBoundsCheck(rl_index.reg, reg_len);
1021      }
1022      FreeTemp(reg_len);
1023    }
1024    if (rl_result.ref) {
1025      LoadRefDisp(reg_ptr, data_offset, rl_result.reg, kNotVolatile);
1026    } else {
1027      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
1028    }
1029    MarkPossibleNullPointerException(opt_flags);
1030    if (!constant_index) {
1031      FreeTemp(reg_ptr);
1032    }
1033    if (rl_dest.wide) {
1034      StoreValueWide(rl_dest, rl_result);
1035    } else {
1036      StoreValue(rl_dest, rl_result);
1037    }
1038  } else {
1039    // Offset base, then use indexed load
1040    RegStorage reg_ptr = AllocTempRef();
1041    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1042    FreeTemp(rl_array.reg);
1043    rl_result = EvalLoc(rl_dest, reg_class, true);
1044
1045    if (needs_range_check) {
1046      GenArrayBoundsCheck(rl_index.reg, reg_len);
1047      FreeTemp(reg_len);
1048    }
1049    if (rl_result.ref) {
1050      LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale);
1051    } else {
1052      LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size);
1053    }
1054    MarkPossibleNullPointerException(opt_flags);
1055    FreeTemp(reg_ptr);
1056    StoreValue(rl_dest, rl_result);
1057  }
1058}
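// Sketch of the sequence produced above for a variable-index int[] load with
// checks enabled (register names illustrative; the length load also serves as
// the implicit null check when implicit checks are in use):
//   ldr  w_len, [x_array, #LengthOffset]
//   add  x_ptr, x_array, #DataOffset
//   cmp  w_idx, w_len
//   b.hs <bounds-check slow path>
//   ldr  w_dst, [x_ptr, x_idx, lsl #scale]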
1059
1060/*
1061 * Generate array store
1062 *
1063 */
1064void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
1065                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
1066  RegisterClass reg_class = RegClassBySize(size);
1067  int len_offset = mirror::Array::LengthOffset().Int32Value();
1068  bool constant_index = rl_index.is_const;
1069
1070  int data_offset;
1071  if (size == k64 || size == kDouble) {
1072    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1073  } else {
1074    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1075  }
1076
1077  // If index is constant, just fold it into the data offset.
1078  if (constant_index) {
1079    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1080  }
1081
1082  rl_array = LoadValue(rl_array, kRefReg);
1083  if (!constant_index) {
1084    rl_index = LoadValue(rl_index, kCoreReg);
1085  }
1086
1087  RegStorage reg_ptr;
1088  bool allocated_reg_ptr_temp = false;
1089  if (constant_index) {
1090    reg_ptr = rl_array.reg;
1091  } else if (IsTemp(rl_array.reg) && !card_mark) {
1092    Clobber(rl_array.reg);
1093    reg_ptr = rl_array.reg;
1094  } else {
1095    allocated_reg_ptr_temp = true;
1096    reg_ptr = AllocTempRef();
1097  }
1098
1099  /* null object? */
1100  GenNullCheck(rl_array.reg, opt_flags);
1101
1102  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1103  RegStorage reg_len;
1104  if (needs_range_check) {
1105    reg_len = AllocTemp();
1106    // NOTE: max live temps(4) here.
1107    /* Get len */
1108    Load32Disp(rl_array.reg, len_offset, reg_len);
1109    MarkPossibleNullPointerException(opt_flags);
1110  } else {
1111    ForceImplicitNullCheck(rl_array.reg, opt_flags);
1112  }
1113  /* at this point, reg_ptr points to array, 2 live temps */
1114  if (rl_src.wide || rl_src.fp || constant_index) {
1115    if (rl_src.wide) {
1116      rl_src = LoadValueWide(rl_src, reg_class);
1117    } else {
1118      rl_src = LoadValue(rl_src, reg_class);
1119    }
1120    if (!constant_index) {
1121      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
1122                       EncodeShift(kA64Lsl, scale));
1123    }
1124    if (needs_range_check) {
1125      if (constant_index) {
1126        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1127      } else {
1128        GenArrayBoundsCheck(rl_index.reg, reg_len);
1129      }
1130      FreeTemp(reg_len);
1131    }
1132    if (rl_src.ref) {
1133      StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
1134    } else {
1135      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
1136    }
1137    MarkPossibleNullPointerException(opt_flags);
1138  } else {
1139    /* reg_ptr -> array data */
1140    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1141    rl_src = LoadValue(rl_src, reg_class);
1142    if (needs_range_check) {
1143      GenArrayBoundsCheck(rl_index.reg, reg_len);
1144      FreeTemp(reg_len);
1145    }
1146    if (rl_src.ref) {
1147      StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale);
1148    } else {
1149      StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size);
1150    }
1151    MarkPossibleNullPointerException(opt_flags);
1152  }
1153  if (allocated_reg_ptr_temp) {
1154    FreeTemp(reg_ptr);
1155  }
1156  if (card_mark) {
1157    MarkGCCard(rl_src.reg, rl_array.reg);
1158  }
1159}
1160
1161void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
1162                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
1163  OpKind op = kOpBkpt;
1164  // Per spec, we only care about low 6 bits of shift amount.
1165  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
1166  rl_src = LoadValueWide(rl_src, kCoreReg);
1167  if (shift_amount == 0) {
1168    StoreValueWide(rl_dest, rl_src);
1169    return;
1170  }
1171
1172  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1173  switch (opcode) {
1174    case Instruction::SHL_LONG:
1175    case Instruction::SHL_LONG_2ADDR:
1176      op = kOpLsl;
1177      break;
1178    case Instruction::SHR_LONG:
1179    case Instruction::SHR_LONG_2ADDR:
1180      op = kOpAsr;
1181      break;
1182    case Instruction::USHR_LONG:
1183    case Instruction::USHR_LONG_2ADDR:
1184      op = kOpLsr;
1185      break;
1186    default:
1187      LOG(FATAL) << "Unexpected case";
1188  }
1189  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
1190  StoreValueWide(rl_dest, rl_result);
1191}
1192
1193void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
1194                                     RegLocation rl_src1, RegLocation rl_src2) {
1195  OpKind op = kOpBkpt;
1196  switch (opcode) {
1197    case Instruction::ADD_LONG:
1198    case Instruction::ADD_LONG_2ADDR:
1199      op = kOpAdd;
1200      break;
1201    case Instruction::SUB_LONG:
1202    case Instruction::SUB_LONG_2ADDR:
1203      op = kOpSub;
1204      break;
1205    case Instruction::AND_LONG:
1206    case Instruction::AND_LONG_2ADDR:
1207      op = kOpAnd;
1208      break;
1209    case Instruction::OR_LONG:
1210    case Instruction::OR_LONG_2ADDR:
1211      op = kOpOr;
1212      break;
1213    case Instruction::XOR_LONG:
1214    case Instruction::XOR_LONG_2ADDR:
1215      op = kOpXor;
1216      break;
1217    default:
1218      LOG(FATAL) << "Unexpected opcode";
1219  }
1220
1221  if (op == kOpSub) {
1222    if (!rl_src2.is_const) {
1223      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1224    }
1225  } else {
1226    // Associativity.
1227    if (!rl_src2.is_const) {
1228      DCHECK(rl_src1.is_const);
1229      std::swap(rl_src1, rl_src2);
1230    }
1231  }
1232  DCHECK(rl_src2.is_const);
1233  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
1234
1235  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1236  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1237  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
1238  StoreValueWide(rl_dest, rl_result);
1239}
1240
1241static uint32_t ExtractReg(uint32_t reg_mask, int* reg) {
1242  // Find first register.
1243  int first_bit_set = CTZ(reg_mask) + 1;
1244  *reg = *reg + first_bit_set;
1245  reg_mask >>= first_bit_set;
1246  return reg_mask;
1247}
1248
1249/**
1250 * @brief Split a register list into pairs or single registers.
1251 *
1252 * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
1253 * @code
1254 *   int reg1 = -1, reg2 = -1;
1255 *   while (reg_mask) {
1256 *     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1257 *     if (UNLIKELY(reg2 < 0)) {
1258 *       // Single register in reg1.
1259 *     } else {
1260 *       // Pair in reg1, reg2.
1261 *     }
1262 *   }
1263 * @endcode
1264 */
1265static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
1266  // Find first register.
1267  int first_bit_set = CTZ(reg_mask) + 1;
1268  int reg = *reg1 + first_bit_set;
1269  reg_mask >>= first_bit_set;
1270
1271  if (LIKELY(reg_mask)) {
1272    // Save the first register, find the second and use the pair opcode.
1273    int second_bit_set = CTZ(reg_mask) + 1;
1274    *reg2 = reg;
1275    reg_mask >>= second_bit_set;
1276    *reg1 = reg + second_bit_set;
1277    return reg_mask;
1278  }
1279
1280  // Use the single opcode, as we just have one register.
1281  *reg1 = reg;
1282  *reg2 = -1;
1283  return reg_mask;
1284}
1285
1286static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1287  int reg1 = -1, reg2 = -1;
1288  const int reg_log2_size = 3;
1289
1290  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1291    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1292    if (UNLIKELY(reg2 < 0)) {
1293      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1294    } else {
1295      m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1296                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1297    }
1298  }
1299}
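// Worked example of the pairing above (sketch): with a byte offset of 32 and a
// mask naming x19, x20 and x21, GenPairWise yields (reg1, reg2) = (20, 19) and
// then (21, -1), so the stores are roughly
//   stp x19, x20, [base, #32]
//   str x21, [base, #48]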
1300
1301// TODO(Arm64): consider using ld1 and st1?
1302static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1303  int reg1 = -1, reg2 = -1;
1304  const int reg_log2_size = 3;
1305
1306  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1307    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1308    if (UNLIKELY(reg2 < 0)) {
1309      m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
1310                   offset);
1311    } else {
1312      m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1313                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1314    }
1315  }
1316}
1317
1318static int SpillRegsPreSub(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
1319                           uint32_t fp_reg_mask, int frame_size) {
1320  m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
1321
1322  int core_count = POPCOUNT(core_reg_mask);
1323
1324  if (fp_reg_mask != 0) {
1325    // Spill FP regs.
1326    int fp_count = POPCOUNT(fp_reg_mask);
1327    int spill_offset = frame_size - (core_count + fp_count) * kArm64PointerSize;
1328    SpillFPRegs(m2l, rs_sp, spill_offset, fp_reg_mask);
1329  }
1330
1331  if (core_reg_mask != 0) {
1332    // Spill core regs.
1333    int spill_offset = frame_size - (core_count * kArm64PointerSize);
1334    SpillCoreRegs(m2l, rs_sp, spill_offset, core_reg_mask);
1335  }
1336
1337  return frame_size;
1338}
1339
1340static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
1341                               uint32_t fp_reg_mask, int frame_size) {
1342  // Spill both core and fp regs at the same time, filling upwards from a single pre-indexed stp.
1343  // The very first instruction will be an stp with pre-indexed address, moving the stack pointer
1344  // down. From then on, we fill upwards. This will generate overall the same number of instructions
1345  // as the specialized code above in most cases (exception being odd number of core and even
1346  // non-zero fp spills), but is more flexible, as the offsets are guaranteed small.
1347  //
1348  // Some demonstrative fill cases : (c) = core, (f) = fp
1349  // cc    44   cc    44   cc    22   cc    33   fc => 1[1/2]
1350  // fc => 23   fc => 23   ff => 11   ff => 22
1351  // ff    11    f    11               f    11
1352  //
1353  int reg1 = -1, reg2 = -1;
1354  int core_count = POPCOUNT(core_reg_mask);
1355  int fp_count = POPCOUNT(fp_reg_mask);
1356
1357  int combined = fp_count + core_count;
1358  int all_offset = RoundUp(combined, 2);  // Needs to be 16B = 2-reg aligned.
1359
1360  int cur_offset = 2;  // What's the starting offset after the first stp? We expect the base slot
1361                       // to be filled.
1362
1363  // First figure out whether the bottom is FP or core.
1364  if (fp_count > 0) {
1365    // Some FP spills.
1366    //
1367    // Four cases: (d0 is dummy to fill up stp)
1368    // 1) Single FP, even number of core -> stp d0, fp_reg
1369    // 2) Single FP, odd number of core -> stp fp_reg, d0
1370    // 3) More FP, even number combined -> stp fp_reg1, fp_reg2
1371    // 4) More FP, odd number combined -> stp d0, fp_reg
1372    if (fp_count == 1) {
1373      fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
1374      DCHECK_EQ(fp_reg_mask, 0U);
1375      if (core_count % 2 == 0) {
1376        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
1377                     RegStorage::FloatSolo64(reg1).GetReg(),
1378                     RegStorage::FloatSolo64(reg1).GetReg(),
1379                     base.GetReg(), -all_offset);
1380      } else {
1381        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
1382                     RegStorage::FloatSolo64(reg1).GetReg(),
1383                     RegStorage::FloatSolo64(reg1).GetReg(),
1384                     base.GetReg(), -all_offset);
1385        cur_offset = 0;  // That core reg needs to go into the upper half.
1386      }
1387    } else {
1388      if (combined % 2 == 0) {
1389        fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
1390        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1391                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
1392      } else {
1393        fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
1394        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
1395                     base.GetReg(), -all_offset);
1396      }
1397    }
1398  } else {
1399    // No FP spills.
1400    //
1401    // Two cases:
1402    // 1) Even number of core -> stp core1, core2
1403    // 2) Odd number of core -> stp xzr, core1
1404    if (core_count % 2 == 1) {
1405      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
1406      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
1407                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
1408    } else {
1409      core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
1410      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
1411                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
1412    }
1413  }
1414
1415  if (fp_count != 0) {
1416    for (; fp_reg_mask != 0;) {
1417      // Have some FP regs to do.
1418      fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
1419      if (UNLIKELY(reg2 < 0)) {
1420        m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
1421                     cur_offset);
1422        // Do not increment offset here, as the second half will be filled by a core reg.
1423      } else {
1424        m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1425                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
1426        cur_offset += 2;
1427      }
1428    }
1429
1430    // Reset counting.
1431    reg1 = -1;
1432
1433    // If there is an odd number of core registers, we need to store the bottom now.
1434    if (core_count % 2 == 1) {
1435      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
1436      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
1437                   cur_offset + 1);
1438      cur_offset += 2;  // Half-slot filled now.
1439    }
1440  }
1441
1442  // Spill the rest of the core regs. They are guaranteed to be even.
1443  DCHECK_EQ(POPCOUNT(core_reg_mask) % 2, 0);
1444  for (; core_reg_mask != 0; cur_offset += 2) {
1445    core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
1446    m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1447                 RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
1448  }
1449
1450  DCHECK_EQ(cur_offset, all_offset);
1451
1452  return all_offset * 8;
1453}
1454
1455int Arm64Mir2Lir::SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
1456                            int frame_size) {
1457  // If the frame size is small enough that all offsets would fit into the immediates, use that
1458  // setup, as it decrements sp early (kind of instruction scheduling), and is not worse
1459  // instruction-count wise than the complicated code below.
1460  //
1461  // This case is also optimal when we have an odd number of core spills, and an even (non-zero)
1462  // number of fp spills.
1463  if ((RoundUp(frame_size, 8) / 8 <= 63)) {
1464    return SpillRegsPreSub(this, base, core_reg_mask, fp_reg_mask, frame_size);
1465  } else {
1466    return SpillRegsPreIndexed(this, base, core_reg_mask, fp_reg_mask, frame_size);
1467  }
1468}
1469
1470static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1471  int reg1 = -1, reg2 = -1;
1472  const int reg_log2_size = 3;
1473
1474  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1475    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1476    if (UNLIKELY(reg2 < 0)) {
1477      m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1478    } else {
1479      DCHECK_LE(offset, 63);
1480      m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1481                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1482    }
1483  }
1484}
1485
1486static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1487  int reg1 = -1, reg2 = -1;
1488  const int reg_log2_size = 3;
1489
1490  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1491    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1492    if (UNLIKELY(reg2 < 0)) {
1493      m2l->NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
1494                   offset);
1495    } else {
1496      m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1497                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1498    }
1499  }
1500}
1501
1502void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
1503                               int frame_size) {
1504  // Restore saves and drop stack frame.
1505  // 2 versions:
1506  //
1507  // 1. (Original): Try to address directly, then drop the whole frame.
1508  //                Limitation: ldp is a 7b signed immediate.
1509  //
1510  // 2. (New): Drop the non-save-part. Then do similar to original, which is now guaranteed to be
1511  //           in range. Then drop the rest.
1512  //
1513  // TODO: In methods with few spills but huge frame, it would be better to do non-immediate loads
1514  //       in variant 1.
1515
1516  // "Magic" constant, 63 (max signed 7b) * 8.
1517  static constexpr int kMaxFramesizeForOffset = 63 * kArm64PointerSize;
1518
1519  const int num_core_spills = POPCOUNT(core_reg_mask);
1520  const int num_fp_spills = POPCOUNT(fp_reg_mask);
1521
1522  int early_drop = 0;
1523
1524  if (frame_size > kMaxFramesizeForOffset) {
1525    // Second variant. Drop the frame part.
1526
1527    // TODO: Always use the first formula, as num_fp_spills would be zero?
1528    if (fp_reg_mask != 0) {
1529      early_drop = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
1530    } else {
1531      early_drop = frame_size - kArm64PointerSize * num_core_spills;
1532    }
1533
1534    // Drop needs to be 16B aligned, so that SP keeps aligned.
1535    early_drop = RoundDown(early_drop, 16);
1536
1537    OpRegImm64(kOpAdd, rs_sp, early_drop);
1538  }
1539
1540  // Unspill.
1541  if (fp_reg_mask != 0) {
1542    int offset = frame_size - early_drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
1543    UnSpillFPRegs(this, rs_sp, offset, fp_reg_mask);
1544  }
1545  if (core_reg_mask != 0) {
1546    int offset = frame_size - early_drop - kArm64PointerSize * num_core_spills;
1547    UnSpillCoreRegs(this, rs_sp, offset, core_reg_mask);
1548  }
1549
1550  // Drop the (rest of) the frame.
1551  OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop);
1552}
1553
1554bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
1555  ArmOpcode wide = (size == k64) ? WIDE(0) : UNWIDE(0);
1556  RegLocation rl_src_i = info->args[0];
1557  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
1558  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1559  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
1560  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
1561  (size == k64) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
1562  return true;
1563}
1564
1565}  // namespace art
1566