1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Thumb2 ISA. */
18
19#include "codegen_arm64.h"
20
21#include "arch/instruction_set_features.h"
22#include "arm64_lir.h"
23#include "base/bit_utils.h"
24#include "base/logging.h"
25#include "dex/compiler_ir.h"
26#include "dex/mir_graph.h"
27#include "dex/quick/mir_to_lir-inl.h"
28#include "dex/reg_storage_eq.h"
29#include "driver/compiler_driver.h"
30#include "entrypoints/quick/quick_entrypoints.h"
31#include "mirror/array-inl.h"
32
33namespace art {
34
35LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
36  OpRegReg(kOpCmp, src1, src2);
37  return OpCondBranch(cond, target);
38}
39
40LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
41  UNUSED(ccode, guide);
42  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
43  UNREACHABLE();
44}
45
46void Arm64Mir2Lir::OpEndIT(LIR* it) {
47  UNUSED(it);
48  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
49}
50
51/*
52 * 64-bit 3way compare function.
53 *     cmp   xA, xB
54 *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
55 *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
56 */
57void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
58                              RegLocation rl_src2) {
59  RegLocation rl_result;
60  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
61  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
62  rl_result = EvalLoc(rl_dest, kCoreReg, true);
63
64  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
65  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
66  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
67          rl_result.reg.GetReg(), kArmCondGe);
68  StoreValue(rl_dest, rl_result);
69}
70
71void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
72                             RegLocation rl_src1, RegLocation rl_shift) {
73  OpKind op = kOpBkpt;
74  switch (opcode) {
75  case Instruction::SHL_LONG:
76  case Instruction::SHL_LONG_2ADDR:
77    op = kOpLsl;
78    break;
79  case Instruction::SHR_LONG:
80  case Instruction::SHR_LONG_2ADDR:
81    op = kOpAsr;
82    break;
83  case Instruction::USHR_LONG:
84  case Instruction::USHR_LONG_2ADDR:
85    op = kOpLsr;
86    break;
87  default:
88    LOG(FATAL) << "Unexpected case: " << opcode;
89  }
90  rl_shift = LoadValue(rl_shift, kCoreReg);
91  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
92  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
93  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
94  StoreValueWide(rl_dest, rl_result);
95}
96
97static constexpr bool kUseDeltaEncodingInGenSelect = false;
98
99void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
100                             RegStorage rs_dest, int result_reg_class) {
101  if (false_val == 0 ||               // 0 is better as first operand.
102      true_val == 1 ||                // Potentially Csinc.
103      true_val == -1 ||               // Potentially Csinv.
104      true_val == false_val + 1) {    // Potentially Csinc.
105    ccode = NegateComparison(ccode);
106    std::swap(true_val, false_val);
107  }
108
109  ArmConditionCode code = ArmConditionEncoding(ccode);
110
111  int opcode;                                      // The opcode.
112  RegStorage left_op = RegStorage::InvalidReg();   // The operands.
113  RegStorage right_op = RegStorage::InvalidReg();  // The operands.
114
115  bool is_wide = rs_dest.Is64Bit();
116
117  RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;
118
119  if (true_val == 0) {
120    left_op = zero_reg;
121  } else {
122    left_op = rs_dest;
123    LoadConstantNoClobber(rs_dest, true_val);
124  }
125  if (false_val == 1) {
126    right_op = zero_reg;
127    opcode = kA64Csinc4rrrc;
128  } else if (false_val == -1) {
129    right_op = zero_reg;
130    opcode = kA64Csinv4rrrc;
131  } else if (false_val == true_val + 1) {
132    right_op = left_op;
133    opcode = kA64Csinc4rrrc;
134  } else if (false_val == -true_val) {
135    right_op = left_op;
136    opcode = kA64Csneg4rrrc;
137  } else if (false_val == ~true_val) {
138    right_op = left_op;
139    opcode = kA64Csinv4rrrc;
140  } else if (true_val == 0) {
141    // left_op is zero_reg.
142    right_op = rs_dest;
143    LoadConstantNoClobber(rs_dest, false_val);
144    opcode = kA64Csel4rrrc;
145  } else {
146    // Generic case.
147    RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
148    if (is_wide) {
149      if (t_reg2.Is32Bit()) {
150        t_reg2 = As64BitReg(t_reg2);
151      }
152    } else {
153      if (t_reg2.Is64Bit()) {
154        t_reg2 = As32BitReg(t_reg2);
155      }
156    }
157
158    if (kUseDeltaEncodingInGenSelect) {
159      int32_t delta = false_val - true_val;
160      uint32_t abs_val = delta < 0 ? -delta : delta;
161
162      if (abs_val < 0x1000) {  // TODO: Replace with InexpensiveConstant with opcode.
163        // Can encode as immediate to an add.
164        right_op = t_reg2;
165        OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
166      }
167    }
168
169    // Load as constant.
170    if (!right_op.Valid()) {
171      LoadConstantNoClobber(t_reg2, false_val);
172      right_op = t_reg2;
173    }
174
175    opcode = kA64Csel4rrrc;
176  }
177
178  DCHECK(left_op.Valid() && right_op.Valid());
179  NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
180      code);
181}
182
183void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
184                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
185                                    RegisterClass dest_reg_class) {
186  DCHECK(rs_dest.Valid());
187  OpRegReg(kOpCmp, left_op, right_op);
188  GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
189}
190
191void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
192  UNUSED(bb);
193  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
194  rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
195  // rl_src may be aliased with rl_result/rl_dest, so do compare early.
196  OpRegImm(kOpCmp, rl_src.reg, 0);
197
198  RegLocation rl_dest = mir_graph_->GetDest(mir);
199
200  // The kMirOpSelect has two variants, one for constants and one for moves.
201  if (mir->ssa_rep->num_uses == 1) {
202    RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
203    GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
204              rl_dest.ref ? kRefReg : kCoreReg);
205    StoreValue(rl_dest, rl_result);
206  } else {
207    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
208    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
209
210    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
211    rl_true = LoadValue(rl_true, result_reg_class);
212    rl_false = LoadValue(rl_false, result_reg_class);
213    RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);
214
215    bool is_wide = rl_dest.ref || rl_dest.wide;
216    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
217    NewLIR4(opcode, rl_result.reg.GetReg(),
218            rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
219    StoreValue(rl_dest, rl_result);
220  }
221}
222
223void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
224  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
225  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
226  LIR* taken = &block_label_list_[bb->taken];
227  LIR* not_taken = &block_label_list_[bb->fall_through];
228  // Normalize such that if either operand is constant, src2 will be constant.
229  ConditionCode ccode = mir->meta.ccode;
230  if (rl_src1.is_const) {
231    std::swap(rl_src1, rl_src2);
232    ccode = FlipComparisonOrder(ccode);
233  }
234
235  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
236
237  if (rl_src2.is_const) {
238    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
239
240    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
241    // Special handling using cbz & cbnz.
242    if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
243      OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
244      OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
245      return;
246    }
247
248    // Only handle Imm if src2 is not already in a register.
249    rl_src2 = UpdateLocWide(rl_src2);
250    if (rl_src2.location != kLocPhysReg) {
251      OpRegImm64(kOpCmp, rl_src1.reg, val);
252      OpCondBranch(ccode, taken);
253      OpCondBranch(NegateComparison(ccode), not_taken);
254      return;
255    }
256  }
257
258  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
259  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
260  OpCondBranch(ccode, taken);
261  OpCondBranch(NegateComparison(ccode), not_taken);
262}
263
264/*
265 * Generate a register comparison to an immediate and branch.  Caller
266 * is responsible for setting branch target field.
267 */
268LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
269                                  LIR* target) {
270  LIR* branch = nullptr;
271  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
272  if (check_value == 0) {
273    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
274      A64Opcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
275      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
276      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
277    } else if (arm_cond == kArmCondLs) {
278      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
279      // This case happens for a bounds check of array[0].
280      A64Opcode opcode = kA64Cbz2rt;
281      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
282      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
283    } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) {
284      A64Opcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht;
285      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
286      int value = reg.Is64Bit() ? 63 : 31;
287      branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0);
288    }
289  }
290
291  if (branch == nullptr) {
292    OpRegImm(kOpCmp, reg, check_value);
293    branch = NewLIR2(kA64B2ct, arm_cond, 0);
294  }
295
296  branch->target = target;
297  return branch;
298}
299
300LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
301                                     RegStorage base_reg, int offset, int check_value,
302                                     LIR* target, LIR** compare) {
303  DCHECK(compare == nullptr);
304  // It is possible that temp register is 64-bit. (ArgReg or RefReg)
305  // Always compare 32-bit value no matter what temp_reg is.
306  if (temp_reg.Is64Bit()) {
307    temp_reg = As32BitReg(temp_reg);
308  }
309  Load32Disp(base_reg, offset, temp_reg);
310  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
311  return branch;
312}
313
314LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
315  bool dest_is_fp = r_dest.IsFloat();
316  bool src_is_fp = r_src.IsFloat();
317  A64Opcode opcode = kA64Brk1d;
318  LIR* res;
319
320  if (LIKELY(dest_is_fp == src_is_fp)) {
321    if (LIKELY(!dest_is_fp)) {
322      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
323
324      // Core/core copy.
325      // Copies involving the sp register require a different instruction.
326      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
327
328      // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2 args instruction.
329      //   This currently works because the other arguments are set to 0 by default. We should
330      //   rather introduce an alias kA64Mov2RR.
331
332      // core/core copy. Do a x/x copy only if both registers are x.
333      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
334        opcode = WIDE(opcode);
335      }
336    } else {
337      // Float/float copy.
338      bool dest_is_double = r_dest.IsDouble();
339      bool src_is_double = r_src.IsDouble();
340
341      // We do not do float/double or double/float casts here.
342      DCHECK_EQ(dest_is_double, src_is_double);
343
344      // Homogeneous float/float copy.
345      opcode = (dest_is_double) ? WIDE(kA64Fmov2ff) : kA64Fmov2ff;
346    }
347  } else {
348    // Inhomogeneous register copy.
349    if (dest_is_fp) {
350      if (r_dest.IsDouble()) {
351        opcode = kA64Fmov2Sx;
352      } else {
353        r_src = Check32BitReg(r_src);
354        opcode = kA64Fmov2sw;
355      }
356    } else {
357      if (r_src.IsDouble()) {
358        opcode = kA64Fmov2xS;
359      } else {
360        r_dest = Check32BitReg(r_dest);
361        opcode = kA64Fmov2ws;
362      }
363    }
364  }
365
366  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
367
368  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
369    res->flags.is_nop = true;
370  }
371
372  return res;
373}
374
375void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
376  if (r_dest != r_src) {
377    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
378    AppendLIR(res);
379  }
380}
381
382void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
383  OpRegCopy(r_dest, r_src);
384}
385
386// Table of magic divisors
387struct MagicTable {
388  int magic64_base;
389  int magic64_eor;
390  uint64_t magic64;
391  uint32_t magic32;
392  uint32_t shift;
393  DividePattern pattern;
394};
395
396static const MagicTable magic_table[] = {
397  {   0,      0,                  0,          0, 0, DivideNone},  // 0
398  {   0,      0,                  0,          0, 0, DivideNone},  // 1
399  {   0,      0,                  0,          0, 0, DivideNone},  // 2
400  {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
401  {   0,      0,                  0,          0, 0, DivideNone},  // 4
402  {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
403  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
404  {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
405  {   0,      0,                  0,          0, 0, DivideNone},  // 8
406  {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
407  {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
408  {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
409  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
410  {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
411  {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
412  {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
413};
414
415// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
416bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
417                                      RegLocation rl_src, RegLocation rl_dest, int lit) {
418  UNUSED(dalvik_opcode);
419  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
420    return false;
421  }
422  DividePattern pattern = magic_table[lit].pattern;
423  if (pattern == DivideNone) {
424    return false;
425  }
426  // Tuning: add rem patterns
427  if (!is_div) {
428    return false;
429  }
430
431  RegStorage r_magic = AllocTemp();
432  LoadConstant(r_magic, magic_table[lit].magic32);
433  rl_src = LoadValue(rl_src, kCoreReg);
434  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
435  RegStorage r_long_mul = AllocTemp();
436  NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
437  switch (pattern) {
438    case Divide3:
439      OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
440      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
441      break;
442    case Divide5:
443      OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
444                  32 + magic_table[lit].shift);
445      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
446      break;
447    case Divide7:
448      OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
449                       As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
450      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
451      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
452      break;
453    default:
454      LOG(FATAL) << "Unexpected pattern: " << pattern;
455  }
456  StoreValue(rl_dest, rl_result);
457  return true;
458}
459
460bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
461                                        RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
462  UNUSED(dalvik_opcode);
463  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
464    return false;
465  }
466  DividePattern pattern = magic_table[lit].pattern;
467  if (pattern == DivideNone) {
468    return false;
469  }
470  // Tuning: add rem patterns
471  if (!is_div) {
472    return false;
473  }
474
475  RegStorage r_magic = AllocTempWide();
476  rl_src = LoadValueWide(rl_src, kCoreReg);
477  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
478  RegStorage r_long_mul = AllocTempWide();
479
480  if (magic_table[lit].magic64_base >= 0) {
481    // Check that the entry in the table is correct.
482    if (kIsDebugBuild) {
483      uint64_t reconstructed_imm;
484      uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
485      if (magic_table[lit].magic64_eor >= 0) {
486        uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
487        reconstructed_imm = base ^ eor;
488      } else {
489        reconstructed_imm = base + 1;
490      }
491      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
492    }
493
494    // Load the magic constant in two instructions.
495    NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
496    if (magic_table[lit].magic64_eor >= 0) {
497      NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
498              magic_table[lit].magic64_eor);
499    } else {
500      NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
501    }
502  } else {
503    LoadConstantWide(r_magic, magic_table[lit].magic64);
504  }
505
506  NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
507  switch (pattern) {
508    case Divide3:
509      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
510      break;
511    case Divide5:
512      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
513      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
514      break;
515    case Divide7:
516      OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
517      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
518      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
519      break;
520    default:
521      LOG(FATAL) << "Unexpected pattern: " << pattern;
522  }
523  StoreValueWide(rl_dest, rl_result);
524  return true;
525}
526
527// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
528// and store the result in 'rl_dest'.
529bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
530                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
531  return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int>(lit));
532}
533
534// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
535// and store the result in 'rl_dest'.
536bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
537                                      RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
538  const bool is_64bit = rl_dest.wide;
539  const int nbits = (is_64bit) ? 64 : 32;
540
541  if (lit < 2) {
542    return false;
543  }
544  if (!IsPowerOfTwo(lit)) {
545    if (is_64bit) {
546      return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
547    } else {
548      return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
549    }
550  }
551  int k = CTZ(lit);
552  if (k >= nbits - 2) {
553    // Avoid special cases.
554    return false;
555  }
556
557  RegLocation rl_result;
558  RegStorage t_reg;
559  if (is_64bit) {
560    rl_src = LoadValueWide(rl_src, kCoreReg);
561    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
562    t_reg = AllocTempWide();
563  } else {
564    rl_src = LoadValue(rl_src, kCoreReg);
565    rl_result = EvalLoc(rl_dest, kCoreReg, true);
566    t_reg = AllocTemp();
567  }
568
569  int shift = EncodeShift(kA64Lsr, nbits - k);
570  if (is_div) {
571    if (lit == 2) {
572      // Division by 2 is by far the most common division by constant.
573      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
574      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
575    } else {
576      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
577      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
578      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
579    }
580  } else {
581    if (lit == 2) {
582      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
583      OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
584      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
585    } else {
586      RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
587      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
588      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
589      OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
590      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
591    }
592  }
593
594  if (is_64bit) {
595    StoreValueWide(rl_dest, rl_result);
596  } else {
597    StoreValue(rl_dest, rl_result);
598  }
599  return true;
600}
601
602bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
603  UNUSED(rl_src, rl_dest, lit);
604  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
605  UNREACHABLE();
606}
607
608RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
609                                       bool is_div) {
610  UNUSED(rl_dest, rl_src1, lit, is_div);
611  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
612  UNREACHABLE();
613}
614
615RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
616  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
617
618  // Put the literal in a temp.
619  RegStorage lit_temp = AllocTemp();
620  LoadConstant(lit_temp, lit);
621  // Use the generic case for div/rem with arg2 in a register.
622  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
623  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
624  FreeTemp(lit_temp);
625
626  return rl_result;
627}
628
629RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
630                                    RegLocation rl_src2, bool is_div, int flags) {
631  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
632  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
633  UNREACHABLE();
634}
635
636RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
637                                    bool is_div) {
638  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
639
640  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
641  if (is_div) {
642    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
643  } else {
644    // temp = r_src1 / r_src2
645    // dest = r_src1 - temp * r_src2
646    RegStorage temp;
647    A64Opcode wide;
648    if (rl_result.reg.Is64Bit()) {
649      temp = AllocTempWide();
650      wide = WIDE(0);
651    } else {
652      temp = AllocTemp();
653      wide = UNWIDE(0);
654    }
655    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
656    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
657            r_src2.GetReg(), r_src1.GetReg());
658    FreeTemp(temp);
659  }
660  return rl_result;
661}
662
663bool Arm64Mir2Lir::GenInlinedAbsInt(CallInfo* info) {
664  RegLocation rl_src = info->args[0];
665  rl_src = LoadValue(rl_src, kCoreReg);
666  RegLocation rl_dest = InlineTarget(info);
667  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
668
669  // Compare the source value with zero. Write the negated value to the result if
670  // negative, otherwise write the original value.
671  OpRegImm(kOpCmp, rl_src.reg, 0);
672  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
673          kArmCondPl);
674  StoreValue(rl_dest, rl_result);
675  return true;
676}
677
678bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
679  RegLocation rl_src = info->args[0];
680  rl_src = LoadValueWide(rl_src, kCoreReg);
681  RegLocation rl_dest = InlineTargetWide(info);
682  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
683
684  // Compare the source value with zero. Write the negated value to the result if
685  // negative, otherwise write the original value.
686  OpRegImm(kOpCmp, rl_src.reg, 0);
687  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_src.reg.GetReg(),
688          rl_src.reg.GetReg(), kArmCondPl);
689  StoreValueWide(rl_dest, rl_result);
690  return true;
691}
692
693bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
694  DCHECK_EQ(cu_->instruction_set, kArm64);
695  RegLocation rl_src1 = info->args[0];
696  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
697  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
698  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
699  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
700  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
701  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
702  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
703          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
704  (is_long) ?  StoreValueWide(rl_dest, rl_result) :StoreValue(rl_dest, rl_result);
705  return true;
706}
707
708bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
709  RegLocation rl_src_address = info->args[0];  // long address
710  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
711  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
712  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
713
714  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
715  if (size == k64) {
716    StoreValueWide(rl_dest, rl_result);
717  } else {
718    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
719    StoreValue(rl_dest, rl_result);
720  }
721  return true;
722}
723
724bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
725  RegLocation rl_src_address = info->args[0];  // long address
726  RegLocation rl_src_value = info->args[2];  // [size] value
727  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
728
729  RegLocation rl_value;
730  if (size == k64) {
731    rl_value = LoadValueWide(rl_src_value, kCoreReg);
732  } else {
733    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
734    rl_value = LoadValue(rl_src_value, kCoreReg);
735  }
736  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
737  return true;
738}
739
740bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
741  DCHECK_EQ(cu_->instruction_set, kArm64);
742  // Unused - RegLocation rl_src_unsafe = info->args[0];
743  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
744  RegLocation rl_src_offset = info->args[2];  // long low
745  RegLocation rl_src_expected = info->args[4];  // int, long or Object
746  // If is_long, high half is in info->args[5]
747  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
748  // If is_long, high half is in info->args[7]
749  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
750
751  // Load Object and offset
752  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
753  RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
754
755  RegLocation rl_new_value;
756  RegLocation rl_expected;
757  if (is_long) {
758    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
759    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
760  } else {
761    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
762    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
763  }
764
765  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
766    // Mark card for object assuming new value is stored.
767    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
768  }
769
770  RegStorage r_ptr = AllocTempRef();
771  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
772
773  // Free now unneeded rl_object and rl_offset to give more temps.
774  ClobberSReg(rl_object.s_reg_low);
775  FreeTemp(rl_object.reg);
776  ClobberSReg(rl_offset.s_reg_low);
777  FreeTemp(rl_offset.reg);
778
779  // do {
780  //   tmp = [r_ptr] - expected;
781  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
782  // result = tmp != 0;
783
784  RegStorage r_tmp;
785  RegStorage r_tmp_stored;
786  RegStorage rl_new_value_stored = rl_new_value.reg;
787  A64Opcode wide = UNWIDE(0);
788  if (is_long) {
789    r_tmp_stored = r_tmp = AllocTempWide();
790    wide = WIDE(0);
791  } else if (is_object) {
792    // References use 64-bit registers, but are stored as compressed 32-bit values.
793    // This means r_tmp_stored != r_tmp.
794    r_tmp = AllocTempRef();
795    r_tmp_stored = As32BitReg(r_tmp);
796    rl_new_value_stored = As32BitReg(rl_new_value_stored);
797  } else {
798    r_tmp_stored = r_tmp = AllocTemp();
799  }
800
801  RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
802  LIR* loop = NewLIR0(kPseudoTargetLabel);
803  NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
804  OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
805  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
806  LIR* early_exit = OpCondBranch(kCondNe, nullptr);
807  NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
808  NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
809  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
810  OpCondBranch(kCondNe, loop);
811
812  LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
813  early_exit->target = exit_loop;
814
815  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
816  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
817
818  FreeTemp(r_tmp);  // Now unneeded.
819  FreeTemp(r_ptr);  // Now unneeded.
820
821  StoreValue(rl_dest, rl_result);
822
823  return true;
824}
825
826bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
827  constexpr int kLargeArrayThreshold = 512;
828
829  RegLocation rl_src = info->args[0];
830  RegLocation rl_src_pos = info->args[1];
831  RegLocation rl_dst = info->args[2];
832  RegLocation rl_dst_pos = info->args[3];
833  RegLocation rl_length = info->args[4];
834  // Compile time check, handle exception by non-inline method to reduce related meta-data.
835  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
836      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
837      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
838    return false;
839  }
840
841  ClobberCallerSave();
842  LockCallTemps();  // Prepare for explicit register usage.
843  RegStorage rs_src = rs_x0;
844  RegStorage rs_dst = rs_x1;
845  LoadValueDirectFixed(rl_src, rs_src);
846  LoadValueDirectFixed(rl_dst, rs_dst);
847
848  // Handle null pointer exception in slow-path.
849  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
850  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
851  // Handle potential overlapping in slow-path.
852  // TUNING: Support overlapping cases.
853  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
854  // Handle exception or big length in slow-path.
855  RegStorage rs_length = rs_w2;
856  LoadValueDirectFixed(rl_length, rs_length);
857  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
858  // Src bounds check.
859  RegStorage rs_src_pos = rs_w3;
860  RegStorage rs_arr_length = rs_w4;
861  LoadValueDirectFixed(rl_src_pos, rs_src_pos);
862  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr);
863  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
864  OpRegReg(kOpSub, rs_arr_length, rs_src_pos);
865  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
866  // Dst bounds check.
867  RegStorage rs_dst_pos = rs_w5;
868  LoadValueDirectFixed(rl_dst_pos, rs_dst_pos);
869  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr);
870  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
871  OpRegReg(kOpSub, rs_arr_length, rs_dst_pos);
872  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
873
874  // Everything is checked now.
875  // Set rs_src to the address of the first element to be copied.
876  rs_src_pos = As64BitReg(rs_src_pos);
877  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
878  OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1);
879  OpRegReg(kOpAdd, rs_src, rs_src_pos);
880  // Set rs_src to the address of the first element to be copied.
881  rs_dst_pos = As64BitReg(rs_dst_pos);
882  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
883  OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1);
884  OpRegReg(kOpAdd, rs_dst, rs_dst_pos);
885
886  // rs_arr_length won't be not used anymore.
887  RegStorage rs_tmp = rs_arr_length;
888  // Use 64-bit view since rs_length will be used as index.
889  rs_length = As64BitReg(rs_length);
890  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
891
892  // Copy one element.
893  LIR* jmp_to_copy_two = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 1, 0);
894  OpRegImm(kOpSub, rs_length, 2);
895  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
896  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
897
898  // Copy two elements.
899  LIR *copy_two = NewLIR0(kPseudoTargetLabel);
900  LIR* jmp_to_copy_four = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 2, 0);
901  OpRegImm(kOpSub, rs_length, 4);
902  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
903  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
904
905  // Copy four elements.
906  LIR *copy_four = NewLIR0(kPseudoTargetLabel);
907  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
908  LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
909  OpRegImm(kOpSub, rs_length, 8);
910  rs_tmp = As64BitReg(rs_tmp);
911  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64);
912  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64);
913  LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr);
914  LIR* loop_finished = OpUnconditionalBranch(nullptr);
915
916  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
917  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
918  LIR* return_point = NewLIR0(kPseudoTargetLabel);
919
920  src_check_branch->target = check_failed;
921  dst_check_branch->target = check_failed;
922  src_dst_same->target = check_failed;
923  len_neg_or_too_big->target = check_failed;
924  src_pos_negative->target = check_failed;
925  src_bad_len->target = check_failed;
926  dst_pos_negative->target = check_failed;
927  dst_bad_len->target = check_failed;
928  jmp_to_copy_two->target = copy_two;
929  jmp_to_copy_four->target = copy_four;
930  jmp_to_ret->target = return_point;
931  jmp_to_loop->target = begin_loop;
932  loop_finished->target = return_point;
933
934  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
935  ClobberCallerSave();  // We must clobber everything because slow path will return here
936
937  return true;
938}
939
940void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
941  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
942  LIR* lir = NewLIR2(kA64Ldr2rp, As32BitReg(reg).GetReg(), 0);
943  lir->target = target;
944}
945
946bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
947  return dex_cache_arrays_layout_.Valid();
948}
949
950void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
951                                            bool wide) {
952  LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0);
953  adrp->operands[2] = WrapPointer(dex_file);
954  adrp->operands[3] = offset;
955  adrp->operands[4] = WrapPointer(adrp);
956  dex_cache_access_insns_.push_back(adrp);
957  if (wide) {
958    DCHECK(r_dest.Is64Bit());
959  }
960  LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, wide ? k64 : kReference, kNotVolatile);
961  ldr->operands[4] = adrp->operands[4];
962  ldr->flags.fixup = kFixupLabel;
963  dex_cache_access_insns_.push_back(ldr);
964}
965
966LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
967  UNUSED(r_base, count);
968  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
969  UNREACHABLE();
970}
971
972LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
973  UNUSED(r_base, count);
974  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
975  UNREACHABLE();
976}
977
978void Arm64Mir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
979                                  RegLocation rl_src3, bool is_sub) {
980  rl_src1 = LoadValue(rl_src1, kCoreReg);
981  rl_src2 = LoadValue(rl_src2, kCoreReg);
982  rl_src3 = LoadValue(rl_src3, kCoreReg);
983  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
984  NewLIR4(is_sub ? kA64Msub4rrrr : kA64Madd4rrrr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
985          rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
986  StoreValue(rl_dest, rl_result);
987}
988
989void Arm64Mir2Lir::GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
990                                   RegLocation rl_src3, bool is_sub) {
991  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
992  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
993  rl_src3 = LoadValueWide(rl_src3, kCoreReg);
994  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
995  NewLIR4(is_sub ? WIDE(kA64Msub4rrrr) : WIDE(kA64Madd4rrrr), rl_result.reg.GetReg(),
996          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
997  StoreValueWide(rl_dest, rl_result);
998}
999
1000void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1001                                                 RegLocation rl_result, int lit ATTRIBUTE_UNUSED,
1002                                                 int first_bit, int second_bit) {
1003  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
1004                   EncodeShift(kA64Lsl, second_bit - first_bit));
1005  if (first_bit != 0) {
1006    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1007  }
1008}
1009
1010void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg ATTRIBUTE_UNUSED) {
1011  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
1012}
1013
1014// Test suspend flag, return target of taken suspend branch
1015LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
1016  RegStorage r_tmp = AllocTemp();
1017  LoadBaseDisp(rs_xSELF, Thread::ThreadFlagsOffset<kArm64PointerSize>().Int32Value(), r_tmp,
1018      kUnsignedHalf, kNotVolatile);
1019  LIR* cmp_branch = OpCmpImmBranch(target == nullptr ? kCondNe: kCondEq, r_tmp, 0, target);
1020  FreeTemp(r_tmp);
1021  return cmp_branch;
1022}
1023
1024// Decrement register and branch on condition
1025LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1026  // Combine sub & test using sub setflags encoding here.  We need to make sure a
1027  // subtract form that sets carry is used, so generate explicitly.
1028  // TODO: might be best to add a new op, kOpSubs, and handle it generically.
1029  A64Opcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
1030  NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
1031  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
1032  return OpCondBranch(c_code, target);
1033}
1034
1035bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
1036  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
1037    return false;
1038  }
1039  // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
1040  LIR* barrier = last_lir_insn_;
1041
1042  int dmb_flavor;
1043  // TODO: revisit Arm barrier kinds
1044  switch (barrier_kind) {
1045    case kAnyStore: dmb_flavor = kISH; break;
1046    case kLoadAny: dmb_flavor = kISH; break;
1047        // We conjecture that kISHLD is insufficient.  It is documented
1048        // to provide LoadLoad | StoreStore ordering.  But if this were used
1049        // to implement volatile loads, we suspect that the lack of store
1050        // atomicity on ARM would cause us to allow incorrect results for
1051        // the canonical IRIW example.  But we're not sure.
1052        // We should be using acquire loads instead.
1053    case kStoreStore: dmb_flavor = kISHST; break;
1054    case kAnyAny: dmb_flavor = kISH; break;
1055    default:
1056      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
1057      dmb_flavor = kSY;  // quiet gcc.
1058      break;
1059  }
1060
1061  bool ret = false;
1062
1063  // If the same barrier already exists, don't generate another.
1064  if (barrier == nullptr
1065      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
1066    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
1067    ret = true;
1068  }
1069
1070  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
1071  DCHECK(!barrier->flags.use_def_invalid);
1072  barrier->u.m.def_mask = &kEncodeAll;
1073  return ret;
1074}
1075
1076void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
1077  RegLocation rl_result;
1078
1079  rl_src = LoadValue(rl_src, kCoreReg);
1080  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1081  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
1082  StoreValueWide(rl_dest, rl_result);
1083}
1084
1085void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
1086                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) {
1087  if (rl_src2.is_const) {
1088    DCHECK(rl_src2.wide);
1089    int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
1090    if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
1091      return;
1092    }
1093  }
1094
1095  RegLocation rl_result;
1096  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1097  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1098  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
1099    GenDivZeroCheck(rl_src2.reg);
1100  }
1101  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
1102  StoreValueWide(rl_dest, rl_result);
1103}
1104
1105void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
1106                             RegLocation rl_src2) {
1107  RegLocation rl_result;
1108
1109  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1110  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1111  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1112  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
1113  StoreValueWide(rl_dest, rl_result);
1114}
1115
1116void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1117  RegLocation rl_result;
1118
1119  rl_src = LoadValueWide(rl_src, kCoreReg);
1120  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1121  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
1122  StoreValueWide(rl_dest, rl_result);
1123}
1124
1125void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
1126  RegLocation rl_result;
1127
1128  rl_src = LoadValueWide(rl_src, kCoreReg);
1129  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1130  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
1131  StoreValueWide(rl_dest, rl_result);
1132}
1133
1134void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
1135                                  RegLocation rl_src1, RegLocation rl_src2, int flags) {
1136  switch (opcode) {
1137    case Instruction::NOT_LONG:
1138      GenNotLong(rl_dest, rl_src2);
1139      return;
1140    case Instruction::ADD_LONG:
1141    case Instruction::ADD_LONG_2ADDR:
1142      GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
1143      return;
1144    case Instruction::SUB_LONG:
1145    case Instruction::SUB_LONG_2ADDR:
1146      GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
1147      return;
1148    case Instruction::MUL_LONG:
1149    case Instruction::MUL_LONG_2ADDR:
1150      GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
1151      return;
1152    case Instruction::DIV_LONG:
1153    case Instruction::DIV_LONG_2ADDR:
1154      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
1155      return;
1156    case Instruction::REM_LONG:
1157    case Instruction::REM_LONG_2ADDR:
1158      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
1159      return;
1160    case Instruction::AND_LONG_2ADDR:
1161    case Instruction::AND_LONG:
1162      GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
1163      return;
1164    case Instruction::OR_LONG:
1165    case Instruction::OR_LONG_2ADDR:
1166      GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
1167      return;
1168    case Instruction::XOR_LONG:
1169    case Instruction::XOR_LONG_2ADDR:
1170      GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
1171      return;
1172    case Instruction::NEG_LONG: {
1173      GenNegLong(rl_dest, rl_src2);
1174      return;
1175    }
1176    default:
1177      LOG(FATAL) << "Invalid long arith op";
1178      return;
1179  }
1180}
1181
1182/*
1183 * Generate array load
1184 */
1185void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
1186                             RegLocation rl_index, RegLocation rl_dest, int scale) {
1187  RegisterClass reg_class = RegClassBySize(size);
1188  int len_offset = mirror::Array::LengthOffset().Int32Value();
1189  int data_offset;
1190  RegLocation rl_result;
1191  bool constant_index = rl_index.is_const;
1192  rl_array = LoadValue(rl_array, kRefReg);
1193  if (!constant_index) {
1194    rl_index = LoadValue(rl_index, kCoreReg);
1195  }
1196
1197  if (rl_dest.wide) {
1198    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1199  } else {
1200    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1201  }
1202
1203  /* null object? */
1204  GenNullCheck(rl_array.reg, opt_flags);
1205
1206  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1207  RegStorage reg_len;
1208  if (needs_range_check) {
1209    reg_len = AllocTemp();
1210    /* Get len */
1211    Load32Disp(rl_array.reg, len_offset, reg_len);
1212    MarkPossibleNullPointerException(opt_flags);
1213  } else {
1214    ForceImplicitNullCheck(rl_array.reg, opt_flags);
1215  }
1216  if (constant_index) {
1217    rl_result = EvalLoc(rl_dest, reg_class, true);
1218
1219    if (needs_range_check) {
1220      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1221      FreeTemp(reg_len);
1222    }
1223    // Fold the constant index into the data offset.
1224    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1225    if (rl_result.ref) {
1226      LoadRefDisp(rl_array.reg, data_offset, rl_result.reg, kNotVolatile);
1227    } else {
1228      LoadBaseDisp(rl_array.reg, data_offset, rl_result.reg, size, kNotVolatile);
1229    }
1230  } else {
1231    // Offset base, then use indexed load.
1232    RegStorage reg_ptr = AllocTempRef();
1233    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1234    FreeTemp(rl_array.reg);
1235    rl_result = EvalLoc(rl_dest, reg_class, true);
1236
1237    if (needs_range_check) {
1238      GenArrayBoundsCheck(rl_index.reg, reg_len);
1239      FreeTemp(reg_len);
1240    }
1241    if (rl_result.ref) {
1242      LoadRefIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale);
1243    } else {
1244      LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
1245    }
1246    FreeTemp(reg_ptr);
1247  }
1248  if (rl_dest.wide) {
1249    StoreValueWide(rl_dest, rl_result);
1250  } else {
1251    StoreValue(rl_dest, rl_result);
1252  }
1253}
1254
1255/*
1256 * Generate array store
1257 *
1258 */
1259void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
1260                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
1261  RegisterClass reg_class = RegClassBySize(size);
1262  int len_offset = mirror::Array::LengthOffset().Int32Value();
1263  bool constant_index = rl_index.is_const;
1264
1265  int data_offset;
1266  if (size == k64 || size == kDouble) {
1267    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1268  } else {
1269    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1270  }
1271
1272  rl_array = LoadValue(rl_array, kRefReg);
1273  if (!constant_index) {
1274    rl_index = LoadValue(rl_index, kCoreReg);
1275  }
1276
1277  RegStorage reg_ptr;
1278  bool allocated_reg_ptr_temp = false;
1279  if (constant_index) {
1280    reg_ptr = rl_array.reg;
1281  } else if (IsTemp(rl_array.reg) && !card_mark) {
1282    Clobber(rl_array.reg);
1283    reg_ptr = rl_array.reg;
1284  } else {
1285    allocated_reg_ptr_temp = true;
1286    reg_ptr = AllocTempRef();
1287  }
1288
1289  /* null object? */
1290  GenNullCheck(rl_array.reg, opt_flags);
1291
1292  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1293  RegStorage reg_len;
1294  if (needs_range_check) {
1295    reg_len = AllocTemp();
1296    // NOTE: max live temps(4) here.
1297    /* Get len */
1298    Load32Disp(rl_array.reg, len_offset, reg_len);
1299    MarkPossibleNullPointerException(opt_flags);
1300  } else {
1301    ForceImplicitNullCheck(rl_array.reg, opt_flags);
1302  }
1303  /* at this point, reg_ptr points to array, 2 live temps */
1304  if (rl_src.wide) {
1305    rl_src = LoadValueWide(rl_src, reg_class);
1306  } else {
1307    rl_src = LoadValue(rl_src, reg_class);
1308  }
1309  if (constant_index) {
1310    if (needs_range_check) {
1311      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1312      FreeTemp(reg_len);
1313    }
1314    // Fold the constant index into the data offset.
1315    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1316    if (rl_src.ref) {
1317      StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
1318    } else {
1319      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
1320    }
1321  } else {
1322    /* reg_ptr -> array data */
1323    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1324    if (needs_range_check) {
1325      GenArrayBoundsCheck(rl_index.reg, reg_len);
1326      FreeTemp(reg_len);
1327    }
1328    if (rl_src.ref) {
1329      StoreRefIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale);
1330    } else {
1331      StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
1332    }
1333  }
1334  if (allocated_reg_ptr_temp) {
1335    FreeTemp(reg_ptr);
1336  }
1337  if (card_mark) {
1338    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
1339  }
1340}
1341
1342void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
1343                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
1344                                     int flags ATTRIBUTE_UNUSED) {
1345  OpKind op = kOpBkpt;
1346  // Per spec, we only care about low 6 bits of shift amount.
1347  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
1348  rl_src = LoadValueWide(rl_src, kCoreReg);
1349  if (shift_amount == 0) {
1350    StoreValueWide(rl_dest, rl_src);
1351    return;
1352  }
1353
1354  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1355  switch (opcode) {
1356    case Instruction::SHL_LONG:
1357    case Instruction::SHL_LONG_2ADDR:
1358      op = kOpLsl;
1359      break;
1360    case Instruction::SHR_LONG:
1361    case Instruction::SHR_LONG_2ADDR:
1362      op = kOpAsr;
1363      break;
1364    case Instruction::USHR_LONG:
1365    case Instruction::USHR_LONG_2ADDR:
1366      op = kOpLsr;
1367      break;
1368    default:
1369      LOG(FATAL) << "Unexpected case";
1370  }
1371  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
1372  StoreValueWide(rl_dest, rl_result);
1373}
1374
1375void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
1376                                     RegLocation rl_src1, RegLocation rl_src2, int flags) {
1377  OpKind op = kOpBkpt;
1378  switch (opcode) {
1379    case Instruction::ADD_LONG:
1380    case Instruction::ADD_LONG_2ADDR:
1381      op = kOpAdd;
1382      break;
1383    case Instruction::SUB_LONG:
1384    case Instruction::SUB_LONG_2ADDR:
1385      op = kOpSub;
1386      break;
1387    case Instruction::AND_LONG:
1388    case Instruction::AND_LONG_2ADDR:
1389      op = kOpAnd;
1390      break;
1391    case Instruction::OR_LONG:
1392    case Instruction::OR_LONG_2ADDR:
1393      op = kOpOr;
1394      break;
1395    case Instruction::XOR_LONG:
1396    case Instruction::XOR_LONG_2ADDR:
1397      op = kOpXor;
1398      break;
1399    default:
1400      LOG(FATAL) << "Unexpected opcode";
1401  }
1402
1403  if (op == kOpSub) {
1404    if (!rl_src2.is_const) {
1405      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1406    }
1407  } else {
1408    // Associativity.
1409    if (!rl_src2.is_const) {
1410      DCHECK(rl_src1.is_const);
1411      std::swap(rl_src1, rl_src2);
1412    }
1413  }
1414  DCHECK(rl_src2.is_const);
1415  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
1416
1417  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1418  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1419  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
1420  StoreValueWide(rl_dest, rl_result);
1421}
1422
static uint32_t ExtractReg(uint32_t reg_mask, int* reg) {
  // Find first register.
  int first_bit_set = CTZ(reg_mask) + 1;
  *reg = *reg + first_bit_set;
  reg_mask >>= first_bit_set;
  return reg_mask;
}

/**
 * @brief Split a register list into pairs or single registers.
 *
 * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
 * @code
 *   int reg1 = -1, reg2 = -1;
 *   while (reg_mask) {
 *     reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
 *     if (UNLIKELY(reg2 < 0)) {
 *       // Single register in reg1.
 *     } else {
 *       // Pair in reg1, reg2.
 *     }
 *   }
 * @endcode
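 *
 * For illustration, with reg_mask == 0b10110 (registers 1, 2 and 4) the first call yields the
 * pair (reg2 = 1, reg1 = 2) and the second call yields the single register 4. The returned mask
 * is always shifted so that bit 0 corresponds to the register just above reg1.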
 */
static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
  // Find first register.
  int first_bit_set = CTZ(reg_mask) + 1;
  int reg = *reg1 + first_bit_set;
  reg_mask >>= first_bit_set;

  if (LIKELY(reg_mask)) {
    // Save the first register, find the second and use the pair opcode.
    int second_bit_set = CTZ(reg_mask) + 1;
    *reg2 = reg;
    reg_mask >>= second_bit_set;
    *reg1 = reg + second_bit_set;
    return reg_mask;
  }

  // Use the single opcode, as we just have one register.
  *reg1 = reg;
  *reg2 = -1;
  return reg_mask;
}

static dwarf::Reg DwarfCoreReg(int num) {
  return dwarf::Reg::Arm64Core(num);
}

static dwarf::Reg DwarfFpReg(int num) {
  return dwarf::Reg::Arm64Fp(num);
}

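// Store the core registers in reg_mask at [base + offset, ...), pairing registers into stp where
// possible and recording each save in the CFI. For illustration, a mask covering x19, x20 and x30
// with offset == 96 emits "stp x19, x20, [sp, #96]" followed by "str x30, [sp, #112]".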
static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size);
    } else {
      m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size);
    }
  }
}

// TODO(Arm64): consider using ld1 and st1?
static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                   offset);
      m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size);
    } else {
      m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size);
      m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size);
    }
  }
}

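// Spill by first dropping the whole frame, then storing with plain immediate offsets near its
// top. For illustration, with frame_size == 64, one FP spill (d8) and three core spills
// (x19, x20, x30) this emits roughly:
//   sub sp, sp, #64
//   str d8,       [sp, #32]
//   stp x19, x20, [sp, #40]
//   str x30,      [sp, #56]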
static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                           int frame_size) {
  m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
  m2l->cfi().AdjustCFAOffset(frame_size);

  int core_count = POPCOUNT(core_reg_mask);

  if (fp_reg_mask != 0) {
    // Spill FP regs.
    int fp_count = POPCOUNT(fp_reg_mask);
    int spill_offset = frame_size - (core_count + fp_count) * kArm64PointerSize;
    SpillFPRegs(m2l, rs_sp, spill_offset, fp_reg_mask);
  }

  if (core_reg_mask != 0) {
    // Spill core regs.
    int spill_offset = frame_size - (core_count * kArm64PointerSize);
    SpillCoreRegs(m2l, rs_sp, spill_offset, core_reg_mask);
  }

  return frame_size;
}

static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
                               uint32_t fp_reg_mask) {
  // Spill both core and fp regs at the same time.
  // The very first instruction will be an stp with a pre-indexed address, moving the stack
  // pointer down. From then on, we fill upwards. This generates overall the same number of
  // instructions as SpillRegsPreSub above in most cases (the exception being an odd number of
  // core spills and an even, non-zero number of fp spills), but is more flexible, as the offsets
  // are guaranteed small.
  //
  // Some demonstrative fill cases: (c) = core, (f) = fp
  // cc    44   cc    44   cc    22   cc    33   fc => 1[1/2]
  // fc => 23   fc => 23   ff => 11   ff => 22
  // ff    11    f    11               f    11
  //
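  // For illustration, with three core spills (x19, x20, x21) and two FP spills (d8, d9) the
  // combined count is 5, all_offset is 6 slots (48 bytes), and the code below emits roughly:
  //   stp d0, d8,   [sp, #-48]!   // d0 is the dummy; d8 lands in slot 1.
  //   str d9,       [sp, #16]
  //   str x19,      [sp, #24]
  //   stp x20, x21, [sp, #32]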
  int reg1 = -1, reg2 = -1;
  int core_count = POPCOUNT(core_reg_mask);
  int fp_count = POPCOUNT(fp_reg_mask);

  int combined = fp_count + core_count;
  int all_offset = RoundUp(combined, 2);  // Needs to be 16B = 2-reg aligned.

  int cur_offset = 2;  // What's the starting offset after the first stp? We expect the base slot
                       // to be filled.

  // First figure out whether the bottom is FP or core.
  if (fp_count > 0) {
    // Some FP spills.
    //
    // Four cases (a dummy fills the unused stp slot; the code uses d0 or the fp reg itself):
    // 1) Single FP, even number of core -> stp d0, fp_reg
    // 2) Single FP, odd number of core -> stp fp_reg, d0
    // 3) More FP, even number combined -> stp fp_reg1, fp_reg2
    // 4) More FP, odd number combined -> stp d0, fp_reg
    if (fp_count == 1) {
      fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
      DCHECK_EQ(fp_reg_mask, 0U);
      if (core_count % 2 == 0) {
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
      } else {
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), 0);
        cur_offset = 0;  // That core reg needs to go into the upper half.
      }
    } else {
      if (combined % 2 == 0) {
        fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg2), 0);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
      } else {
        fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
      }
    }
  } else {
    // No FP spills.
    //
    // Two cases:
    // 1) Even number of core -> stp core1, core2
    // 2) Odd number of core -> stp xzr, core1
    if (core_count % 2 == 1) {
      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
    } else {
      core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
      m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
    }
  }
  DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(),
            static_cast<int>(all_offset * kArm64PointerSize));

  if (fp_count != 0) {
    while (fp_reg_mask != 0) {
      // Have some FP regs to do.
      fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
      if (UNLIKELY(reg2 < 0)) {
        m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                     cur_offset);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize);
        // Do not increment cur_offset here, as the second half will be filled by a core reg.
      } else {
        m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
        m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize);
        cur_offset += 2;
      }
    }

    // Reset counting.
    reg1 = -1;

    // If there is an odd number of core registers, store the bottom one into the upper half of
    // the current slot now.
    if (core_count % 2 == 1) {
      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
                   cur_offset + 1);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
      cur_offset += 2;  // Half-slot filled now.
    }
  }

  // Spill the rest of the core regs. Their count is guaranteed to be even.
  DCHECK_EQ(POPCOUNT(core_reg_mask) % 2, 0);
  for (; core_reg_mask != 0; cur_offset += 2) {
    core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
    m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                 RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
    m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize);
    m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
  }

  DCHECK_EQ(cur_offset, all_offset);

  return all_offset * 8;
}

int Arm64Mir2Lir::SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                            int frame_size) {
  // If the frame size is small enough that all offsets fit into the immediates, use that setup,
  // as it decrements sp early (a mild form of instruction scheduling) and is no worse in
  // instruction count than SpillRegsPreIndexed.
  //
  // This case is also optimal when we have an odd number of core spills, and an even (non-zero)
  // number of fp spills.
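  // For example, a 160-byte frame gives 160 / 8 == 20 slots, within the 63-slot positive range
  // of stp's scaled 7-bit immediate, so the pre-sub variant is used; a 1024-byte frame
  // (128 slots) falls back to the pre-indexed variant.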
  if (RoundUp(frame_size, 8) / 8 <= 63) {
    return SpillRegsPreSub(this, core_reg_mask, fp_reg_mask, frame_size);
  } else {
    return SpillRegsPreIndexed(this, base, core_reg_mask, fp_reg_mask);
  }
}

static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().Restore(DwarfCoreReg(reg1));
    } else {
      DCHECK_LE(offset, 63);
      m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().Restore(DwarfCoreReg(reg2));
      m2l->cfi().Restore(DwarfCoreReg(reg1));
    }
  }
}

static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                   offset);
      m2l->cfi().Restore(DwarfFpReg(reg1));
    } else {
      m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().Restore(DwarfFpReg(reg2));
      m2l->cfi().Restore(DwarfFpReg(reg1));
    }
  }
}

void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                               int frame_size) {
  DCHECK_EQ(base, rs_sp);
  // Restore the saved registers and drop the stack frame. Two variants:
  //
  // 1. (Original): Try to address the saves directly, then drop the whole frame.
  //                Limitation: ldp takes a scaled 7-bit signed immediate offset.
  //
  // 2. (New): Drop the non-save part first. Then restore as in variant 1, which is now
  //           guaranteed to be in range. Then drop the rest.
  //
  // TODO: In methods with few spills but a huge frame, it would be better to do non-immediate
  //       loads in variant 1.

  // "Magic" constant: 63 (the maximum positive scaled imm7) * 8.
  static constexpr int kMaxFramesizeForOffset = 63 * kArm64PointerSize;
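  // For illustration, with frame_size == 1024 and core spills {x19, x20, x30} (no FP spills),
  // early_drop becomes RoundDown(1024 - 24, 16) == 992 and the epilogue is roughly:
  //   add sp, sp, #992
  //   ldp x19, x20, [sp, #8]
  //   ldr x30,      [sp, #24]
  //   add sp, sp, #32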

  const int num_core_spills = POPCOUNT(core_reg_mask);
  const int num_fp_spills = POPCOUNT(fp_reg_mask);

  int early_drop = 0;

  if (frame_size > kMaxFramesizeForOffset) {
    // Second variant: drop the non-save part of the frame early.

    // TODO: Always use the first formula, as num_fp_spills would be zero?
    if (fp_reg_mask != 0) {
      early_drop = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
    } else {
      early_drop = frame_size - kArm64PointerSize * num_core_spills;
    }

    // The drop needs to be 16B aligned, so that SP stays aligned.
    early_drop = RoundDown(early_drop, 16);

    OpRegImm64(kOpAdd, rs_sp, early_drop);
    cfi_.AdjustCFAOffset(-early_drop);
  }

  // Unspill.
  if (fp_reg_mask != 0) {
    int offset = frame_size - early_drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
    UnSpillFPRegs(this, rs_sp, offset, fp_reg_mask);
  }
  if (core_reg_mask != 0) {
    int offset = frame_size - early_drop - kArm64PointerSize * num_core_spills;
    UnSpillCoreRegs(this, rs_sp, offset, core_reg_mask);
  }

  // Drop the (rest of the) frame.
  int adjust = frame_size - early_drop;
  OpRegImm64(kOpAdd, rs_sp, adjust);
  cfi_.AdjustCFAOffset(-adjust);
}

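// Inlined intrinsic for Integer.reverse()/Long.reverse(): a single rbit on the 32-bit or 64-bit
// view of the value, selected via the wide flag below.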
bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
  A64Opcode wide = IsWide(size) ? WIDE(0) : UNWIDE(0);
  RegLocation rl_src_i = info->args[0];
  RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegLocation rl_i = IsWide(size) ?
      LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
  IsWide(size) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
  return true;
}

}  // namespace art
