int_arm64.cc revision fa9c8ec37c66574654e448513e1bb59af7cb9365
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Thumb2 ISA. */
18
19#include "arm64_lir.h"
20#include "codegen_arm64.h"
21#include "dex/quick/mir_to_lir-inl.h"
22#include "dex/reg_storage_eq.h"
23#include "entrypoints/quick/quick_entrypoints.h"
24#include "mirror/array.h"
25#include "utils.h"
26
27namespace art {
28
29LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
30  OpRegReg(kOpCmp, src1, src2);
31  return OpCondBranch(cond, target);
32}
33
34LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
35  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
36  return NULL;
37}
38
39void Arm64Mir2Lir::OpEndIT(LIR* it) {
40  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
41}
42
43/*
44 * 64-bit 3way compare function.
45 *     cmp   xA, xB
46 *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
47 *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
48 */
49void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
50                              RegLocation rl_src2) {
51  RegLocation rl_result;
52  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
53  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
54  rl_result = EvalLoc(rl_dest, kCoreReg, true);
55
56  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
57  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
58  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
59          rl_result.reg.GetReg(), kArmCondGe);
60  StoreValue(rl_dest, rl_result);
61}
62
63void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
64                             RegLocation rl_src1, RegLocation rl_shift) {
65  OpKind op = kOpBkpt;
66  switch (opcode) {
67  case Instruction::SHL_LONG:
68  case Instruction::SHL_LONG_2ADDR:
69    op = kOpLsl;
70    break;
71  case Instruction::SHR_LONG:
72  case Instruction::SHR_LONG_2ADDR:
73    op = kOpAsr;
74    break;
75  case Instruction::USHR_LONG:
76  case Instruction::USHR_LONG_2ADDR:
77    op = kOpLsr;
78    break;
79  default:
80    LOG(FATAL) << "Unexpected case: " << opcode;
81  }
82  rl_shift = LoadValue(rl_shift, kCoreReg);
83  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
84  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
85  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
86  StoreValueWide(rl_dest, rl_result);
87}
88
89static constexpr bool kUseDeltaEncodingInGenSelect = false;
90
91void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
92                             RegStorage rs_dest, int result_reg_class) {
93  if (false_val == 0 ||               // 0 is better as first operand.
94      true_val == 1 ||                // Potentially Csinc.
95      true_val == -1 ||               // Potentially Csinv.
96      true_val == false_val + 1) {    // Potentially Csinc.
97    ccode = NegateComparison(ccode);
98    std::swap(true_val, false_val);
99  }
100
101  ArmConditionCode code = ArmConditionEncoding(ccode);
102
103  int opcode;                                      // The opcode.
104  RegStorage left_op = RegStorage::InvalidReg();   // The operands.
105  RegStorage right_op = RegStorage::InvalidReg();  // The operands.
106
107  bool is_wide = rs_dest.Is64Bit();
108
109  RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;
110
111  if (true_val == 0) {
112    left_op = zero_reg;
113  } else {
114    left_op = rs_dest;
115    LoadConstantNoClobber(rs_dest, true_val);
116  }
117  if (false_val == 1) {
118    right_op = zero_reg;
119    opcode = kA64Csinc4rrrc;
120  } else if (false_val == -1) {
121    right_op = zero_reg;
122    opcode = kA64Csinv4rrrc;
123  } else if (false_val == true_val + 1) {
124    right_op = left_op;
125    opcode = kA64Csinc4rrrc;
126  } else if (false_val == -true_val) {
127    right_op = left_op;
128    opcode = kA64Csneg4rrrc;
129  } else if (false_val == ~true_val) {
130    right_op = left_op;
131    opcode = kA64Csinv4rrrc;
132  } else if (true_val == 0) {
133    // left_op is zero_reg.
134    right_op = rs_dest;
135    LoadConstantNoClobber(rs_dest, false_val);
136    opcode = kA64Csel4rrrc;
137  } else {
138    // Generic case.
139    RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
140    if (is_wide) {
141      if (t_reg2.Is32Bit()) {
142        t_reg2 = As64BitReg(t_reg2);
143      }
144    } else {
145      if (t_reg2.Is64Bit()) {
146        t_reg2 = As32BitReg(t_reg2);
147      }
148    }
149
150    if (kUseDeltaEncodingInGenSelect) {
151      int32_t delta = false_val - true_val;
152      uint32_t abs_val = delta < 0 ? -delta : delta;
153
154      if (abs_val < 0x1000) {  // TODO: Replace with InexpensiveConstant with opcode.
155        // Can encode as immediate to an add.
156        right_op = t_reg2;
157        OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
158      }
159    }
160
161    // Load as constant.
162    if (!right_op.Valid()) {
163      LoadConstantNoClobber(t_reg2, false_val);
164      right_op = t_reg2;
165    }
166
167    opcode = kA64Csel4rrrc;
168  }
169
170  DCHECK(left_op.Valid() && right_op.Valid());
171  NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
172      code);
173}
174
175void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
176                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
177                                    int dest_reg_class) {
178  DCHECK(rs_dest.Valid());
179  OpRegReg(kOpCmp, left_op, right_op);
180  GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
181}
182
183void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
184  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
185  rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
186  // rl_src may be aliased with rl_result/rl_dest, so do compare early.
187  OpRegImm(kOpCmp, rl_src.reg, 0);
188
189  RegLocation rl_dest = mir_graph_->GetDest(mir);
190
191  // The kMirOpSelect has two variants, one for constants and one for moves.
192  if (mir->ssa_rep->num_uses == 1) {
193    RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
194    GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
195              rl_dest.ref ? kRefReg : kCoreReg);
196    StoreValue(rl_dest, rl_result);
197  } else {
198    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
199    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
200
201    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
202    rl_true = LoadValue(rl_true, result_reg_class);
203    rl_false = LoadValue(rl_false, result_reg_class);
204    RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);
205
206    bool is_wide = rl_dest.ref || rl_dest.wide;
207    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
208    NewLIR4(opcode, rl_result.reg.GetReg(),
209            rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
210    StoreValue(rl_dest, rl_result);
211  }
212}
213
214void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
215  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
216  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
217  LIR* taken = &block_label_list_[bb->taken];
218  LIR* not_taken = &block_label_list_[bb->fall_through];
219  // Normalize such that if either operand is constant, src2 will be constant.
220  ConditionCode ccode = mir->meta.ccode;
221  if (rl_src1.is_const) {
222    std::swap(rl_src1, rl_src2);
223    ccode = FlipComparisonOrder(ccode);
224  }
225
226  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
227
228  if (rl_src2.is_const) {
229    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
230
231    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
232    // Special handling using cbz & cbnz.
233    if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
234      OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
235      OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
236      return;
237    }
238
239    // Only handle Imm if src2 is not already in a register.
240    rl_src2 = UpdateLocWide(rl_src2);
241    if (rl_src2.location != kLocPhysReg) {
242      OpRegImm64(kOpCmp, rl_src1.reg, val);
243      OpCondBranch(ccode, taken);
244      OpCondBranch(NegateComparison(ccode), not_taken);
245      return;
246    }
247  }
248
249  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
250  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
251  OpCondBranch(ccode, taken);
252  OpCondBranch(NegateComparison(ccode), not_taken);
253}
254
255/*
256 * Generate a register comparison to an immediate and branch.  Caller
257 * is responsible for setting branch target field.
258 */
259LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
260                                  LIR* target) {
261  LIR* branch = nullptr;
262  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
263  if (check_value == 0) {
264    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
265      ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
266      ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
267      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
268    } else if (arm_cond == kArmCondLs) {
269      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
270      // This case happens for a bounds check of array[0].
271      ArmOpcode opcode = kA64Cbz2rt;
272      ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
273      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
274    }
275    // TODO: Use tbz/tbnz for < 0 or >= 0.
276  }
277
278  if (branch == nullptr) {
279    OpRegImm(kOpCmp, reg, check_value);
280    branch = NewLIR2(kA64B2ct, arm_cond, 0);
281  }
282
283  branch->target = target;
284  return branch;
285}
286
287LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
288                                     RegStorage base_reg, int offset, int check_value,
289                                     LIR* target, LIR** compare) {
290  DCHECK(compare == nullptr);
291  // It is possible that temp register is 64-bit. (ArgReg or RefReg)
292  // Always compare 32-bit value no matter what temp_reg is.
293  if (temp_reg.Is64Bit()) {
294    temp_reg = As32BitReg(temp_reg);
295  }
296  Load32Disp(base_reg, offset, temp_reg);
297  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
298  return branch;
299}
300
301LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
302  bool dest_is_fp = r_dest.IsFloat();
303  bool src_is_fp = r_src.IsFloat();
304  ArmOpcode opcode = kA64Brk1d;
305  LIR* res;
306
307  if (LIKELY(dest_is_fp == src_is_fp)) {
308    if (LIKELY(!dest_is_fp)) {
309      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
310
311      // Core/core copy.
312      // Copies involving the sp register require a different instruction.
313      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
314
315      // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2 args instruction.
316      //   This currently works because the other arguments are set to 0 by default. We should
317      //   rather introduce an alias kA64Mov2RR.
318
319      // core/core copy. Do a x/x copy only if both registers are x.
320      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
321        opcode = WIDE(opcode);
322      }
323    } else {
324      // Float/float copy.
325      bool dest_is_double = r_dest.IsDouble();
326      bool src_is_double = r_src.IsDouble();
327
328      // We do not do float/double or double/float casts here.
329      DCHECK_EQ(dest_is_double, src_is_double);
330
331      // Homogeneous float/float copy.
332      opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
333    }
334  } else {
335    // Inhomogeneous register copy.
336    if (dest_is_fp) {
337      if (r_dest.IsDouble()) {
338        opcode = kA64Fmov2Sx;
339      } else {
340        r_src = Check32BitReg(r_src);
341        opcode = kA64Fmov2sw;
342      }
343    } else {
344      if (r_src.IsDouble()) {
345        opcode = kA64Fmov2xS;
346      } else {
347        r_dest = Check32BitReg(r_dest);
348        opcode = kA64Fmov2ws;
349      }
350    }
351  }
352
353  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
354
355  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
356    res->flags.is_nop = true;
357  }
358
359  return res;
360}
361
362void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
363  if (r_dest != r_src) {
364    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
365    AppendLIR(res);
366  }
367}
368
369void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
370  OpRegCopy(r_dest, r_src);
371}
372
373// Table of magic divisors
374struct MagicTable {
375  int magic64_base;
376  int magic64_eor;
377  uint64_t magic64;
378  uint32_t magic32;
379  uint32_t shift;
380  DividePattern pattern;
381};
382
383static const MagicTable magic_table[] = {
384  {   0,      0,                  0,          0, 0, DivideNone},  // 0
385  {   0,      0,                  0,          0, 0, DivideNone},  // 1
386  {   0,      0,                  0,          0, 0, DivideNone},  // 2
387  {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
388  {   0,      0,                  0,          0, 0, DivideNone},  // 4
389  {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
390  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
391  {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
392  {   0,      0,                  0,          0, 0, DivideNone},  // 8
393  {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
394  {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
395  {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
396  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
397  {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
398  {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
399  {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
400};
401
402// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
403bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
404                                      RegLocation rl_src, RegLocation rl_dest, int lit) {
405  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
406    return false;
407  }
408  DividePattern pattern = magic_table[lit].pattern;
409  if (pattern == DivideNone) {
410    return false;
411  }
412  // Tuning: add rem patterns
413  if (!is_div) {
414    return false;
415  }
416
417  RegStorage r_magic = AllocTemp();
418  LoadConstant(r_magic, magic_table[lit].magic32);
419  rl_src = LoadValue(rl_src, kCoreReg);
420  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
421  RegStorage r_long_mul = AllocTemp();
422  NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
423          r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
424  switch (pattern) {
425    case Divide3:
426      OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
427      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
428      break;
429    case Divide5:
430      OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
431                  32 + magic_table[lit].shift);
432      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
433      break;
434    case Divide7:
435      OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
436                       As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
437      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
438      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
439      break;
440    default:
441      LOG(FATAL) << "Unexpected pattern: " << pattern;
442  }
443  StoreValue(rl_dest, rl_result);
444  return true;
445}
446
447bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
448                                        RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
449  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
450    return false;
451  }
452  DividePattern pattern = magic_table[lit].pattern;
453  if (pattern == DivideNone) {
454    return false;
455  }
456  // Tuning: add rem patterns
457  if (!is_div) {
458    return false;
459  }
460
461  RegStorage r_magic = AllocTempWide();
462  rl_src = LoadValueWide(rl_src, kCoreReg);
463  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
464  RegStorage r_long_mul = AllocTempWide();
465
466  if (magic_table[lit].magic64_base >= 0) {
467    // Check that the entry in the table is correct.
468    if (kIsDebugBuild) {
469      uint64_t reconstructed_imm;
470      uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
471      if (magic_table[lit].magic64_eor >= 0) {
472        uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
473        reconstructed_imm = base ^ eor;
474      } else {
475        reconstructed_imm = base + 1;
476      }
477      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
478    }
479
480    // Load the magic constant in two instructions.
481    NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
482    if (magic_table[lit].magic64_eor >= 0) {
483      NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
484              magic_table[lit].magic64_eor);
485    } else {
486      NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
487    }
488  } else {
489    LoadConstantWide(r_magic, magic_table[lit].magic64);
490  }
491
492  NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
493  switch (pattern) {
494    case Divide3:
495      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
496      break;
497    case Divide5:
498      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
499      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
500      break;
501    case Divide7:
502      OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
503      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
504      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
505      break;
506    default:
507      LOG(FATAL) << "Unexpected pattern: " << pattern;
508  }
509  StoreValueWide(rl_dest, rl_result);
510  return true;
511}
512
513// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
514// and store the result in 'rl_dest'.
515bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
516                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
517  return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int>(lit));
518}
519
520// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
521// and store the result in 'rl_dest'.
522bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
523                                      RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
524  const bool is_64bit = rl_dest.wide;
525  const int nbits = (is_64bit) ? 64 : 32;
526
527  if (lit < 2) {
528    return false;
529  }
530  if (!IsPowerOfTwo(lit)) {
531    if (is_64bit) {
532      return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
533    } else {
534      return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
535    }
536  }
537  int k = LowestSetBit(lit);
538  if (k >= nbits - 2) {
539    // Avoid special cases.
540    return false;
541  }
542
543  RegLocation rl_result;
544  RegStorage t_reg;
545  if (is_64bit) {
546    rl_src = LoadValueWide(rl_src, kCoreReg);
547    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
548    t_reg = AllocTempWide();
549  } else {
550    rl_src = LoadValue(rl_src, kCoreReg);
551    rl_result = EvalLoc(rl_dest, kCoreReg, true);
552    t_reg = AllocTemp();
553  }
554
555  int shift = EncodeShift(kA64Lsr, nbits - k);
556  if (is_div) {
557    if (lit == 2) {
558      // Division by 2 is by far the most common division by constant.
559      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
560      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
561    } else {
562      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
563      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
564      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
565    }
566  } else {
567    if (lit == 2) {
568      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
569      OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
570      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
571    } else {
572      RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
573      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
574      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
575      OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
576      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
577    }
578  }
579
580  if (is_64bit) {
581    StoreValueWide(rl_dest, rl_result);
582  } else {
583    StoreValue(rl_dest, rl_result);
584  }
585  return true;
586}
587
588bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
589  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
590  return false;
591}
592
593RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
594  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
595  return rl_dest;
596}
597
598RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
599  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
600
601  // Put the literal in a temp.
602  RegStorage lit_temp = AllocTemp();
603  LoadConstant(lit_temp, lit);
604  // Use the generic case for div/rem with arg2 in a register.
605  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
606  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
607  FreeTemp(lit_temp);
608
609  return rl_result;
610}
611
612RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
613                                    RegLocation rl_src2, bool is_div, bool check_zero) {
614  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
615  return rl_dest;
616}
617
618RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
619                                    bool is_div) {
620  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
621
622  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
623  if (is_div) {
624    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
625  } else {
626    // temp = r_src1 / r_src2
627    // dest = r_src1 - temp * r_src2
628    RegStorage temp;
629    ArmOpcode wide;
630    if (rl_result.reg.Is64Bit()) {
631      temp = AllocTempWide();
632      wide = WIDE(0);
633    } else {
634      temp = AllocTemp();
635      wide = UNWIDE(0);
636    }
637    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
638    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
639            r_src1.GetReg(), r_src2.GetReg());
640    FreeTemp(temp);
641  }
642  return rl_result;
643}
644
645bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
646  RegLocation rl_src = info->args[0];
647  rl_src = LoadValueWide(rl_src, kCoreReg);
648  RegLocation rl_dest = InlineTargetWide(info);
649  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
650  RegStorage sign_reg = AllocTempWide();
651  // abs(x) = y<=x>>63, (x+y)^y.
652  OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63);
653  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg);
654  OpRegReg(kOpXor, rl_result.reg, sign_reg);
655  StoreValueWide(rl_dest, rl_result);
656  return true;
657}
658
659bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
660  DCHECK_EQ(cu_->instruction_set, kArm64);
661  RegLocation rl_src1 = info->args[0];
662  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
663  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
664  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
665  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
666  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
667  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
668  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
669          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
670  (is_long) ?  StoreValueWide(rl_dest, rl_result) :StoreValue(rl_dest, rl_result);
671  return true;
672}
673
674bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
675  RegLocation rl_src_address = info->args[0];  // long address
676  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
677  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
678  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
679
680  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
681  if (size == k64) {
682    StoreValueWide(rl_dest, rl_result);
683  } else {
684    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
685    StoreValue(rl_dest, rl_result);
686  }
687  return true;
688}
689
690bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
691  RegLocation rl_src_address = info->args[0];  // long address
692  RegLocation rl_src_value = info->args[2];  // [size] value
693  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
694
695  RegLocation rl_value;
696  if (size == k64) {
697    rl_value = LoadValueWide(rl_src_value, kCoreReg);
698  } else {
699    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
700    rl_value = LoadValue(rl_src_value, kCoreReg);
701  }
702  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
703  return true;
704}
705
706bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
707  DCHECK_EQ(cu_->instruction_set, kArm64);
708  // Unused - RegLocation rl_src_unsafe = info->args[0];
709  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
710  RegLocation rl_src_offset = info->args[2];  // long low
711  RegLocation rl_src_expected = info->args[4];  // int, long or Object
712  // If is_long, high half is in info->args[5]
713  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
714  // If is_long, high half is in info->args[7]
715  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
716
717  // Load Object and offset
718  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
719  RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
720
721  RegLocation rl_new_value;
722  RegLocation rl_expected;
723  if (is_long) {
724    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
725    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
726  } else {
727    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
728    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
729  }
730
731  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
732    // Mark card for object assuming new value is stored.
733    MarkGCCard(rl_new_value.reg, rl_object.reg);
734  }
735
736  RegStorage r_ptr = AllocTempRef();
737  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
738
739  // Free now unneeded rl_object and rl_offset to give more temps.
740  ClobberSReg(rl_object.s_reg_low);
741  FreeTemp(rl_object.reg);
742  ClobberSReg(rl_offset.s_reg_low);
743  FreeTemp(rl_offset.reg);
744
745  // do {
746  //   tmp = [r_ptr] - expected;
747  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
748  // result = tmp != 0;
749
750  RegStorage r_tmp;
751  RegStorage r_tmp_stored;
752  RegStorage rl_new_value_stored = rl_new_value.reg;
753  ArmOpcode wide = UNWIDE(0);
754  if (is_long) {
755    r_tmp_stored = r_tmp = AllocTempWide();
756    wide = WIDE(0);
757  } else if (is_object) {
758    // References use 64-bit registers, but are stored as compressed 32-bit values.
759    // This means r_tmp_stored != r_tmp.
760    r_tmp = AllocTempRef();
761    r_tmp_stored = As32BitReg(r_tmp);
762    rl_new_value_stored = As32BitReg(rl_new_value_stored);
763  } else {
764    r_tmp_stored = r_tmp = AllocTemp();
765  }
766
767  RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
768  LIR* loop = NewLIR0(kPseudoTargetLabel);
769  NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
770  OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
771  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
772  LIR* early_exit = OpCondBranch(kCondNe, NULL);
773  NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
774  NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
775  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
776  OpCondBranch(kCondNe, loop);
777
778  LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
779  early_exit->target = exit_loop;
780
781  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
782  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
783
784  FreeTemp(r_tmp);  // Now unneeded.
785  FreeTemp(r_ptr);  // Now unneeded.
786
787  StoreValue(rl_dest, rl_result);
788
789  return true;
790}
791
792bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
793  constexpr int kLargeArrayThreshold = 512;
794
795  RegLocation rl_src = info->args[0];
796  RegLocation rl_src_pos = info->args[1];
797  RegLocation rl_dst = info->args[2];
798  RegLocation rl_dst_pos = info->args[3];
799  RegLocation rl_length = info->args[4];
800  // Compile time check, handle exception by non-inline method to reduce related meta-data.
801  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
802      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
803      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
804    return false;
805  }
806
807  ClobberCallerSave();
808  LockCallTemps();  // Prepare for explicit register usage.
809  RegStorage rs_src = rs_x0;
810  RegStorage rs_dst = rs_x1;
811  LoadValueDirectFixed(rl_src, rs_src);
812  LoadValueDirectFixed(rl_dst, rs_dst);
813
814  // Handle null pointer exception in slow-path.
815  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
816  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
817  // Handle potential overlapping in slow-path.
818  // TUNING: Support overlapping cases.
819  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
820  // Handle exception or big length in slow-path.
821  RegStorage rs_length = rs_w2;
822  LoadValueDirectFixed(rl_length, rs_length);
823  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
824  // Src bounds check.
825  RegStorage rs_src_pos = rs_w3;
826  RegStorage rs_arr_length = rs_w4;
827  LoadValueDirectFixed(rl_src_pos, rs_src_pos);
828  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr);
829  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
830  OpRegReg(kOpSub, rs_arr_length, rs_src_pos);
831  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
832  // Dst bounds check.
833  RegStorage rs_dst_pos = rs_w5;
834  LoadValueDirectFixed(rl_dst_pos, rs_dst_pos);
835  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr);
836  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
837  OpRegReg(kOpSub, rs_arr_length, rs_dst_pos);
838  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
839
840  // Everything is checked now.
841  // Set rs_src to the address of the first element to be copied.
842  rs_src_pos = As64BitReg(rs_src_pos);
843  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
844  OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1);
845  OpRegReg(kOpAdd, rs_src, rs_src_pos);
846  // Set rs_src to the address of the first element to be copied.
847  rs_dst_pos = As64BitReg(rs_dst_pos);
848  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
849  OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1);
850  OpRegReg(kOpAdd, rs_dst, rs_dst_pos);
851
852  // rs_arr_length won't be not used anymore.
853  RegStorage rs_tmp = rs_arr_length;
854  // Use 64-bit view since rs_length will be used as index.
855  rs_length = As64BitReg(rs_length);
856  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
857
858  // Copy one element.
859  OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 2);
860  LIR* jmp_to_copy_two = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
861  OpRegImm(kOpSub, rs_length, 2);
862  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
863  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
864
865  // Copy two elements.
866  LIR *copy_two = NewLIR0(kPseudoTargetLabel);
867  OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 4);
868  LIR* jmp_to_copy_four = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
869  OpRegImm(kOpSub, rs_length, 4);
870  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
871  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
872
873  // Copy four elements.
874  LIR *copy_four = NewLIR0(kPseudoTargetLabel);
875  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
876  LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
877  OpRegImm(kOpSub, rs_length, 8);
878  rs_tmp = As64BitReg(rs_tmp);
879  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64);
880  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64);
881  LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr);
882  LIR* loop_finished = OpUnconditionalBranch(nullptr);
883
884  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
885  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
886  LIR* return_point = NewLIR0(kPseudoTargetLabel);
887
888  src_check_branch->target = check_failed;
889  dst_check_branch->target = check_failed;
890  src_dst_same->target = check_failed;
891  len_neg_or_too_big->target = check_failed;
892  src_pos_negative->target = check_failed;
893  src_bad_len->target = check_failed;
894  dst_pos_negative->target = check_failed;
895  dst_bad_len->target = check_failed;
896  jmp_to_copy_two->target = copy_two;
897  jmp_to_copy_four->target = copy_four;
898  jmp_to_ret->target = return_point;
899  jmp_to_loop->target = begin_loop;
900  loop_finished->target = return_point;
901
902  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
903
904  return true;
905}
906
907LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
908  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
909  return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
910}
911
912LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
913  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
914  return NULL;
915}
916
917LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
918  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
919  return NULL;
920}
921
922void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
923                                               RegLocation rl_result, int lit,
924                                               int first_bit, int second_bit) {
925  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit));
926  if (first_bit != 0) {
927    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
928  }
929}
930
931void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
932  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
933}
934
935// Test suspend flag, return target of taken suspend branch
936LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
937  NewLIR3(kA64Subs3rRd, rwSUSPEND, rwSUSPEND, 1);
938  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
939}
940
941// Decrement register and branch on condition
942LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
943  // Combine sub & test using sub setflags encoding here.  We need to make sure a
944  // subtract form that sets carry is used, so generate explicitly.
945  // TODO: might be best to add a new op, kOpSubs, and handle it generically.
946  ArmOpcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
947  NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
948  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
949  return OpCondBranch(c_code, target);
950}
951
952bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
953#if ANDROID_SMP != 0
954  // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
955  LIR* barrier = last_lir_insn_;
956
957  int dmb_flavor;
958  // TODO: revisit Arm barrier kinds
959  switch (barrier_kind) {
960    case kAnyStore: dmb_flavor = kISH; break;
961    case kLoadAny: dmb_flavor = kISH; break;
962        // We conjecture that kISHLD is insufficient.  It is documented
963        // to provide LoadLoad | StoreStore ordering.  But if this were used
964        // to implement volatile loads, we suspect that the lack of store
965        // atomicity on ARM would cause us to allow incorrect results for
966        // the canonical IRIW example.  But we're not sure.
967        // We should be using acquire loads instead.
968    case kStoreStore: dmb_flavor = kISHST; break;
969    case kAnyAny: dmb_flavor = kISH; break;
970    default:
971      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
972      dmb_flavor = kSY;  // quiet gcc.
973      break;
974  }
975
976  bool ret = false;
977
978  // If the same barrier already exists, don't generate another.
979  if (barrier == nullptr
980      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
981    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
982    ret = true;
983  }
984
985  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
986  DCHECK(!barrier->flags.use_def_invalid);
987  barrier->u.m.def_mask = &kEncodeAll;
988  return ret;
989#else
990  return false;
991#endif
992}
993
994void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
995  RegLocation rl_result;
996
997  rl_src = LoadValue(rl_src, kCoreReg);
998  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
999  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
1000  StoreValueWide(rl_dest, rl_result);
1001}
1002
1003void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
1004                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
1005  if (rl_src2.is_const) {
1006    DCHECK(rl_src2.wide);
1007    int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
1008    if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
1009      return;
1010    }
1011  }
1012
1013  RegLocation rl_result;
1014  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1015  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1016  GenDivZeroCheck(rl_src2.reg);
1017  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
1018  StoreValueWide(rl_dest, rl_result);
1019}
1020
1021void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
1022                             RegLocation rl_src2) {
1023  RegLocation rl_result;
1024
1025  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1026  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1027  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1028  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
1029  StoreValueWide(rl_dest, rl_result);
1030}
1031
1032void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1033  RegLocation rl_result;
1034
1035  rl_src = LoadValueWide(rl_src, kCoreReg);
1036  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1037  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
1038  StoreValueWide(rl_dest, rl_result);
1039}
1040
1041void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
1042  RegLocation rl_result;
1043
1044  rl_src = LoadValueWide(rl_src, kCoreReg);
1045  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1046  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
1047  StoreValueWide(rl_dest, rl_result);
1048}
1049
1050void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
1051                                  RegLocation rl_src1, RegLocation rl_src2) {
1052  switch (opcode) {
1053    case Instruction::NOT_LONG:
1054      GenNotLong(rl_dest, rl_src2);
1055      return;
1056    case Instruction::ADD_LONG:
1057    case Instruction::ADD_LONG_2ADDR:
1058      GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
1059      return;
1060    case Instruction::SUB_LONG:
1061    case Instruction::SUB_LONG_2ADDR:
1062      GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
1063      return;
1064    case Instruction::MUL_LONG:
1065    case Instruction::MUL_LONG_2ADDR:
1066      GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
1067      return;
1068    case Instruction::DIV_LONG:
1069    case Instruction::DIV_LONG_2ADDR:
1070      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
1071      return;
1072    case Instruction::REM_LONG:
1073    case Instruction::REM_LONG_2ADDR:
1074      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
1075      return;
1076    case Instruction::AND_LONG_2ADDR:
1077    case Instruction::AND_LONG:
1078      GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
1079      return;
1080    case Instruction::OR_LONG:
1081    case Instruction::OR_LONG_2ADDR:
1082      GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
1083      return;
1084    case Instruction::XOR_LONG:
1085    case Instruction::XOR_LONG_2ADDR:
1086      GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
1087      return;
1088    case Instruction::NEG_LONG: {
1089      GenNegLong(rl_dest, rl_src2);
1090      return;
1091    }
1092    default:
1093      LOG(FATAL) << "Invalid long arith op";
1094      return;
1095  }
1096}
1097
1098/*
1099 * Generate array load
1100 */
1101void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
1102                             RegLocation rl_index, RegLocation rl_dest, int scale) {
1103  RegisterClass reg_class = RegClassBySize(size);
1104  int len_offset = mirror::Array::LengthOffset().Int32Value();
1105  int data_offset;
1106  RegLocation rl_result;
1107  bool constant_index = rl_index.is_const;
1108  rl_array = LoadValue(rl_array, kRefReg);
1109  if (!constant_index) {
1110    rl_index = LoadValue(rl_index, kCoreReg);
1111  }
1112
1113  if (rl_dest.wide) {
1114    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1115  } else {
1116    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1117  }
1118
1119  // If index is constant, just fold it into the data offset
1120  if (constant_index) {
1121    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1122  }
1123
1124  /* null object? */
1125  GenNullCheck(rl_array.reg, opt_flags);
1126
1127  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1128  RegStorage reg_len;
1129  if (needs_range_check) {
1130    reg_len = AllocTemp();
1131    /* Get len */
1132    Load32Disp(rl_array.reg, len_offset, reg_len);
1133    MarkPossibleNullPointerException(opt_flags);
1134  } else {
1135    ForceImplicitNullCheck(rl_array.reg, opt_flags);
1136  }
1137  if (rl_dest.wide || rl_dest.fp || constant_index) {
1138    RegStorage reg_ptr;
1139    if (constant_index) {
1140      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
1141    } else {
1142      // No special indexed operation, lea + load w/ displacement
1143      reg_ptr = AllocTempRef();
1144      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
1145                       EncodeShift(kA64Lsl, scale));
1146      FreeTemp(rl_index.reg);
1147    }
1148    rl_result = EvalLoc(rl_dest, reg_class, true);
1149
1150    if (needs_range_check) {
1151      if (constant_index) {
1152        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1153      } else {
1154        GenArrayBoundsCheck(rl_index.reg, reg_len);
1155      }
1156      FreeTemp(reg_len);
1157    }
1158    if (rl_result.ref) {
1159      LoadRefDisp(reg_ptr, data_offset, rl_result.reg, kNotVolatile);
1160    } else {
1161      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
1162    }
1163    MarkPossibleNullPointerException(opt_flags);
1164    if (!constant_index) {
1165      FreeTemp(reg_ptr);
1166    }
1167    if (rl_dest.wide) {
1168      StoreValueWide(rl_dest, rl_result);
1169    } else {
1170      StoreValue(rl_dest, rl_result);
1171    }
1172  } else {
1173    // Offset base, then use indexed load
1174    RegStorage reg_ptr = AllocTempRef();
1175    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1176    FreeTemp(rl_array.reg);
1177    rl_result = EvalLoc(rl_dest, reg_class, true);
1178
1179    if (needs_range_check) {
1180      GenArrayBoundsCheck(rl_index.reg, reg_len);
1181      FreeTemp(reg_len);
1182    }
1183    if (rl_result.ref) {
1184      LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale);
1185    } else {
1186      LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size);
1187    }
1188    MarkPossibleNullPointerException(opt_flags);
1189    FreeTemp(reg_ptr);
1190    StoreValue(rl_dest, rl_result);
1191  }
1192}
1193
1194/*
1195 * Generate array store
1196 *
1197 */
1198void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
1199                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
1200  RegisterClass reg_class = RegClassBySize(size);
1201  int len_offset = mirror::Array::LengthOffset().Int32Value();
1202  bool constant_index = rl_index.is_const;
1203
1204  int data_offset;
1205  if (size == k64 || size == kDouble) {
1206    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1207  } else {
1208    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1209  }
1210
1211  // If index is constant, just fold it into the data offset.
1212  if (constant_index) {
1213    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1214  }
1215
1216  rl_array = LoadValue(rl_array, kRefReg);
1217  if (!constant_index) {
1218    rl_index = LoadValue(rl_index, kCoreReg);
1219  }
1220
1221  RegStorage reg_ptr;
1222  bool allocated_reg_ptr_temp = false;
1223  if (constant_index) {
1224    reg_ptr = rl_array.reg;
1225  } else if (IsTemp(rl_array.reg) && !card_mark) {
1226    Clobber(rl_array.reg);
1227    reg_ptr = rl_array.reg;
1228  } else {
1229    allocated_reg_ptr_temp = true;
1230    reg_ptr = AllocTempRef();
1231  }
1232
1233  /* null object? */
1234  GenNullCheck(rl_array.reg, opt_flags);
1235
1236  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1237  RegStorage reg_len;
1238  if (needs_range_check) {
1239    reg_len = AllocTemp();
1240    // NOTE: max live temps(4) here.
1241    /* Get len */
1242    Load32Disp(rl_array.reg, len_offset, reg_len);
1243    MarkPossibleNullPointerException(opt_flags);
1244  } else {
1245    ForceImplicitNullCheck(rl_array.reg, opt_flags);
1246  }
1247  /* at this point, reg_ptr points to array, 2 live temps */
1248  if (rl_src.wide || rl_src.fp || constant_index) {
1249    if (rl_src.wide) {
1250      rl_src = LoadValueWide(rl_src, reg_class);
1251    } else {
1252      rl_src = LoadValue(rl_src, reg_class);
1253    }
1254    if (!constant_index) {
1255      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
1256                       EncodeShift(kA64Lsl, scale));
1257    }
1258    if (needs_range_check) {
1259      if (constant_index) {
1260        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1261      } else {
1262        GenArrayBoundsCheck(rl_index.reg, reg_len);
1263      }
1264      FreeTemp(reg_len);
1265    }
1266    if (rl_src.ref) {
1267      StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
1268    } else {
1269      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
1270    }
1271    MarkPossibleNullPointerException(opt_flags);
1272  } else {
1273    /* reg_ptr -> array data */
1274    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1275    rl_src = LoadValue(rl_src, reg_class);
1276    if (needs_range_check) {
1277      GenArrayBoundsCheck(rl_index.reg, reg_len);
1278      FreeTemp(reg_len);
1279    }
1280    if (rl_src.ref) {
1281      StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale);
1282    } else {
1283      StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size);
1284    }
1285    MarkPossibleNullPointerException(opt_flags);
1286  }
1287  if (allocated_reg_ptr_temp) {
1288    FreeTemp(reg_ptr);
1289  }
1290  if (card_mark) {
1291    MarkGCCard(rl_src.reg, rl_array.reg);
1292  }
1293}
1294
1295void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
1296                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
1297  OpKind op = kOpBkpt;
1298  // Per spec, we only care about low 6 bits of shift amount.
1299  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
1300  rl_src = LoadValueWide(rl_src, kCoreReg);
1301  if (shift_amount == 0) {
1302    StoreValueWide(rl_dest, rl_src);
1303    return;
1304  }
1305
1306  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1307  switch (opcode) {
1308    case Instruction::SHL_LONG:
1309    case Instruction::SHL_LONG_2ADDR:
1310      op = kOpLsl;
1311      break;
1312    case Instruction::SHR_LONG:
1313    case Instruction::SHR_LONG_2ADDR:
1314      op = kOpAsr;
1315      break;
1316    case Instruction::USHR_LONG:
1317    case Instruction::USHR_LONG_2ADDR:
1318      op = kOpLsr;
1319      break;
1320    default:
1321      LOG(FATAL) << "Unexpected case";
1322  }
1323  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
1324  StoreValueWide(rl_dest, rl_result);
1325}
1326
1327void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
1328                                     RegLocation rl_src1, RegLocation rl_src2) {
1329  OpKind op = kOpBkpt;
1330  switch (opcode) {
1331    case Instruction::ADD_LONG:
1332    case Instruction::ADD_LONG_2ADDR:
1333      op = kOpAdd;
1334      break;
1335    case Instruction::SUB_LONG:
1336    case Instruction::SUB_LONG_2ADDR:
1337      op = kOpSub;
1338      break;
1339    case Instruction::AND_LONG:
1340    case Instruction::AND_LONG_2ADDR:
1341      op = kOpAnd;
1342      break;
1343    case Instruction::OR_LONG:
1344    case Instruction::OR_LONG_2ADDR:
1345      op = kOpOr;
1346      break;
1347    case Instruction::XOR_LONG:
1348    case Instruction::XOR_LONG_2ADDR:
1349      op = kOpXor;
1350      break;
1351    default:
1352      LOG(FATAL) << "Unexpected opcode";
1353  }
1354
1355  if (op == kOpSub) {
1356    if (!rl_src2.is_const) {
1357      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1358    }
1359  } else {
1360    // Associativity.
1361    if (!rl_src2.is_const) {
1362      DCHECK(rl_src1.is_const);
1363      std::swap(rl_src1, rl_src2);
1364    }
1365  }
1366  DCHECK(rl_src2.is_const);
1367  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
1368
1369  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1370  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1371  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
1372  StoreValueWide(rl_dest, rl_result);
1373}
1374
1375static uint32_t ExtractReg(uint32_t reg_mask, int* reg) {
1376  // Find first register.
1377  int first_bit_set = CTZ(reg_mask) + 1;
1378  *reg = *reg + first_bit_set;
1379  reg_mask >>= first_bit_set;
1380  return reg_mask;
1381}
1382
1383/**
1384 * @brief Split a register list in pairs or registers.
1385 *
1386 * Given a list of registers in @p reg_mask, split the list in pairs. Use as follows:
1387 * @code
1388 *   int reg1 = -1, reg2 = -1;
1389 *   while (reg_mask) {
1390 *     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1391 *     if (UNLIKELY(reg2 < 0)) {
1392 *       // Single register in reg1.
1393 *     } else {
1394 *       // Pair in reg1, reg2.
1395 *     }
1396 *   }
1397 * @endcode
1398 */
1399static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
1400  // Find first register.
1401  int first_bit_set = CTZ(reg_mask) + 1;
1402  int reg = *reg1 + first_bit_set;
1403  reg_mask >>= first_bit_set;
1404
1405  if (LIKELY(reg_mask)) {
1406    // Save the first register, find the second and use the pair opcode.
1407    int second_bit_set = CTZ(reg_mask) + 1;
1408    *reg2 = reg;
1409    reg_mask >>= second_bit_set;
1410    *reg1 = reg + second_bit_set;
1411    return reg_mask;
1412  }
1413
1414  // Use the single opcode, as we just have one register.
1415  *reg1 = reg;
1416  *reg2 = -1;
1417  return reg_mask;
1418}
1419
1420static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1421  int reg1 = -1, reg2 = -1;
1422  const int reg_log2_size = 3;
1423
1424  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1425    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1426    if (UNLIKELY(reg2 < 0)) {
1427      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1428    } else {
1429      m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1430                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1431    }
1432  }
1433}
1434
1435// TODO(Arm64): consider using ld1 and st1?
1436static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1437  int reg1 = -1, reg2 = -1;
1438  const int reg_log2_size = 3;
1439
1440  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1441    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1442    if (UNLIKELY(reg2 < 0)) {
1443      m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
1444                   offset);
1445    } else {
1446      m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1447                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1448    }
1449  }
1450}
1451
1452static int SpillRegsPreSub(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
1453                           uint32_t fp_reg_mask, int frame_size) {
1454  m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
1455
1456  int core_count = POPCOUNT(core_reg_mask);
1457
1458  if (fp_reg_mask != 0) {
1459    // Spill FP regs.
1460    int fp_count = POPCOUNT(fp_reg_mask);
1461    int spill_offset = frame_size - (core_count + fp_count) * kArm64PointerSize;
1462    SpillFPRegs(m2l, rs_sp, spill_offset, fp_reg_mask);
1463  }
1464
1465  if (core_reg_mask != 0) {
1466    // Spill core regs.
1467    int spill_offset = frame_size - (core_count * kArm64PointerSize);
1468    SpillCoreRegs(m2l, rs_sp, spill_offset, core_reg_mask);
1469  }
1470
1471  return frame_size;
1472}
1473
1474static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
1475                               uint32_t fp_reg_mask, int frame_size) {
1476  // Spill both core and fp regs at the same time.
1477  // The very first instruction is an stp with a pre-indexed address: it moves the stack pointer
1478  // down by the whole spill area, and from then on we fill upwards. In most cases this generates
1479  // the same overall instruction count as SpillRegsPreSub above (the exception: odd core spills
1480  // with even, non-zero fp spills), but it is more flexible, as the offsets are guaranteed small.
1481  //
1482  // Some demonstrative fill cases : (c) = core, (f) = fp
1483  // cc    44   cc    44   cc    22   cc    33   fc => 1[1/2]
1484  // fc => 23   fc => 23   ff => 11   ff => 22
1485  // ff    11    f    11               f    11
1486  //
1487  int reg1 = -1, reg2 = -1;
1488  int core_count = POPCOUNT(core_reg_mask);
1489  int fp_count = POPCOUNT(fp_reg_mask);
1490
1491  int combined = fp_count + core_count;
1492  int all_offset = RoundUp(combined, 2);  // Needs to be 16B = 2-reg aligned.
1493
1494  int cur_offset = 2;  // Starting offset (in 8-byte slots) after the first stp; the bottom pair
1495                       // slot has already been filled by the pre-indexed store.
1496
1497  // First figure out whether the bottom is FP or core.
1498  if (fp_count > 0) {
1499    // Some FP spills.
1500    //
1501    // Four cases (a dummy fills the unused stp half: the fp reg itself in cases 1 and 2, d0 in case 4):
1502    // 1) Single FP, even number of core -> stp dummy, fp_reg
1503    // 2) Single FP, odd number of core  -> stp fp_reg, dummy
1504    // 3) More FP, even number combined  -> stp fp_reg1, fp_reg2
1505    // 4) More FP, odd number combined   -> stp d0, fp_reg
1506    if (fp_count == 1) {
1507      fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
1508      DCHECK_EQ(fp_reg_mask, 0U);
1509      if (core_count % 2 == 0) {
1510        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
1511                     RegStorage::FloatSolo64(reg1).GetReg(),
1512                     RegStorage::FloatSolo64(reg1).GetReg(),
1513                     base.GetReg(), -all_offset);
1514      } else {
1515        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
1516                     RegStorage::FloatSolo64(reg1).GetReg(),
1517                     RegStorage::FloatSolo64(reg1).GetReg(),
1518                     base.GetReg(), -all_offset);
1519        cur_offset = 0;  // That core reg needs to go into the upper half.
1520      }
1521    } else {
1522      if (combined % 2 == 0) {
1523        fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
1524        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1525                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
1526      } else {
1527        fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
1528        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
1529                     base.GetReg(), -all_offset);
1530      }
1531    }
1532  } else {
1533    // No FP spills.
1534    //
1535    // Two cases:
1536    // 1) Even number of core -> stp core1, core2
1537    // 2) Odd number of core -> stp xzr, core1
1538    if (core_count % 2 == 1) {
1539      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
1540      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
1541                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
1542    } else {
1543      core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
1544      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
1545                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
1546    }
1547  }
1548
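  // At this point the pre-indexed stp has reserved the whole spill area and filled the bottom slot
  // pair (one half may hold a dummy value); everything that follows uses small positive offsets
  // from the new sp.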
1549  if (fp_count != 0) {
1550    for (; fp_reg_mask != 0;) {
1551      // Have some FP regs to do.
1552      fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
1553      if (UNLIKELY(reg2 < 0)) {
1554        m2l->NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
1555                     cur_offset);
1556        // Do not increment offset here, as the second half will be filled by a core reg.
1557      } else {
1558        m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1559                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
1560        cur_offset += 2;
1561      }
1562    }
1563
1564    // Reset counting.
1565    reg1 = -1;
1566
1567    // If there is an odd number of core registers, store the bottom one into the upper half-slot now.
1568    if (core_count % 2 == 1) {
1569      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
1570      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
1571                   cur_offset + 1);
1572      cur_offset += 2;  // Half-slot filled now.
1573    }
1574  }
1575
1576  // Spill the rest of the core regs. Their number is guaranteed to be even.
1577  DCHECK_EQ(POPCOUNT(core_reg_mask) % 2, 0);
1578  for (; core_reg_mask != 0; cur_offset += 2) {
1579    core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
1580    m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1581                 RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
1582  }
1583
1584  DCHECK_EQ(cur_offset, all_offset);
1585
1586  return all_offset * 8;
1587}
1588
1589int Arm64Mir2Lir::SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
1590                            int frame_size) {
1591  // If the frame size is small enough that all spill offsets fit into the stp immediates (i.e.
1592  // frame_size <= 63 * 8 bytes), use the pre-sub setup: it decrements sp early (which helps
1593  // instruction scheduling) and is no worse, instruction-count wise, than SpillRegsPreIndexed above.
1594  //
1595  // The pre-sub case is also optimal when we have an odd number of core spills and an even
1596  // (non-zero) number of fp spills.
1597  if (RoundUp(frame_size, 8) / 8 <= 63) {
1598    return SpillRegsPreSub(this, base, core_reg_mask, fp_reg_mask, frame_size);
1599  } else {
1600    return SpillRegsPreIndexed(this, base, core_reg_mask, fp_reg_mask, frame_size);
1601  }
1602}
1603
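// Reload the core registers selected by reg_mask from [base + offset]; the mirror of
// SpillCoreRegs, using ldr/ldp instead of str/stp.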
1604static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1605  int reg1 = -1, reg2 = -1;
1606  const int reg_log2_size = 3;
1607
1608  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1609    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1610    if (UNLIKELY(reg2 < 0)) {
1611      m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1612    } else {
1613      DCHECK_LE(offset, 63);
1614      m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1615                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1616    }
1617  }
1618}
1619
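// FP counterpart of UnSpillCoreRegs: reload the double-precision registers selected by reg_mask.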
1620static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
1621  int reg1 = -1, reg2 = -1;
1622  const int reg_log2_size = 3;
1623
1624  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1625    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1626    if (UNLIKELY(reg2 < 0)) {
1627      m2l->NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
1628                   offset);
1629    } else {
1630      m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1631                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1632    }
1633  }
1634}
1635
1636void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
1637                               int frame_size) {
1638  // Restore saves and drop stack frame.
1639  // 2 versions:
1640  //
1641  // 1. (Original): Try to address directly, then drop the whole frame.
1642  //                Limitation: the ldp offset is a 7-bit signed, 8-byte-scaled immediate.
1643  //
1644  // 2. (New): Drop the non-save-part. Then do similar to original, which is now guaranteed to be
1645  //           in range. Then drop the rest.
1646  //
1647  // TODO: In methods with few spills but huge frame, it would be better to do non-immediate loads
1648  //       in variant 1.
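  //
  // Illustrative example for variant 2: frame_size = 1024 with 4 core and 2 fp spills exceeds the
  // 504-byte limit below, so we first add RoundDown(1024 - 8 * 6, 16) = 976 to sp, reload the fp
  // regs at offset 0 and the core regs at offset 16, and finally drop the remaining 48 bytes.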
1649
1650  // "Magic" constant: 63 (the maximum positive 7-bit signed scaled offset) * 8 bytes.
1651  static constexpr int kMaxFramesizeForOffset = 63 * kArm64PointerSize;
1652
1653  const int num_core_spills = POPCOUNT(core_reg_mask);
1654  const int num_fp_spills = POPCOUNT(fp_reg_mask);
1655
1656  int early_drop = 0;
1657
1658  if (frame_size > kMaxFramesizeForOffset) {
1659    // Second variant. Drop the frame part.
1660
1661    // TODO: Always use the first formula? With fp_reg_mask == 0, num_fp_spills is 0, so both agree.
1662    if (fp_reg_mask != 0) {
1663      early_drop = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
1664    } else {
1665      early_drop = frame_size - kArm64PointerSize * num_core_spills;
1666    }
1667
1668    // The drop needs to be 16-byte aligned, so that SP stays aligned.
1669    early_drop = RoundDown(early_drop, 16);
1670
1671    OpRegImm64(kOpAdd, rs_sp, early_drop);
1672  }
1673
1674  // Unspill.
1675  if (fp_reg_mask != 0) {
1676    int offset = frame_size - early_drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
1677    UnSpillFPRegs(this, rs_sp, offset, fp_reg_mask);
1678  }
1679  if (core_reg_mask != 0) {
1680    int offset = frame_size - early_drop - kArm64PointerSize * num_core_spills;
1681    UnSpillCoreRegs(this, rs_sp, offset, core_reg_mask);
1682  }
1683
1684  // Drop (the rest of) the frame.
1685  OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop);
1686}
1687
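// Inlined bit-reversal intrinsic (e.g. java.lang.Integer.reverse / Long.reverse): a single rbit on
// the 32-bit or 64-bit value.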
1688bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
1689  ArmOpcode wide = (size == k64) ? WIDE(0) : UNWIDE(0);
1690  RegLocation rl_src_i = info->args[0];
1691  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
1692  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1693  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
1694  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
1695  (size == k64) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
1696  return true;
1697}
1698
1699}  // namespace art
1700