int_arm64.cc revision ed65c5e982705defdb597d94d1aa3f2997239c9b
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the Arm64 ISA. */
18
19#include "arm64_lir.h"
20#include "codegen_arm64.h"
21#include "dex/quick/mir_to_lir-inl.h"
22#include "entrypoints/quick/quick_entrypoints.h"
23#include "mirror/array.h"
24
25namespace art {
26
27LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
28  OpRegReg(kOpCmp, src1, src2);
29  return OpCondBranch(cond, target);
30}
31
32// TODO(Arm64): remove this.
33LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
34  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
35  return NULL;
36}
37
38void Arm64Mir2Lir::OpEndIT(LIR* it) {
39  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
40}
41
42/*
43 * 64-bit three-way compare: wC = -1, 0 or +1 as xA <, == or > xB.
44 *     cmp   xA, xB
45 *     csinc wC, wzr, wzr, eq   // wC = (xA == xB) ? 0 : 1
46 *     csneg wC, wC, wC, ge     // wC = (xA >= xB) ? wC : -wC
47 */
48void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
49                              RegLocation rl_src2) {
50  RegLocation rl_result;
51  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
52  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
53  rl_result = EvalLoc(rl_dest, kCoreReg, true);
54
55  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
56  NewLIR4(WIDE(kA64Csinc4rrrc), rl_result.reg.GetReg(), rxzr, rxzr, kArmCondEq);
57  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_result.reg.GetReg(),
58          rl_result.reg.GetReg(), kArmCondGe);
59  StoreValueWide(rl_dest, rl_result);
60}
61
62void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
63                             RegLocation rl_src1, RegLocation rl_shift) {
64  OpKind op = kOpBkpt;
65  switch (opcode) {
66  case Instruction::SHL_LONG:
67  case Instruction::SHL_LONG_2ADDR:
68    op = kOpLsl;
69    break;
70  case Instruction::SHR_LONG:
71  case Instruction::SHR_LONG_2ADDR:
72    op = kOpAsr;
73    break;
74  case Instruction::USHR_LONG:
75  case Instruction::USHR_LONG_2ADDR:
76    op = kOpLsr;
77    break;
78  default:
79    LOG(FATAL) << "Unexpected case: " << opcode;
80  }
81  rl_shift = LoadValueWide(rl_shift, kCoreReg);
82  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
83  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
84  OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg);
85  StoreValueWide(rl_dest, rl_result);
86}
87
88void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
89                                            int64_t val, ConditionCode ccode) {
90  LIR* taken = &block_label_list_[bb->taken];
91  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
92
93  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
94    ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
95    LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0);
96    branch->target = taken;
97  } else {
98    OpRegImm64(kOpCmp, rl_src1.reg, val);
99    OpCondBranch(ccode, taken);
100  }
101}
102
103void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
104  // TODO(Arm64): implement this.
105  UNIMPLEMENTED(FATAL);
106
107  RegLocation rl_result;
108  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
109  RegLocation rl_dest = mir_graph_->GetDest(mir);
110  rl_src = LoadValue(rl_src, kCoreReg);
111  ConditionCode ccode = mir->meta.ccode;
112  if (mir->ssa_rep->num_uses == 1) {
113    // CONST case
114    int true_val = mir->dalvikInsn.vB;
115    int false_val = mir->dalvikInsn.vC;
116    rl_result = EvalLoc(rl_dest, kCoreReg, true);
117    // Change kCondNe to kCondEq for the special cases below.
118    if (ccode == kCondNe) {
119      ccode = kCondEq;
120      std::swap(true_val, false_val);
121    }
122    bool cheap_false_val = InexpensiveConstantInt(false_val);
123    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
124      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
125      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
126      OpIT(true_val == 0 ? kCondNe : kCondUge, "");
127      LoadConstant(rl_result.reg, false_val);
128      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
129    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
130      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
131      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
132      OpIT(kCondLs, "");
133      LoadConstant(rl_result.reg, false_val);
134      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
135    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
136      OpRegImm(kOpCmp, rl_src.reg, 0);
137      OpIT(ccode, "E");
138      LoadConstant(rl_result.reg, true_val);
139      LoadConstant(rl_result.reg, false_val);
140      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
141    } else {
142      // Unlikely case - could be tuned.
143      RegStorage t_reg1 = AllocTemp();
144      RegStorage t_reg2 = AllocTemp();
145      LoadConstant(t_reg1, true_val);
146      LoadConstant(t_reg2, false_val);
147      OpRegImm(kOpCmp, rl_src.reg, 0);
148      OpIT(ccode, "E");
149      OpRegCopy(rl_result.reg, t_reg1);
150      OpRegCopy(rl_result.reg, t_reg2);
151      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
152    }
153  } else {
154    // MOVE case
155    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
156    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
157    rl_true = LoadValue(rl_true, kCoreReg);
158    rl_false = LoadValue(rl_false, kCoreReg);
159    rl_result = EvalLoc(rl_dest, kCoreReg, true);
160    OpRegImm(kOpCmp, rl_src.reg, 0);
161    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
162      OpIT(NegateComparison(ccode), "");
163      OpRegCopy(rl_result.reg, rl_false.reg);
164    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
165      OpIT(ccode, "");
166      OpRegCopy(rl_result.reg, rl_true.reg);
167    } else {  // Normal - select between the two.
168      OpIT(ccode, "E");
169      OpRegCopy(rl_result.reg, rl_true.reg);
170      OpRegCopy(rl_result.reg, rl_false.reg);
171    }
172    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
173  }
174  StoreValue(rl_dest, rl_result);
175}
176
177void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
178  // TODO(Arm64): implement this.
179  UNIMPLEMENTED(FATAL);
180
181  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
182  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
183  // Normalize such that if either operand is constant, src2 will be constant.
184  ConditionCode ccode = mir->meta.ccode;
185  if (rl_src1.is_const) {
186    std::swap(rl_src1, rl_src2);
187    ccode = FlipComparisonOrder(ccode);
188  }
189  if (rl_src2.is_const) {
190    RegLocation rl_temp = UpdateLocWide(rl_src2);
191    // Do special compare/branch against simple const operand if not already in registers.
192    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
193    if ((rl_temp.location != kLocPhysReg)
194     /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) {
195      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
196      return;
197    }
198  }
199  LIR* taken = &block_label_list_[bb->taken];
200  LIR* not_taken = &block_label_list_[bb->fall_through];
201  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
202  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
203  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
204  switch (ccode) {
205    case kCondEq:
206      OpCondBranch(kCondNe, not_taken);
207      break;
208    case kCondNe:
209      OpCondBranch(kCondNe, taken);
210      break;
211    case kCondLt:
212      OpCondBranch(kCondLt, taken);
213      OpCondBranch(kCondGt, not_taken);
214      ccode = kCondUlt;
215      break;
216    case kCondLe:
217      OpCondBranch(kCondLt, taken);
218      OpCondBranch(kCondGt, not_taken);
219      ccode = kCondLs;
220      break;
221    case kCondGt:
222      OpCondBranch(kCondGt, taken);
223      OpCondBranch(kCondLt, not_taken);
224      ccode = kCondHi;
225      break;
226    case kCondGe:
227      OpCondBranch(kCondGt, taken);
228      OpCondBranch(kCondLt, not_taken);
229      ccode = kCondUge;
230      break;
231    default:
232      LOG(FATAL) << "Unexpected ccode: " << ccode;
233  }
234  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
235  OpCondBranch(ccode, taken);
236}
237
238/*
239 * Generate a register comparison to an immediate and branch.  Caller
240 * is responsible for setting branch target field.
241 */
242LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
243                                  LIR* target) {
244  LIR* branch;
245  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
246  if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) {
247    ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
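       // WIDE(0) is presumably just the 64-bit width modifier (and UNWIDE(0) the empty one);
       // OR-ing it into the opcode below selects the x- or w-register form of cbz/cbnz.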
248    ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
249    branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
250  } else {
251    OpRegImm(kOpCmp, reg, check_value);
252    branch = NewLIR2(kA64B2ct, arm_cond, 0);
253  }
254  branch->target = target;
255  return branch;
256}
257
258LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
259  bool dest_is_fp = r_dest.IsFloat();
260  bool src_is_fp = r_src.IsFloat();
261  ArmOpcode opcode = kA64Brk1d;
262  LIR* res;
263
264  if (LIKELY(dest_is_fp == src_is_fp)) {
265    if (LIKELY(!dest_is_fp)) {
266      // Core/core copy.
267      // Copies involving the sp register require a different instruction.
268      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
269
270      // TODO(Arm64): kA64Add4RRdT formally takes 4 arguments, but is used here as a
271      //   2-argument instruction. This currently works because the other arguments default
272      //   to 0. We should instead introduce an alias kA64Mov2RR.
273
274      // Do an x/x (64-bit) copy only if both registers are x registers.
275      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
276        opcode = WIDE(opcode);
277      }
278    } else {
279      // Float/float copy.
280      bool dest_is_double = r_dest.IsDouble();
281      bool src_is_double = r_src.IsDouble();
282
283      // We do not do float/double or double/float casts here.
284      DCHECK_EQ(dest_is_double, src_is_double);
285
286      // Homogeneous float/float copy.
287      opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
288    }
289  } else {
290    // Inhomogeneous register copy.
291    if (dest_is_fp) {
292      if (r_dest.IsDouble()) {
293        opcode = kA64Fmov2Sx;
294      } else {
295        DCHECK(r_src.IsSingle());
296        opcode = kA64Fmov2sw;
297      }
298    } else {
299      if (r_src.IsDouble()) {
300        opcode = kA64Fmov2xS;
301      } else {
302        DCHECK(r_dest.Is32Bit());
303        opcode = kA64Fmov2ws;
304      }
305    }
306  }
307
308  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
309
310  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
311    res->flags.is_nop = true;
312  }
313
314  return res;
315}
316
317void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
318  if (r_dest != r_src) {
319    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
320    AppendLIR(res);
321  }
322}
323
324void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
325  OpRegCopy(r_dest, r_src);
326}
327
328// Table of magic divisors
329struct MagicTable {
330  uint32_t magic;
331  uint32_t shift;
332  DividePattern pattern;
333};
334
335static const MagicTable magic_table[] = {
336  {0, 0, DivideNone},        // 0
337  {0, 0, DivideNone},        // 1
338  {0, 0, DivideNone},        // 2
339  {0x55555556, 0, Divide3},  // 3
340  {0, 0, DivideNone},        // 4
341  {0x66666667, 1, Divide5},  // 5
342  {0x2AAAAAAB, 0, Divide3},  // 6
343  {0x92492493, 2, Divide7},  // 7
344  {0, 0, DivideNone},        // 8
345  {0x38E38E39, 1, Divide5},  // 9
346  {0x66666667, 2, Divide5},  // 10
347  {0x2E8BA2E9, 1, Divide5},  // 11
348  {0x2AAAAAAB, 1, Divide5},  // 12
349  {0x4EC4EC4F, 2, Divide5},  // 13
350  {0x92492493, 3, Divide7},  // 14
351  {0x88888889, 3, Divide7},  // 15
352};
353
354// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
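   // Rough summary of the scheme (Hacker's Delight 10-4): "magic" is a multiplier whose
   // signed 64-bit product with the dividend carries roughly x/lit in its high 32 bits,
   // "shift" is the extra arithmetic right shift, and "pattern" selects the
   // sign-correction sequence emitted below.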
355bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
356                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
357  // TODO(Arm64): fix this for Arm64. Note: may be worth revisiting the magic table.
358  //   It should be possible to subtract one from all its entries, and to use smaddl
359  //   to counteract this. The advantage is that the constants should then be easier to
360  //   encode as logical immediates (0x55555555 rather than 0x55555556).
361  UNIMPLEMENTED(FATAL);
362
363  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
364    return false;
365  }
366  DividePattern pattern = magic_table[lit].pattern;
367  if (pattern == DivideNone) {
368    return false;
369  }
370  // Tuning: add rem patterns
371  if (!is_div) {
372    return false;
373  }
374
375  RegStorage r_magic = AllocTemp();
376  LoadConstant(r_magic, magic_table[lit].magic);
377  rl_src = LoadValue(rl_src, kCoreReg);
378  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
379  RegStorage r_hi = AllocTemp();
380  RegStorage r_lo = AllocTemp();
381  NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
382  switch (pattern) {
383    case Divide3:
384      OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kA64Asr, 31));
385      break;
386    case Divide5:
387      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
388      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
389      break;
390    case Divide7:
391      OpRegReg(kOpAdd, r_hi, rl_src.reg);
392      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
393      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
394      break;
395    default:
396      LOG(FATAL) << "Unexpected pattern: " << pattern;
397  }
398  StoreValue(rl_dest, rl_result);
399  return true;
400}
401
402bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
403  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
404  return false;
405}
406
407RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
408                      RegLocation rl_src2, bool is_div, bool check_zero) {
409  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
410  return rl_dest;
411}
412
413RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
414  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
415  return rl_dest;
416}
417
418RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
419  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
420
421  // Put the literal in a temp.
422  RegStorage lit_temp = AllocTemp();
423  LoadConstant(lit_temp, lit);
424  // Use the generic case for div/rem with arg2 in a register.
425  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
426  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
427  FreeTemp(lit_temp);
428
429  return rl_result;
430}
431
432RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
433                                  bool is_div) {
434  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
435
436  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
437  if (is_div) {
438    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
439  } else {
440    // temp = r_src1 / r_src2
441    // dest = r_src1 - temp * r_src2
442    RegStorage temp;
443    ArmOpcode wide;
444    if (rl_result.reg.Is64Bit()) {
445      temp = AllocTempWide();
446      wide = WIDE(0);
447    } else {
448      temp = AllocTemp();
449      wide = UNWIDE(0);
450    }
451    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
452    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
453            r_src1.GetReg(), r_src2.GetReg());
454    FreeTemp(temp);
455  }
456  return rl_result;
457}
458
459bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
460  // TODO(Arm64): implement this.
461  UNIMPLEMENTED(FATAL);
462
463  DCHECK_EQ(cu_->instruction_set, kThumb2);
464  RegLocation rl_src1 = info->args[0];
465  RegLocation rl_src2 = info->args[1];
466  rl_src1 = LoadValue(rl_src1, kCoreReg);
467  rl_src2 = LoadValue(rl_src2, kCoreReg);
468  RegLocation rl_dest = InlineTarget(info);
469  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
470  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
471  OpIT((is_min) ? kCondGt : kCondLt, "E");
472  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
473  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
474  GenBarrier();
475  StoreValue(rl_dest, rl_result);
476  return true;
477}
478
479bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
480  // TODO(Arm64): implement this.
481  UNIMPLEMENTED(WARNING);
482
483  RegLocation rl_src_address = info->args[0];  // long address
484  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
485  RegLocation rl_dest = InlineTarget(info);
486  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
487  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
488  if (size == k64) {
489    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
490    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
491      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
492      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
493    } else {
494      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
495      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
496    }
497    StoreValueWide(rl_dest, rl_result);
498  } else {
499    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
500    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
501    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
502    StoreValue(rl_dest, rl_result);
503  }
504  return true;
505}
506
507bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
508  // TODO(Arm64): implement this.
509  UNIMPLEMENTED(WARNING);
510
511  RegLocation rl_src_address = info->args[0];  // long address
512  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
513  RegLocation rl_src_value = info->args[2];  // [size] value
514  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
515  if (size == k64) {
516    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
517    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
518    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32);
519    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32);
520  } else {
521    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
522    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
523    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
524    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
525  }
526  return true;
527}
528
529void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
530  LOG(FATAL) << "Unexpected use of OpLea for Arm64";
531}
532
533void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
534  UNIMPLEMENTED(FATAL) << "Should not be used.";
535}
536
537void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
538  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm64";
539}
540
541bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
542  // TODO(Arm64): implement this.
543  UNIMPLEMENTED(WARNING);
544
545  DCHECK_EQ(cu_->instruction_set, kThumb2);
546  // Unused - RegLocation rl_src_unsafe = info->args[0];
547  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
548  RegLocation rl_src_offset = info->args[2];  // long low
549  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
550  RegLocation rl_src_expected = info->args[4];  // int, long or Object
551  // If is_long, high half is in info->args[5]
552  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
553  // If is_long, high half is in info->args[7]
554  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
555
556  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
557  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
558  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
559  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
560  // into the same temps, reducing the number of required temps down to 5. We shall work
561  // around the potentially locked temp by using LR for r_ptr, unconditionally.
562  // TODO: Pass information about the need for more temps to the stack frame generation
563  // code so that we can rely on being able to allocate enough temps.
564  DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp());
565  MarkTemp(rs_rA64_LR);
566  FreeTemp(rs_rA64_LR);
567  LockTemp(rs_rA64_LR);
568  bool load_early = true;
569  if (is_long) {
570    RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
571        rl_src_expected.reg;
572    RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
573        rl_src_new_value.reg;
574    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
575    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
576    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
577    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
578
579    if (!expected_is_good_reg && !new_value_is_good_reg) {
580      // None of expected/new_value is non-temp reg, need to load both late
581      load_early = false;
582      // Make sure they are not in the temp regs and the load will not be skipped.
583      if (expected_is_core_reg) {
584        FlushRegWide(rl_src_expected.reg);
585        ClobberSReg(rl_src_expected.s_reg_low);
586        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
587        rl_src_expected.location = kLocDalvikFrame;
588      }
589      if (new_value_is_core_reg) {
590        FlushRegWide(rl_src_new_value.reg);
591        ClobberSReg(rl_src_new_value.s_reg_low);
592        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
593        rl_src_new_value.location = kLocDalvikFrame;
594      }
595    }
596  }
597
598  // Release store semantics, get the barrier out of the way.  TODO: revisit
599  GenMemBarrier(kStoreLoad);
600
601  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
602  RegLocation rl_new_value;
603  if (!is_long) {
604    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
605  } else if (load_early) {
606    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
607  }
608
609  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
610    // Mark card for object assuming new value is stored.
611    MarkGCCard(rl_new_value.reg, rl_object.reg);
612  }
613
614  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
615
616  RegStorage r_ptr = rs_rA64_LR;
617  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
618
619  // Free now unneeded rl_object and rl_offset to give more temps.
620  ClobberSReg(rl_object.s_reg_low);
621  FreeTemp(rl_object.reg);
622  ClobberSReg(rl_offset.s_reg_low);
623  FreeTemp(rl_offset.reg);
624
625  RegLocation rl_expected;
626  if (!is_long) {
627    rl_expected = LoadValue(rl_src_expected, kCoreReg);
628  } else if (load_early) {
629    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
630  } else {
631    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
632    int low_reg = AllocTemp().GetReg();
633    int high_reg = AllocTemp().GetReg();
634    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
635    rl_expected = rl_new_value;
636  }
637
638  // do {
639  //   tmp = [r_ptr] - expected;
640  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
641  // result = tmp != 0;
642
643  RegStorage r_tmp = AllocTemp();
644  LIR* target = NewLIR0(kPseudoTargetLabel);
645
646  if (is_long) {
647    RegStorage r_tmp_high = AllocTemp();
648    if (!load_early) {
649      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
650    }
651    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
652    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
653    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
654    if (!load_early) {
655      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
656    }
657
658    LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL);
659    LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL);
660    NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(),
661            rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
662    LIR* target2 = NewLIR0(kPseudoTargetLabel);
663    branch1->target = target2;
664    branch2->target = target2;
665    FreeTemp(r_tmp_high);  // Now unneeded
666
667  } else {
668    NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0);
669    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
670    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
671    OpIT(kCondEq, "T");
672    NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
673  }
674
675  // Still one conditional left over from the OpIT(kCondEq, "T") on either branch
676  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
677  OpCondBranch(kCondEq, target);
678
679  if (!load_early) {
680    FreeTemp(rl_expected.reg);  // Now unneeded.
681  }
682
683  // result := (tmp1 != 0) ? 0 : 1;
684  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
685  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
686  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
687  OpIT(kCondUlt, "");
688  LoadConstant(rl_result.reg, 0); /* cc */
689  FreeTemp(r_tmp);  // Now unneeded.
690
691  StoreValue(rl_dest, rl_result);
692
693  // Now, restore lr to its non-temp status.
694  Clobber(rs_rA64_LR);
695  UnmarkTemp(rs_rA64_LR);
696  return true;
697}
698
699LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
700  return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
701}
702
703LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
704  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
705  return NULL;
706}
707
708LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
709  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
710  return NULL;
711}
712
713void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
714                                               RegLocation rl_result, int lit,
715                                               int first_bit, int second_bit) {
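   // lit is assumed to have exactly two set bits, at positions first_bit and second_bit,
   // so x * lit == (x + (x << (second_bit - first_bit))) << first_bit: the add-with-shift
   // below forms the inner term and the final shift is applied only when first_bit != 0.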
716  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit));
717  if (first_bit != 0) {
718    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
719  }
720}
721
722void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
723  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
724}
725
726// Test suspend flag, return target of taken suspend branch
727LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
728  // TODO(Arm64): re-enable suspend checks, once art_quick_test_suspend is implemented and
729  //   the suspend register is properly handled in the trampolines.
730#if 0
731  NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
732  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
733#else
734  // TODO(Arm64): Fake suspend check. Will always fail to branch. Remove this.
735  LIR* branch = NewLIR2((target == NULL) ? kA64Cbnz2rt : kA64Cbz2rt, rwzr, 0);
736  branch->target = target;
737  return branch;
738#endif
739}
740
741// Decrement register and branch on condition
742LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
743  // Combine sub & test using sub setflags encoding here
744  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
745  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
746  return OpCondBranch(c_code, target);
747}
748
749bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
750#if ANDROID_SMP != 0
751  // Start off by using the last LIR as the barrier; if it is not sufficient, generate a new one.
752  LIR* barrier = last_lir_insn_;
753
754  int dmb_flavor;
755  // TODO: revisit Arm barrier kinds
756  switch (barrier_kind) {
757    case kLoadStore: dmb_flavor = kISH; break;
758    case kLoadLoad: dmb_flavor = kISH; break;
759    case kStoreStore: dmb_flavor = kISHST; break;
760    case kStoreLoad: dmb_flavor = kISH; break;
761    default:
762      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
763      dmb_flavor = kSY;  // quiet gcc.
764      break;
765  }
766
767  bool ret = false;
768
769  // If the same barrier already exists, don't generate another.
770  if (barrier == nullptr
771      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
772    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
773    ret = true;
774  }
775
776  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
777  DCHECK(!barrier->flags.use_def_invalid);
778  barrier->u.m.def_mask = ENCODE_ALL;
779  return ret;
780#else
781  return false;
782#endif
783}
784
785void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
786  RegLocation rl_result;
787
788  rl_src = LoadValue(rl_src, kCoreReg);
789  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
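   // sbfm xd, xn, #0, #31 is the sxtw alias: sign-extend the 32-bit source into 64 bits.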
790  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 31);
791  StoreValueWide(rl_dest, rl_result);
792}
793
794void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
795                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
796  RegLocation rl_result;
797  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
798  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
799  GenDivZeroCheck(rl_src2.reg);
800  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
801  StoreValueWide(rl_dest, rl_result);
802}
803
804void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
805                             RegLocation rl_src2) {
806  RegLocation rl_result;
807
808  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
809  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
810  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
811  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
812  StoreValueWide(rl_dest, rl_result);
813}
814
815void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
816  RegLocation rl_result;
817
818  rl_src = LoadValueWide(rl_src, kCoreReg);
819  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
820  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
821  StoreValueWide(rl_dest, rl_result);
822}
823
824void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
825  RegLocation rl_result;
826
827  rl_src = LoadValueWide(rl_src, kCoreReg);
828  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
829  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
830  StoreValueWide(rl_dest, rl_result);
831}
832
833void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
834                              RegLocation rl_src1, RegLocation rl_src2) {
835  GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
836}
837
838void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
839                              RegLocation rl_src2) {
840  GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
841}
842
843void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
844                            RegLocation rl_src2) {
845  GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
846}
847
848void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
849                            RegLocation rl_src2) {
850  GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
851}
852
853void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
854                           RegLocation rl_src2) {
855  GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
856}
857
858void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
859                            RegLocation rl_src2) {
860  GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
861}
862
863/*
864 * Generate array load
865 */
866void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
867                             RegLocation rl_index, RegLocation rl_dest, int scale) {
868  // TODO(Arm64): check this.
869  UNIMPLEMENTED(WARNING);
870
871  RegisterClass reg_class = RegClassBySize(size);
872  int len_offset = mirror::Array::LengthOffset().Int32Value();
873  int data_offset;
874  RegLocation rl_result;
875  bool constant_index = rl_index.is_const;
876  rl_array = LoadValue(rl_array, kCoreReg);
877  if (!constant_index) {
878    rl_index = LoadValue(rl_index, kCoreReg);
879  }
880
881  if (rl_dest.wide) {
882    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
883  } else {
884    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
885  }
886
887  // If index is constant, just fold it into the data offset
888  if (constant_index) {
889    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
890  }
891
892  /* null object? */
893  GenNullCheck(rl_array.reg, opt_flags);
894
895  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
896  RegStorage reg_len;
897  if (needs_range_check) {
898    reg_len = AllocTemp();
899    /* Get len */
900    Load32Disp(rl_array.reg, len_offset, reg_len);
901    MarkPossibleNullPointerException(opt_flags);
902  } else {
903    ForceImplicitNullCheck(rl_array.reg, opt_flags);
904  }
905  if (rl_dest.wide || rl_dest.fp || constant_index) {
906    RegStorage reg_ptr;
907    if (constant_index) {
908      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
909    } else {
910      // No special indexed operation, lea + load w/ displacement
911      reg_ptr = AllocTemp();
912      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
913      FreeTemp(rl_index.reg);
914    }
915    rl_result = EvalLoc(rl_dest, reg_class, true);
916
917    if (needs_range_check) {
918      if (constant_index) {
919        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
920      } else {
921        GenArrayBoundsCheck(rl_index.reg, reg_len);
922      }
923      FreeTemp(reg_len);
924    }
925    LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
926    MarkPossibleNullPointerException(opt_flags);
927    if (!constant_index) {
928      FreeTemp(reg_ptr);
929    }
930    if (rl_dest.wide) {
931      StoreValueWide(rl_dest, rl_result);
932    } else {
933      StoreValue(rl_dest, rl_result);
934    }
935  } else {
936    // Offset base, then use indexed load
937    RegStorage reg_ptr = AllocTemp();
938    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
939    FreeTemp(rl_array.reg);
940    rl_result = EvalLoc(rl_dest, reg_class, true);
941
942    if (needs_range_check) {
943      GenArrayBoundsCheck(rl_index.reg, reg_len);
944      FreeTemp(reg_len);
945    }
946    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
947    MarkPossibleNullPointerException(opt_flags);
948    FreeTemp(reg_ptr);
949    StoreValue(rl_dest, rl_result);
950  }
951}
952
953/*
954 * Generate array store
955 *
956 */
957void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
958                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
959  // TODO(Arm64): check this.
960  UNIMPLEMENTED(WARNING);
961
962  RegisterClass reg_class = RegClassBySize(size);
963  int len_offset = mirror::Array::LengthOffset().Int32Value();
964  bool constant_index = rl_index.is_const;
965
966  int data_offset;
967  if (size == k64 || size == kDouble) {
968    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
969  } else {
970    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
971  }
972
973  // If index is constant, just fold it into the data offset.
974  if (constant_index) {
975    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
976  }
977
978  rl_array = LoadValue(rl_array, kCoreReg);
979  if (!constant_index) {
980    rl_index = LoadValue(rl_index, kCoreReg);
981  }
982
983  RegStorage reg_ptr;
984  bool allocated_reg_ptr_temp = false;
985  if (constant_index) {
986    reg_ptr = rl_array.reg;
987  } else if (IsTemp(rl_array.reg) && !card_mark) {
988    Clobber(rl_array.reg);
989    reg_ptr = rl_array.reg;
990  } else {
991    allocated_reg_ptr_temp = true;
992    reg_ptr = AllocTemp();
993  }
994
995  /* null object? */
996  GenNullCheck(rl_array.reg, opt_flags);
997
998  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
999  RegStorage reg_len;
1000  if (needs_range_check) {
1001    reg_len = AllocTemp();
1002    // NOTE: max live temps(4) here.
1003    /* Get len */
1004    Load32Disp(rl_array.reg, len_offset, reg_len);
1005    MarkPossibleNullPointerException(opt_flags);
1006  } else {
1007    ForceImplicitNullCheck(rl_array.reg, opt_flags);
1008  }
1009  /* at this point, reg_ptr points to array, 2 live temps */
1010  if (rl_src.wide || rl_src.fp || constant_index) {
1011    if (rl_src.wide) {
1012      rl_src = LoadValueWide(rl_src, reg_class);
1013    } else {
1014      rl_src = LoadValue(rl_src, reg_class);
1015    }
1016    if (!constant_index) {
1017      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
1018    }
1019    if (needs_range_check) {
1020      if (constant_index) {
1021        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1022      } else {
1023        GenArrayBoundsCheck(rl_index.reg, reg_len);
1024      }
1025      FreeTemp(reg_len);
1026    }
1027
1028    StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
1029    MarkPossibleNullPointerException(opt_flags);
1030  } else {
1031    /* reg_ptr -> array data */
1032    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1033    rl_src = LoadValue(rl_src, reg_class);
1034    if (needs_range_check) {
1035      GenArrayBoundsCheck(rl_index.reg, reg_len);
1036      FreeTemp(reg_len);
1037    }
1038    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
1039    MarkPossibleNullPointerException(opt_flags);
1040  }
1041  if (allocated_reg_ptr_temp) {
1042    FreeTemp(reg_ptr);
1043  }
1044  if (card_mark) {
1045    MarkGCCard(rl_src.reg, rl_array.reg);
1046  }
1047}
1048
1049void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
1050                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
1051  OpKind op = kOpBkpt;
1052  // Per spec, we only care about low 6 bits of shift amount.
1053  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
1054  rl_src = LoadValueWide(rl_src, kCoreReg);
1055  if (shift_amount == 0) {
1056    StoreValueWide(rl_dest, rl_src);
1057    return;
1058  }
1059
1060  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1061  switch (opcode) {
1062    case Instruction::SHL_LONG:
1063    case Instruction::SHL_LONG_2ADDR:
1064      op = kOpLsl;
1065      break;
1066    case Instruction::SHR_LONG:
1067    case Instruction::SHR_LONG_2ADDR:
1068      op = kOpAsr;
1069      break;
1070    case Instruction::USHR_LONG:
1071    case Instruction::USHR_LONG_2ADDR:
1072      op = kOpLsr;
1073      break;
1074    default:
1075      LOG(FATAL) << "Unexpected case";
1076  }
1077  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
1078  StoreValueWide(rl_dest, rl_result);
1079}
1080
1081void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
1082                                     RegLocation rl_src1, RegLocation rl_src2) {
1083  if ((opcode == Instruction::SUB_LONG) || (opcode == Instruction::SUB_LONG_2ADDR)) {
1084    if (!rl_src2.is_const) {
1085      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1086    }
1087  } else {
1088    // Commutativity: make sure the constant operand ends up in rl_src2.
1089    if (!rl_src2.is_const) {
1090      DCHECK(rl_src1.is_const);
1091      std::swap(rl_src1, rl_src2);
1092    }
1093  }
1094  DCHECK(rl_src2.is_const);
1095
1096  OpKind op = kOpBkpt;
1097  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
1098
1099  switch (opcode) {
1100    case Instruction::ADD_LONG:
1101    case Instruction::ADD_LONG_2ADDR:
1102      op = kOpAdd;
1103      break;
1104    case Instruction::SUB_LONG:
1105    case Instruction::SUB_LONG_2ADDR:
1106      op = kOpSub;
1107      break;
1108    case Instruction::AND_LONG:
1109    case Instruction::AND_LONG_2ADDR:
1110      op = kOpAnd;
1111      break;
1112    case Instruction::OR_LONG:
1113    case Instruction::OR_LONG_2ADDR:
1114      op = kOpOr;
1115      break;
1116    case Instruction::XOR_LONG:
1117    case Instruction::XOR_LONG_2ADDR:
1118      op = kOpXor;
1119      break;
1120    default:
1121      LOG(FATAL) << "Unexpected opcode";
1122  }
1123
1124  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1125  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1126  OpRegRegImm(op, rl_result.reg, rl_src1.reg, val);
1127  StoreValueWide(rl_dest, rl_result);
1128}
1129
1130/**
1131 * @brief Split a register list into pairs or single registers.
1132 *
1133 * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
1134 * @code
1135 *   int reg1 = -1, reg2 = -1;
1136 *   while (reg_mask) {
1137 *     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
1138 *     if (UNLIKELY(reg2 < 0)) {
1139 *       // Single register in reg1.
1140 *     } else {
1141 *       // Pair in reg1, reg2.
1142 *     }
1143 *   }
1144 * @endcode
1145 */
1146uint32_t Arm64Mir2Lir::GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
1147  // Find first register.
1148  int first_bit_set = __builtin_ctz(reg_mask) + 1;
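      // reg_mask has already been shifted past the registers handled so far, so bit offsets
      // are relative to the previous register; adding them to *reg1 (initialized to -1 by
      // the caller, as in the example above) recovers absolute register numbers.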
1149  int reg = *reg1 + first_bit_set;
1150  reg_mask >>= first_bit_set;
1151
1152  if (LIKELY(reg_mask)) {
1153    // Save the first register, find the second and use the pair opcode.
1154    int second_bit_set = __builtin_ctz(reg_mask) + 1;
1155    *reg2 = reg;
1156    reg_mask >>= second_bit_set;
1157    *reg1 = reg + second_bit_set;
1158    return reg_mask;
1159  }
1160
1161  // Use the single opcode, as we just have one register.
1162  *reg1 = reg;
1163  *reg2 = -1;
1164  return reg_mask;
1165}
1166
1167void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
1168  int reg1 = -1, reg2 = -1;
1169  const int reg_log2_size = 3;
1170
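      // The byte offset is scaled down to 8-byte slots here; the ldr/ldp displacements
      // below are presumably encoded in the same scaled units, and offset advances by two
      // slots per loop iteration.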
1171  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1172    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
1173    if (UNLIKELY(reg2 < 0)) {
1174      NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1175    } else {
1176      NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1177              RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1178    }
1179  }
1180}
1181
1182void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
1183  int reg1 = -1, reg2 = -1;
1184  const int reg_log2_size = 3;
1185
1186  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1187    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
1188    if (UNLIKELY(reg2 < 0)) {
1189      NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1190    } else {
1191      NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
1192              RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
1193    }
1194  }
1195}
1196
1197void Arm64Mir2Lir::UnSpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
1198  int reg1 = -1, reg2 = -1;
1199  const int reg_log2_size = 3;
1200
1201  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1202    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
1203    if (UNLIKELY(reg2 < 0)) {
1204      NewLIR3(FWIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1205    } else {
1206      NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1207              RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1208    }
1209  }
1210}
1211
1212// TODO(Arm64): consider using ld1 and st1?
1213void Arm64Mir2Lir::SpillFPRegs(RegStorage base, int offset, uint32_t reg_mask) {
1214  int reg1 = -1, reg2 = -1;
1215  const int reg_log2_size = 3;
1216
1217  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
1218    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
1219    if (UNLIKELY(reg2 < 0)) {
1220      NewLIR3(FWIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1221    } else {
1222      NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
1223              RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
1224    }
1225  }
1226}
1227
1228}  // namespace art
1229