int_arm.cc revision 99ad7230ccaace93bf323dea9790f35fe991a4a2
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Thumb2 ISA. */

#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"

namespace art {

LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) {
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}

/*
 * Generate a Thumb2 IT instruction, which can nullify up to
 * four subsequent instructions based on a condition and its
 * inverse.  The condition applies to the first instruction, which
 * is executed if the condition is met.  The string "guide" consists
 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
 * A "T" means the instruction is executed if the condition is
 * met, and an "E" means the instruction is executed if the condition
 * is not met.
 */
LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
  int mask;
  int mask3 = 0;
  int mask2 = 0;
  int mask1 = 0;
  ArmConditionCode code = ArmConditionEncoding(ccode);
  int cond_bit = code & 1;
  int alt_bit = cond_bit ^ 1;

  // Note: case fallthroughs intentional
  switch (strlen(guide)) {
    case 3:
      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
    case 2:
      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
    case 1:
      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
      break;
    case 0:
      break;
    default:
      LOG(FATAL) << "OAT: bad case in OpIT";
  }
  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
       (1 << (3 - strlen(guide)));
  return NewLIR2(kThumb2It, code, mask);
}
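
// Worked example of the mask encoding (illustrative note, not upstream text):
// for OpIT(kCondEq, "E"), ArmConditionEncoding(kCondEq) is kArmCondEq (0b0000),
// so cond_bit = 0 and alt_bit = 1.  The single 'E' guide sets mask3 = 1, and
// the terminating 1 lands at bit (3 - strlen("E")) = 2, giving
// mask = (1 << 3) | (1 << 2) = 0b1100.  The assembler emits "ITE EQ": the
// first instruction in the shadow executes on EQ, the second on NE.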

/*
 * 64-bit 3-way compare function.
 *     mov   rX, #-1
 *     cmp   op1hi, op2hi
 *     blt   done
 *     bgt   flip
 *     sub   rX, op1lo, op2lo (treat as unsigned)
 *     beq   done
 *     ite   hi
 *     mov(hi)   rX, #-1
 *     mov(!hi)  rX, #1
 * flip:
 *     neg   rX
 * done:
 */
void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LIR* target1;
  LIR* target2;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  int t_reg = AllocTemp();
  LoadConstant(t_reg, -1);
  OpRegReg(kOpCmp, rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
  LIR* branch1 = OpCondBranch(kCondLt, NULL);
  LIR* branch2 = OpCondBranch(kCondGt, NULL);
  OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  LIR* branch3 = OpCondBranch(kCondEq, NULL);

  OpIT(kCondHi, "E");
  NewLIR2(kThumb2MovI8M, t_reg, ModifiedImmediate(-1));
  LoadConstant(t_reg, 1);
  GenBarrier();

  target2 = NewLIR0(kPseudoTargetLabel);
  OpRegReg(kOpNeg, t_reg, t_reg);

  target1 = NewLIR0(kPseudoTargetLabel);

  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
  rl_temp.reg.SetReg(t_reg);
  StoreValue(rl_dest, rl_temp);
  FreeTemp(t_reg);

  branch1->target = target1;
  branch2->target = target2;
  branch3->target = branch1->target;
}

void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  DCHECK_GE(ModifiedImmediate(val_lo), 0);
  DCHECK_GE(ModifiedImmediate(val_hi), 0);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  int32_t low_reg = rl_src1.reg.GetReg();
  int32_t high_reg = rl_src1.reg.GetHighReg();

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    int t_reg = AllocTemp();
    NewLIR4(kThumb2OrrRRRs, t_reg, low_reg, high_reg, 0);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}
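
// How the decomposition above works (an illustrative note, not upstream text):
// a signed 64-bit compare is decided by the high words unless they are equal,
// in which case the low words are compared *unsigned*.  For example, for
// (hi:lo) < (val_hi:val_lo) with hi == val_hi, the branch is taken iff
// lo <u val_lo, which is why ccode is rewritten to kCondUlt before the final
// low-word compare.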

void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  rl_src = LoadValue(rl_src, kCoreReg);
  ConditionCode ccode = mir->meta.ccode;
  if (mir->ssa_rep->num_uses == 1) {
    // CONST case
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    // Change kCondNe to kCondEq for the special cases below.
    if (ccode == kCondNe) {
      ccode = kCondEq;
      std::swap(true_val, false_val);
    }
    bool cheap_false_val = InexpensiveConstantInt(false_val);
    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
      OpRegRegImm(kOpSub, rl_result.reg.GetReg(), rl_src.reg.GetReg(), -true_val);
      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
      OpIT(true_val == 0 ? kCondNe : kCondUge, "");
      LoadConstant(rl_result.reg.GetReg(), false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
      OpRegRegImm(kOpRsub, rl_result.reg.GetReg(), rl_src.reg.GetReg(), 1);
      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
      OpIT(kCondLs, "");
      LoadConstant(rl_result.reg.GetReg(), false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
      OpRegImm(kOpCmp, rl_src.reg.GetReg(), 0);
      OpIT(ccode, "E");
      LoadConstant(rl_result.reg.GetReg(), true_val);
      LoadConstant(rl_result.reg.GetReg(), false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else {
      // Unlikely case - could be tuned.
      int t_reg1 = AllocTemp();
      int t_reg2 = AllocTemp();
      LoadConstant(t_reg1, true_val);
      LoadConstant(t_reg2, false_val);
      OpRegImm(kOpCmp, rl_src.reg.GetReg(), 0);
      OpIT(ccode, "E");
      OpRegCopy(rl_result.reg.GetReg(), t_reg1);
      OpRegCopy(rl_result.reg.GetReg(), t_reg2);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    }
  } else {
    // MOVE case
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
    rl_true = LoadValue(rl_true, kCoreReg);
    rl_false = LoadValue(rl_false, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegImm(kOpCmp, rl_src.reg.GetReg(), 0);
    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
      OpIT(NegateComparison(ccode), "");
      OpRegCopy(rl_result.reg.GetReg(), rl_false.reg.GetReg());
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
      OpIT(ccode, "");
      OpRegCopy(rl_result.reg.GetReg(), rl_true.reg.GetReg());
    } else {  // Normal - select between the two.
      OpIT(ccode, "E");
      OpRegCopy(rl_result.reg.GetReg(), rl_true.reg.GetReg());
      OpRegCopy(rl_result.reg.GetReg(), rl_false.reg.GetReg());
    }
    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
  }
  StoreValue(rl_dest, rl_result);
}
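
// Sketch of what the generic CONST path above emits (assumed registers, for
// illustration only) for "result = (src == 0) ? 5 : 7":
//     cmp   r_src, #0
//     ite   eq
//     moveq r_result, #5
//     movne r_result, #7
// The GenBarrier() call keeps later scheduling passes from moving
// instructions into or out of the two-instruction IT shadow.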

void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    RegLocation rl_temp = UpdateLocWide(rl_src2);
    // Do special compare/branch against simple const operand if not already in registers.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if ((rl_temp.location != kLocPhysReg) &&
        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
      return;
    }
  }
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
  switch (ccode) {
    case kCondEq:
      OpCondBranch(kCondNe, not_taken);
      break;
    case kCondNe:
      OpCondBranch(kCondNe, taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpRegReg(kOpCmp, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  OpCondBranch(ccode, taken);
}

/*
 * Generate a register comparison to an immediate and branch.  Caller
 * is responsible for setting branch target field.
 */
LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value,
                                LIR* target) {
  LIR* branch;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  /*
   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
   * branch forward to a launch pad, they will frequently not reach - and thus have to
   * be converted to a long form during assembly (which will trigger another assembly
   * pass).  Here we estimate the branch distance for checks, and if large directly
   * generate the long form in an attempt to avoid an extra assembly pass.
   * TODO: consider interspersing launchpads in code following unconditional branches.
   */
  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
  if (!skip && (ARM_LOWREG(reg)) && (check_value == 0) &&
     ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
    branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                     reg, 0);
  } else {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kThumbBCond, 0, arm_cond);
  }
  branch->target = target;
  return branch;
}
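
// Background for the CBZ/CBNZ special case above (a note, not upstream text):
// the 16-bit CBZ/CBNZ encodings can only test a low register (r0-r7) against
// zero and can only branch forward by 0-126 bytes.  That is why the code
// restricts itself to low registers, a zero check_value, and EQ/NE, and why
// checks whose throw targets are likely out of range skip the short form
// entirely.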

LIR* ArmMir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) {
  LIR* res;
  int opcode;
  if (ARM_FPREG(r_dest) || ARM_FPREG(r_src))
    return OpFpRegCopy(r_dest, r_src);
  if (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src))
    opcode = kThumbMovRR;
  else if (!ARM_LOWREG(r_dest) && !ARM_LOWREG(r_src))
     opcode = kThumbMovRR_H2H;
  else if (ARM_LOWREG(r_dest))
     opcode = kThumbMovRR_H2L;
  else
     opcode = kThumbMovRR_L2H;
  res = RawLIR(current_dalvik_offset_, opcode, r_dest, r_src);
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

LIR* ArmMir2Lir::OpRegCopy(int r_dest, int r_src) {
  LIR* res = OpRegCopyNoInsert(r_dest, r_src);
  AppendLIR(res);
  return res;
}

void ArmMir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, int src_lo,
                               int src_hi) {
  bool dest_fp = ARM_FPREG(dest_lo) && ARM_FPREG(dest_hi);
  bool src_fp = ARM_FPREG(src_lo) && ARM_FPREG(src_hi);
  DCHECK_EQ(ARM_FPREG(src_lo), ARM_FPREG(src_hi));
  DCHECK_EQ(ARM_FPREG(dest_lo), ARM_FPREG(dest_hi));
  if (dest_fp) {
    if (src_fp) {
      OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi));
    } else {
      NewLIR3(kThumb2Fmdrr, S2d(dest_lo, dest_hi), src_lo, src_hi);
    }
  } else {
    if (src_fp) {
      NewLIR3(kThumb2Fmrrd, dest_lo, dest_hi, S2d(src_lo, src_hi));
    } else {
      // Handle overlap
      if (src_hi == dest_lo) {
        DCHECK_NE(src_lo, dest_hi);
        OpRegCopy(dest_hi, src_hi);
        OpRegCopy(dest_lo, src_lo);
      } else {
        OpRegCopy(dest_lo, src_lo);
        OpRegCopy(dest_hi, src_hi);
      }
    }
  }
}
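
// Why the copy order in the overlap case matters (illustration, with assumed
// register assignments): copying the pair (r0, r1) into (r1, r2), src_hi (r1)
// aliases dest_lo (r1).  Copying low first would do "mov r1, r0" and destroy
// the source high word before "mov r2, r1" reads it, so the high word is
// copied first instead.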

// Table of magic divisors
struct MagicTable {
  uint32_t magic;
  uint32_t shift;
  DividePattern pattern;
};

static const MagicTable magic_table[] = {
  {0, 0, DivideNone},        // 0
  {0, 0, DivideNone},        // 1
  {0, 0, DivideNone},        // 2
  {0x55555556, 0, Divide3},  // 3
  {0, 0, DivideNone},        // 4
  {0x66666667, 1, Divide5},  // 5
  {0x2AAAAAAB, 0, Divide3},  // 6
  {0x92492493, 2, Divide7},  // 7
  {0, 0, DivideNone},        // 8
  {0x38E38E39, 1, Divide5},  // 9
  {0x66666667, 2, Divide5},  // 10
  {0x2E8BA2E9, 1, Divide5},  // 11
  {0x2AAAAAAB, 1, Divide5},  // 12
  {0x4EC4EC4F, 2, Divide5},  // 13
  {0x92492493, 3, Divide7},  // 14
  {0x88888889, 3, Divide7},  // 15
};

// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns
  if (!is_div) {
    return false;
  }

  int r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  int r_hi = AllocTemp();
  int r_lo = AllocTemp();
  NewLIR4(kThumb2Smull, r_lo, r_hi, r_magic, rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi,
               rl_src.reg.GetReg(), EncodeShift(kArmAsr, 31));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg.GetReg(), 31);
      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo, r_hi,
               EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    case Divide7:
      OpRegReg(kOpAdd, r_hi, rl_src.reg.GetReg());
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg.GetReg(), 31);
      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo, r_hi,
               EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValue(rl_dest, rl_result);
  return true;
}
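
// Worked example of the reciprocal-multiply trick (illustrative): for lit = 3
// the table supplies magic = 0x55555556, roughly 2^33 / 3.  SMULL leaves the
// high 32 bits of magic * n in r_hi, so for n = 9 the product is
// 0x3_0000_0006 and r_hi = 3; the Divide3 pattern then computes
// q = r_hi - (n >> 31) = 3.  For n = -9, r_hi = -4 and n >> 31 = -1, giving
// q = -4 - (-1) = -3, matching Java's truncate-toward-zero division.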

LIR* ArmMir2Lir::GenRegMemCheck(ConditionCode c_code,
                    int reg1, int base, int offset, ThrowKind kind) {
  LOG(FATAL) << "Unexpected use of GenRegMemCheck for Arm";
  return NULL;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                      RegLocation rl_src2, bool is_div, bool check_zero) {
  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
  return rl_dest;
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
  return rl_dest;
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit,
                                     bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Put the literal in a temp.
  int lit_temp = AllocTemp();
  LoadConstant(lit_temp, lit);
  // Use the generic case for div/rem with arg2 in a register.
  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
  FreeTemp(lit_temp);

  return rl_result;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2,
                                  bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (is_div) {
    // Simple case, use sdiv instruction.
    OpRegRegReg(kOpDiv, rl_result.reg.GetReg(), reg1, reg2);
  } else {
    // Remainder case, use the following code:
    // temp = reg1 / reg2      - integer division
    // temp = temp * reg2
    // dest = reg1 - temp

    int temp = AllocTemp();
    OpRegRegReg(kOpDiv, temp, reg1, reg2);
    OpRegReg(kOpMul, temp, reg2);
    OpRegRegReg(kOpSub, rl_result.reg.GetReg(), reg1, temp);
    FreeTemp(temp);
  }

  return rl_result;
}
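
// Numeric check of the remainder expansion above (illustrative): SDIV
// truncates toward zero, so for reg1 = -7, reg2 = 3 it yields temp = -2,
// then temp = -2 * 3 = -6 and dest = -7 - (-6) = -1, which matches Java
// (and C99) semantics for -7 % 3.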

bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  OpIT((is_min) ? kCondGt : kCondLt, "E");
  OpRegReg(kOpMov, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
  OpRegReg(kOpMov, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
  GenBarrier();
  StoreValue(rl_dest, rl_result);
  return true;
}
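
// For Math.min the sequence above reads (with assumed registers):
//     cmp   r_src1, r_src2
//     ite   gt
//     movgt r_result, r_src2   ; src1 > src2, so src2 is the minimum
//     movle r_result, r_src1
// Math.max simply flips the tested condition to LT.  (Illustrative note.)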

bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address.wide = 0;  // ignore high half in info->args[1]
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == kLong) {
    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
    if (rl_address.reg.GetReg() != rl_result.reg.GetReg()) {
      LoadBaseDisp(rl_address.reg.GetReg(), 0, rl_result.reg.GetReg(), kWord, INVALID_SREG);
      LoadBaseDisp(rl_address.reg.GetReg(), 4, rl_result.reg.GetHighReg(), kWord, INVALID_SREG);
    } else {
      // The address register aliases the low result register; load the high
      // half first so the address survives for the second load.
      LoadBaseDisp(rl_address.reg.GetReg(), 4, rl_result.reg.GetHighReg(), kWord, INVALID_SREG);
      LoadBaseDisp(rl_address.reg.GetReg(), 0, rl_result.reg.GetReg(), kWord, INVALID_SREG);
    }
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
    LoadBaseDisp(rl_address.reg.GetReg(), 0, rl_result.reg.GetReg(), size, INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address.wide = 0;  // ignore high half in info->args[1]
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  if (size == kLong) {
    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg.GetReg(), 0, rl_value.reg.GetReg(), kWord);
    StoreBaseDisp(rl_address.reg.GetReg(), 4, rl_value.reg.GetHighReg(), kWord);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg.GetReg(), 0, rl_value.reg.GetReg(), size);
  }
  return true;
}

void ArmMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) {
  LOG(FATAL) << "Unexpected use of OpLea for Arm";
}

void ArmMir2Lir::OpTlsCmp(ThreadOffset offset, int val) {
  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
}

bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset.wide = 0;  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
  // r_ptr (1), new_value (2), expected (2) and ldrexd result (2). If neither expected nor
  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
  // into the same temps, reducing the number of required temps down to 5. We shall work
  // around the potentially locked temp by using LR for r_ptr, unconditionally.
  // TODO: Pass information about the need for more temps to the stack frame generation
  // code so that we can rely on being able to allocate enough temps.
  DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp);
  MarkTemp(rARM_LR);
  FreeTemp(rARM_LR);
  LockTemp(rARM_LR);
  bool load_early = true;
  if (is_long) {
    bool expected_is_core_reg =
        rl_src_expected.location == kLocPhysReg && !IsFpReg(rl_src_expected.reg.GetReg());
    bool new_value_is_core_reg =
        rl_src_new_value.location == kLocPhysReg && !IsFpReg(rl_src_new_value.reg.GetReg());
    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(rl_src_expected.reg.GetReg());
    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(rl_src_new_value.reg.GetReg());

    if (!expected_is_good_reg && !new_value_is_good_reg) {
      // Neither expected nor new_value is in a non-temp reg; both must be loaded late.
      load_early = false;
      // Make sure they are not in the temp regs and the load will not be skipped.
      if (expected_is_core_reg) {
        FlushRegWide(rl_src_expected.reg.GetReg(), rl_src_expected.reg.GetHighReg());
        ClobberSReg(rl_src_expected.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
        rl_src_expected.location = kLocDalvikFrame;
      }
      if (new_value_is_core_reg) {
        FlushRegWide(rl_src_new_value.reg.GetReg(), rl_src_new_value.reg.GetHighReg());
        ClobberSReg(rl_src_new_value.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
        rl_src_new_value.location = kLocDalvikFrame;
      }
    }
  }

  // Release store semantics, get the barrier out of the way.  TODO: revisit
  GenMemBarrier(kStoreLoad);

  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
  RegLocation rl_new_value;
  if (!is_long) {
    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
  } else if (load_early) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(rl_new_value.reg.GetReg(), rl_object.reg.GetReg());
  }

  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);

  int r_ptr = rARM_LR;
  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg.GetReg(), rl_offset.reg.GetReg());

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.reg.GetReg());
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.reg.GetReg());

  RegLocation rl_expected;
  if (!is_long) {
    rl_expected = LoadValue(rl_src_expected, kCoreReg);
  } else if (load_early) {
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
    int low_reg = AllocTemp();
    int high_reg = AllocTemp();
    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
    rl_expected = rl_new_value;
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  int r_tmp = AllocTemp();
  LIR* target = NewLIR0(kPseudoTargetLabel);

  if (is_long) {
    int r_tmp_high = AllocTemp();
    if (!load_early) {
      LoadValueDirectWide(rl_src_expected, rl_expected.reg.GetReg(), rl_expected.reg.GetHighReg());
    }
    NewLIR3(kThumb2Ldrexd, r_tmp, r_tmp_high, r_ptr);
    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetReg());
    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHighReg());
    if (!load_early) {
      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg.GetReg(), rl_new_value.reg.GetHighReg());
    }
    // Make sure we use ORR that sets the ccode
    if (ARM_LOWREG(r_tmp) && ARM_LOWREG(r_tmp_high)) {
      NewLIR2(kThumbOrr, r_tmp, r_tmp_high);
    } else {
      NewLIR4(kThumb2OrrRRRs, r_tmp, r_tmp, r_tmp_high, 0);
    }
    FreeTemp(r_tmp_high);  // Now unneeded

    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strexd /* eq */, r_tmp, rl_new_value.reg.GetReg(), rl_new_value.reg.GetHighReg(), r_ptr);

  } else {
    NewLIR3(kThumb2Ldrex, r_tmp, r_ptr, 0);
    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetReg());
    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strex /* eq */, r_tmp, rl_new_value.reg.GetReg(), r_ptr, 0);
  }

  // Still one conditional left from OpIT(kCondEq, "T") from either branch
  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
  OpCondBranch(kCondEq, target);

  if (!load_early) {
    FreeTemp(rl_expected.reg.GetReg());  // Now unneeded.
    FreeTemp(rl_expected.reg.GetHighReg());  // Now unneeded.
  }

  // result := (tmp1 != 0) ? 0 : 1;
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpRsub, rl_result.reg.GetReg(), r_tmp, 1);
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  OpIT(kCondUlt, "");
  LoadConstant(rl_result.reg.GetReg(), 0); /* cc */
  FreeTemp(r_tmp);  // Now unneeded.

  StoreValue(rl_dest, rl_result);

  // Now, restore lr to its non-temp status.
  Clobber(rARM_LR);
  UnmarkTemp(rARM_LR);
  return true;
}
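
// Shape of the emitted 32-bit CAS loop (illustrative, with assumed registers):
//   retry:
//     ldrex   r_tmp, [r_ptr]          ; load-exclusive the current value
//     subs    r_tmp, r_tmp, r_expected
//     itt     eq                      ; both conditionals only run on a match
//     strexeq r_tmp, r_new, [r_ptr]   ; try the store; r_tmp = 1 on failure
//     cmpeq   r_tmp, #1
//     beq     retry                   ; lost the exclusive monitor - retry
// On a value mismatch the strexeq/cmpeq pair is skipped, the NE flags fall
// through, and r_tmp holds a nonzero difference that the rsbs/movcc sequence
// above turns into the boolean result.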

LIR* ArmMir2Lir::OpPcRelLoad(int reg, LIR* target) {
  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg, 0, 0, 0, 0, target);
}

LIR* ArmMir2Lir::OpVldm(int rBase, int count) {
  return NewLIR3(kThumb2Vldms, rBase, fr0, count);
}

LIR* ArmMir2Lir::OpVstm(int rBase, int count) {
  return NewLIR3(kThumb2Vstms, rBase, fr0, count);
}

void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
                   EncodeShift(kArmLsl, second_bit - first_bit));
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg.GetReg(), rl_result.reg.GetReg(), first_bit);
  }
}
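
// Example of the two-bit-multiplier strength reduction (illustrative): for
// lit = 10 = 0b1010, first_bit = 1 and second_bit = 3, so the code emits
//     add r_result, r_src, r_src, lsl #2   ; r_result = src * 5
//     lsl r_result, r_result, #1           ; r_result = src * 10
// replacing a multiply with an add and a shift.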

void ArmMir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) {
  int t_reg = AllocTemp();
  NewLIR4(kThumb2OrrRRRs, t_reg, reg_lo, reg_hi, 0);
  FreeTemp(t_reg);
  GenCheck(kCondEq, kThrowDivZero);
}

// Test suspend flag, return target of taken suspend branch
LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
  NewLIR2(kThumbSubRI8, rARM_SUSPEND, 1);
  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
}

// Decrement register and branch on condition
LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) {
  // Combine sub & test using sub setflags encoding here
  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  return OpCondBranch(c_code, target);
}

void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
  LIR* barrier = last_lir_insn_;

  int dmb_flavor;
  // TODO: revisit Arm barrier kinds
  switch (barrier_kind) {
    case kLoadStore: dmb_flavor = kISH; break;
    case kLoadLoad: dmb_flavor = kISH; break;
    case kStoreStore: dmb_flavor = kISHST; break;
    case kStoreLoad: dmb_flavor = kISH; break;
    default:
      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
      dmb_flavor = kSY;  // quiet gcc.
      break;
  }

  // If the same barrier already exists, don't generate another.
  if (barrier == nullptr || barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor) {
    barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
  }

  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
  DCHECK(!barrier->flags.use_def_invalid);
  barrier->u.m.def_mask = ENCODE_ALL;
#endif
}

void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  int z_reg = AllocTemp();
  LoadConstantNoClobber(z_reg, 0);
  // Check for destructive overlap
  if (rl_result.reg.GetReg() == rl_src.reg.GetHighReg()) {
    // Save the source high word before the low-word subtract clobbers it.
    int t_reg = AllocTemp();
    OpRegCopy(t_reg, rl_src.reg.GetHighReg());
    OpRegRegReg(kOpSub, rl_result.reg.GetReg(), z_reg, rl_src.reg.GetReg());
    OpRegRegReg(kOpSbc, rl_result.reg.GetHighReg(), z_reg, t_reg);
    FreeTemp(t_reg);
  } else {
    OpRegRegReg(kOpSub, rl_result.reg.GetReg(), z_reg, rl_src.reg.GetReg());
    OpRegRegReg(kOpSbc, rl_result.reg.GetHighReg(), z_reg, rl_src.reg.GetHighReg());
  }
  FreeTemp(z_reg);
  StoreValueWide(rl_dest, rl_result);
}
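
// Explanatory note: -x for a 64-bit value is computed as (0 - lo) with the
// borrow propagated into (0 - hi), i.e. subs/sbc against a zero register.
// The overlap branch matters when the allocator assigns the result low word
// to the same register as the source high word: without the temp copy, the
// subtract would overwrite src.hi before the sbc consumes it.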

void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
    /*
     * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
     * dest     = src1.lo * src2.lo;
     * tmp1    += src1.lo * src2.hi;
     * dest.hi += tmp1;
     *
     * To pull off inline multiply, we have a worst-case requirement of 7 temporary
     * registers.  Normally for Arm, we get 5.  We can get to 6 by including
     * lr in the temp set.  The only problematic case is all operands and result are
     * distinct, and none have been promoted.  In that case, we can succeed by aggressively
     * freeing operand temp registers after they are no longer needed.  All other cases
     * can proceed normally.  We'll just punt on the case of the result having a misaligned
     * overlap with either operand and send that case to a runtime handler.
     */
    RegLocation rl_result;
    if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
      ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pLmul);
      FlushAllRegs();
      CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
      rl_result = GetReturnWide(false);
      StoreValueWide(rl_dest, rl_result);
      return;
    }

    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    int reg_status = 0;
    int res_lo = INVALID_REG;
    int res_hi = INVALID_REG;
    bool dest_promoted = rl_dest.location == kLocPhysReg && !rl_dest.reg.IsInvalid() &&
        !IsTemp(rl_dest.reg.GetReg()) && !IsTemp(rl_dest.reg.GetHighReg());
    bool src1_promoted = !IsTemp(rl_src1.reg.GetReg()) && !IsTemp(rl_src1.reg.GetHighReg());
    bool src2_promoted = !IsTemp(rl_src2.reg.GetReg()) && !IsTemp(rl_src2.reg.GetHighReg());
    // Check if rl_dest is *not* either operand and we have enough temp registers.
    if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
        (dest_promoted || src1_promoted || src2_promoted)) {
      // In this case, we do not need to manually allocate temp registers for result.
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      res_lo = rl_result.reg.GetReg();
      res_hi = rl_result.reg.GetHighReg();
    } else {
      res_lo = AllocTemp();
      if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
        // In this case, we have enough temp registers to be allocated for result.
        res_hi = AllocTemp();
        reg_status = 1;
      } else {
        // In this case, all temps are now allocated.
        // res_hi will be allocated after we can free src1_hi.
        reg_status = 2;
      }
    }

    // Temporarily add LR to the temp pool, and assign it to tmp1
    MarkTemp(rARM_LR);
    FreeTemp(rARM_LR);
    int tmp1 = rARM_LR;
    LockTemp(rARM_LR);

    if (rl_src1.reg.GetReg() == rl_src2.reg.GetReg()) {
      DCHECK_NE(res_hi, INVALID_REG);
      DCHECK_NE(res_lo, INVALID_REG);
      NewLIR3(kThumb2MulRRR, tmp1, rl_src1.reg.GetReg(), rl_src1.reg.GetHighReg());
      NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
      OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
    } else {
      NewLIR3(kThumb2MulRRR, tmp1, rl_src2.reg.GetReg(), rl_src1.reg.GetHighReg());
      if (reg_status == 2) {
        DCHECK_EQ(res_hi, INVALID_REG);
        DCHECK_NE(rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
        DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
        FreeTemp(rl_src1.reg.GetHighReg());
        res_hi = AllocTemp();
      }
      DCHECK_NE(res_hi, INVALID_REG);
      DCHECK_NE(res_lo, INVALID_REG);
      NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src2.reg.GetReg(), rl_src1.reg.GetReg());
      NewLIR4(kThumb2Mla, tmp1, rl_src1.reg.GetReg(), rl_src2.reg.GetHighReg(), tmp1);
      NewLIR4(kThumb2AddRRR, res_hi, tmp1, res_hi, 0);
      if (reg_status == 2) {
        // Clobber rl_src1 since it was corrupted.
        FreeTemp(rl_src1.reg.GetReg());
        Clobber(rl_src1.reg.GetReg());
        Clobber(rl_src1.reg.GetHighReg());
      }
    }

    // Now, restore lr to its non-temp status.
    FreeTemp(tmp1);
    Clobber(rARM_LR);
    UnmarkTemp(rARM_LR);

    if (reg_status != 0) {
      // We had manually allocated registers for rl_result.
      // Now construct a RegLocation.
      rl_result = GetReturnWide(false);  // Just using as a template.
      rl_result.reg.SetReg(res_lo);
      rl_result.reg.SetHighReg(res_hi);
    }

    StoreValueWide(rl_dest, rl_result);
}
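
// Why the three-multiply decomposition is enough (illustrative): writing
// src1 = a * 2^32 + b and src2 = c * 2^32 + d, the 64-bit product mod 2^64 is
//     (b * d) + ((a * d + b * c) << 32)
// since the a*c term overflows entirely out of 64 bits.  UMULL supplies the
// full 64-bit b*d, and the two MUL/MLA results only feed the high word.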

void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
}

void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
}

void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
}

void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
}

void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenXorLong for Arm";
}

/*
 * Generate array load
 */
void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;
  RegLocation rl_result;
  bool constant_index = rl_index.is_const;
  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  if (rl_dest.wide) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  /* null object? */
  GenNullCheck(rl_array.reg.GetReg(), opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  int reg_len = INVALID_REG;
  if (needs_range_check) {
    reg_len = AllocTemp();
    /* Get len */
    LoadWordDisp(rl_array.reg.GetReg(), len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg.GetReg(), opt_flags);
  }
  if (rl_dest.wide || rl_dest.fp || constant_index) {
    int reg_ptr;
    if (constant_index) {
      reg_ptr = rl_array.reg.GetReg();  // NOTE: must not alter reg_ptr in constant case.
    } else {
      // No special indexed operation, lea + load w/ displacement
      reg_ptr = AllocTemp();
      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg.GetReg(), rl_index.reg.GetReg(),
                       EncodeShift(kArmLsl, scale));
      FreeTemp(rl_index.reg.GetReg());
    }
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.reg.GetReg(), kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }
    if (rl_dest.wide) {
      LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(),
        INVALID_SREG);
      MarkPossibleNullPointerException(opt_flags);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValueWide(rl_dest, rl_result);
    } else {
      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg.GetReg(), size, INVALID_SREG);
      MarkPossibleNullPointerException(opt_flags);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValue(rl_dest, rl_result);
    }
  } else {
    // Offset base, then use indexed load
    int reg_ptr = AllocTemp();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg.GetReg(), data_offset);
    FreeTemp(rl_array.reg.GetReg());
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.reg.GetReg(), reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    LoadBaseIndexed(reg_ptr, rl_index.reg.GetReg(), rl_result.reg.GetReg(), scale, size);
    MarkPossibleNullPointerException(opt_flags);
    FreeTemp(reg_ptr);
    StoreValue(rl_dest, rl_result);
  }
}
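
// Addressing choices above, summarized (illustrative approximation): a
// constant index is folded into the displacement, so an int[] load at
// constant index 2 becomes roughly
//     ldr r_dest, [r_array, #(data_offset + (2 << 2))]
// after the null and bounds checks, while a variable index on a narrow,
// non-FP element offsets the base once and uses a register-offset load,
//     ldr r_dest, [r_ptr, r_index, lsl #scale]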

/*
 * Generate array store
 */
void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  int data_offset;
  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset.
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  int reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.reg.GetReg();
  } else if (IsTemp(rl_array.reg.GetReg()) && !card_mark) {
    Clobber(rl_array.reg.GetReg());
    reg_ptr = rl_array.reg.GetReg();
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTemp();
  }

  /* null object? */
  GenNullCheck(rl_array.reg.GetReg(), opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  int reg_len = INVALID_REG;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps (4) here.
    /* Get len */
    LoadWordDisp(rl_array.reg.GetReg(), len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg.GetReg(), opt_flags);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide || rl_src.fp || constant_index) {
    if (rl_src.wide) {
      rl_src = LoadValueWide(rl_src, reg_class);
    } else {
      rl_src = LoadValue(rl_src, reg_class);
    }
    if (!constant_index) {
      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg.GetReg(), rl_index.reg.GetReg(),
                       EncodeShift(kArmLsl, scale));
    }
    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.reg.GetReg(), kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }

    if (rl_src.wide) {
      StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
    } else {
      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg.GetReg(), size);
    }
    MarkPossibleNullPointerException(opt_flags);
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg.GetReg(), data_offset);
    rl_src = LoadValue(rl_src, reg_class);
    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.reg.GetReg(), reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    StoreBaseIndexed(reg_ptr, rl_index.reg.GetReg(), rl_src.reg.GetReg(),
                     scale, size);
    MarkPossibleNullPointerException(opt_flags);
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(rl_src.reg.GetReg(), rl_array.reg.GetReg());
  }
}

void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    StoreValueWide(rl_dest, rl_src);
    return;
  }
  if (BadOverlap(rl_src, rl_dest)) {
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      if (shift_amount == 1) {
        OpRegRegReg(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg());
        OpRegRegReg(kOpAdc, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), rl_src.reg.GetHighReg());
      } else if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetHighReg(), rl_src.reg.GetReg());
        LoadConstant(rl_result.reg.GetReg(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHighReg(), rl_src.reg.GetReg(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetReg(), 0);
      } else {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetReg(),
                         EncodeShift(kArmLsr, 32 - shift_amount));
        OpRegRegImm(kOpLsl, rl_result.reg.GetReg(), rl_src.reg.GetReg(), shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetReg(), rl_src.reg.GetHighReg());
        OpRegRegImm(kOpAsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), 31);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpAsr, rl_result.reg.GetReg(), rl_src.reg.GetHighReg(), shift_amount - 32);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), 31);
      } else {
        int t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetReg(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetReg(), t_reg, rl_src.reg.GetHighReg(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetReg(), rl_src.reg.GetHighReg());
        LoadConstant(rl_result.reg.GetHighReg(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsr, rl_result.reg.GetReg(), rl_src.reg.GetHighReg(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetHighReg(), 0);
      } else {
        int t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetReg(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetReg(), t_reg, rl_src.reg.GetHighReg(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpLsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  StoreValueWide(rl_dest, rl_result);
}
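
// Worked example for the general SHL case (illustrative): for
// shift_amount = 8 on (hi:lo), the emitted sequence is
//     lsl r_res_hi, r_hi, #8
//     orr r_res_hi, r_res_hi, r_lo, lsr #24   ; bits crossing the word boundary
//     lsl r_res_lo, r_lo, #8
// while shift_amount = 40 reduces to "lsl r_res_hi, r_lo, #8" plus zeroing
// the low word, since everything below bit 32 is discarded.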

void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
    if (!rl_src2.is_const) {
      // Don't bother with special handling for subtract from immediate.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
      return;
    }
  } else {
    // Normalize
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  if (BadOverlap(rl_src1, rl_dest)) {
    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
    return;
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
  uint32_t val_lo = Low32Bits(val);
  uint32_t val_hi = High32Bits(val);
  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
  int32_t mod_imm_hi = ModifiedImmediate(val_hi);

  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
        return;
      }
      break;
    default:
      break;
  }
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), mod_imm_lo);
      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if ((val_lo != 0) || (rl_result.reg.GetReg() != rl_src1.reg.GetReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), val_lo);
      }
      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), val_hi);
      }
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      OpRegRegImm(kOpXor, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), val_lo);
      OpRegRegImm(kOpXor, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), val_hi);
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
      if ((val_lo != 0xffffffff) || (rl_result.reg.GetReg() != rl_src1.reg.GetReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), val_lo);
      }
      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), val_hi);
      }
      break;
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::SUB_LONG:
      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), mod_imm_lo);
      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    default:
      LOG(FATAL) << "Unexpected opcode " << opcode;
  }
  StoreValueWide(rl_dest, rl_result);
}
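
// Explanatory note: ModifiedImmediate() returns a negative value when a
// 32-bit constant cannot be expressed as a Thumb-2 modified immediate (an
// 8-bit value rotated, or one of the replicated byte patterns 0x00XY00XY,
// 0xXY00XY00, 0xXYXYXYXY).  For example, adding the long constant
// 0x100000001 stays on this fast path because both halves encode as 1, while
// a half like 0x12345678 forces the generic register-register path above.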

}  // namespace art