int_arm.cc revision 2bf31e67694da24a19fc1f328285cebb1a4b9964
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Thumb2 ISA. */

#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"

namespace art {

LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) {
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}

/*
 * Generate a Thumb2 IT instruction, which can nullify up to
 * four subsequent instructions based on a condition and its
 * inverse.  The condition applies to the first instruction, which
 * is executed if the condition is met.  The string "guide" consists
 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
 * A "T" means the instruction is executed if the condition is
 * met, and an "E" means the instruction is executed if the condition
 * is not met.
 */
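/*
 * Worked example (illustrative): OpIT(kCondEq, "E") emits an ITE EQ block.
 * kArmCondEq encodes as 0x0, so cond_bit = 0 and alt_bit = 1; the guide's
 * 'E' makes mask3 = 1, and the terminating 1 lands at bit position 2,
 * giving mask = 0b1100.
 */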
LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
  int mask;
  int mask3 = 0;
  int mask2 = 0;
  int mask1 = 0;
  ArmConditionCode code = ArmConditionEncoding(ccode);
  int cond_bit = code & 1;
  int alt_bit = cond_bit ^ 1;

  // Note: case fallthroughs intentional
  switch (strlen(guide)) {
    case 3:
      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
    case 2:
      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
    case 1:
      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
      break;
    case 0:
      break;
    default:
      LOG(FATAL) << "OAT: bad case in OpIT";
  }
  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
       (1 << (3 - strlen(guide)));
  return NewLIR2(kThumb2It, code, mask);
}

/*
 * 64-bit 3way compare function.
 *     mov   rX, #-1
 *     cmp   op1hi, op2hi
 *     blt   done
 *     bgt   flip
 *     sub   rX, op1lo, op2lo (treat as unsigned)
 *     beq   done
 *     ite   hi
 *     mov(hi)   rX, #-1
 *     mov(!hi)  rX, #1
 * flip:
 *     neg   rX
 * done:
 */
void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LIR* target1;
  LIR* target2;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  int t_reg = AllocTemp();
  LoadConstant(t_reg, -1);
  OpRegReg(kOpCmp, rl_src1.high_reg, rl_src2.high_reg);
  LIR* branch1 = OpCondBranch(kCondLt, NULL);
  LIR* branch2 = OpCondBranch(kCondGt, NULL);
  OpRegRegReg(kOpSub, t_reg, rl_src1.low_reg, rl_src2.low_reg);
  LIR* branch3 = OpCondBranch(kCondEq, NULL);

  OpIT(kCondHi, "E");
  NewLIR2(kThumb2MovI8M, t_reg, ModifiedImmediate(-1));
  LoadConstant(t_reg, 1);
  GenBarrier();

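  // Note: both arms of the ITE above fall through the "flip" negation below,
  // so unsigned-higher yields +1 and unsigned-lower yields -1.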
  target2 = NewLIR0(kPseudoTargetLabel);
  OpRegReg(kOpNeg, t_reg, t_reg);

  target1 = NewLIR0(kPseudoTargetLabel);

  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
  rl_temp.low_reg = t_reg;
  StoreValue(rl_dest, rl_temp);
  FreeTemp(t_reg);

  branch1->target = target1;
  branch2->target = target2;
  branch3->target = branch1->target;
}

void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  DCHECK_GE(ModifiedImmediate(val_lo), 0);
  DCHECK_GE(ModifiedImmediate(val_hi), 0);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  int32_t low_reg = rl_src1.low_reg;
  int32_t high_reg = rl_src1.high_reg;

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    int t_reg = AllocTemp();
    NewLIR4(kThumb2OrrRRRs, t_reg, low_reg, high_reg, 0);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

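  // The 64-bit compare decomposes into a signed compare of the high words
  // followed, only when they are equal, by an unsigned compare of the low
  // words; hence the rewrite of ccode to its unsigned counterpart below
  // (e.g. kCondLt becomes kCondUlt).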
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}

void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  rl_src = LoadValue(rl_src, kCoreReg);
  if (mir->ssa_rep->num_uses == 1) {
    // CONST case
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if ((true_val == 1) && (false_val == 0)) {
      OpRegRegImm(kOpRsub, rl_result.low_reg, rl_src.low_reg, 1);
      OpIT(kCondUlt, "");
      LoadConstant(rl_result.low_reg, 0);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else if (InexpensiveConstantInt(true_val) && InexpensiveConstantInt(false_val)) {
      OpRegImm(kOpCmp, rl_src.low_reg, 0);
      OpIT(kCondEq, "E");
      LoadConstant(rl_result.low_reg, true_val);
      LoadConstant(rl_result.low_reg, false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else {
      // Unlikely case - could be tuned.
      int t_reg1 = AllocTemp();
      int t_reg2 = AllocTemp();
      LoadConstant(t_reg1, true_val);
      LoadConstant(t_reg2, false_val);
      OpRegImm(kOpCmp, rl_src.low_reg, 0);
      OpIT(kCondEq, "E");
      OpRegCopy(rl_result.low_reg, t_reg1);
      OpRegCopy(rl_result.low_reg, t_reg2);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    }
  } else {
    // MOVE case
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
    rl_true = LoadValue(rl_true, kCoreReg);
    rl_false = LoadValue(rl_false, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegImm(kOpCmp, rl_src.low_reg, 0);
    if (rl_result.low_reg == rl_true.low_reg) {  // Is the "true" case already in place?
      OpIT(kCondNe, "");
      OpRegCopy(rl_result.low_reg, rl_false.low_reg);
    } else if (rl_result.low_reg == rl_false.low_reg) {  // False case in place?
      OpIT(kCondEq, "");
      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
    } else {  // Normal - select between the two.
      OpIT(kCondEq, "E");
      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
      OpRegCopy(rl_result.low_reg, rl_false.low_reg);
    }
    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
  }
  StoreValue(rl_dest, rl_result);
}

void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    RegLocation rl_temp = UpdateLocWide(rl_src2);
    // Do special compare/branch against simple const operand if not already in registers.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if ((rl_temp.location != kLocPhysReg) &&
        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
      return;
    }
  }
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.high_reg, rl_src2.high_reg);
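  // Same decomposition as GenFusedLongCmpImmBranch above: signed compare of
  // the high words, then an unsigned compare of the low words when equal.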
  switch (ccode) {
    case kCondEq:
      OpCondBranch(kCondNe, not_taken);
      break;
    case kCondNe:
      OpCondBranch(kCondNe, taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg);
  OpCondBranch(ccode, taken);
}

/*
 * Generate a register comparison to an immediate and branch.  Caller
 * is responsible for setting branch target field.
 */
LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value,
                                LIR* target) {
  LIR* branch;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  /*
   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
   * branch forward to a launch pad, they will frequently not reach - and thus have to
   * be converted to a long form during assembly (which will trigger another assembly
   * pass).  Here we estimate the branch distance for checks, and if large directly
   * generate the long form in an attempt to avoid an extra assembly pass.
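   * (A cbz/cbnz encodes only a short forward branch of at most 126 bytes, so
   * when more than 64 Dalvik code units remain, the throw target is assumed
   * to be out of reach.)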
   * TODO: consider interspersing launchpads in code following unconditional branches.
   */
  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
  if (!skip && (ARM_LOWREG(reg)) && (check_value == 0) &&
     ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
    branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                     reg, 0);
  } else {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kThumbBCond, 0, arm_cond);
  }
  branch->target = target;
  return branch;
}

LIR* ArmMir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) {
  LIR* res;
  int opcode;
  if (ARM_FPREG(r_dest) || ARM_FPREG(r_src))
    return OpFpRegCopy(r_dest, r_src);
  if (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src))
    opcode = kThumbMovRR;
  else if (!ARM_LOWREG(r_dest) && !ARM_LOWREG(r_src))
     opcode = kThumbMovRR_H2H;
  else if (ARM_LOWREG(r_dest))
     opcode = kThumbMovRR_H2L;
  else
     opcode = kThumbMovRR_L2H;
  res = RawLIR(current_dalvik_offset_, opcode, r_dest, r_src);
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

LIR* ArmMir2Lir::OpRegCopy(int r_dest, int r_src) {
  LIR* res = OpRegCopyNoInsert(r_dest, r_src);
  AppendLIR(res);
  return res;
}

void ArmMir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, int src_lo,
                               int src_hi) {
  bool dest_fp = ARM_FPREG(dest_lo) && ARM_FPREG(dest_hi);
  bool src_fp = ARM_FPREG(src_lo) && ARM_FPREG(src_hi);
  DCHECK_EQ(ARM_FPREG(src_lo), ARM_FPREG(src_hi));
  DCHECK_EQ(ARM_FPREG(dest_lo), ARM_FPREG(dest_hi));
  if (dest_fp) {
    if (src_fp) {
      OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi));
    } else {
      NewLIR3(kThumb2Fmdrr, S2d(dest_lo, dest_hi), src_lo, src_hi);
    }
  } else {
    if (src_fp) {
      NewLIR3(kThumb2Fmrrd, dest_lo, dest_hi, S2d(src_lo, src_hi));
    } else {
      // Handle overlap
      if (src_hi == dest_lo) {
        OpRegCopy(dest_hi, src_hi);
        OpRegCopy(dest_lo, src_lo);
      } else {
        OpRegCopy(dest_lo, src_lo);
        OpRegCopy(dest_hi, src_hi);
      }
    }
  }
}

// Table of magic divisors
struct MagicTable {
  uint32_t magic;
  uint32_t shift;
  DividePattern pattern;
};

static const MagicTable magic_table[] = {
  {0, 0, DivideNone},        // 0
  {0, 0, DivideNone},        // 1
  {0, 0, DivideNone},        // 2
  {0x55555556, 0, Divide3},  // 3
  {0, 0, DivideNone},        // 4
  {0x66666667, 1, Divide5},  // 5
  {0x2AAAAAAB, 0, Divide3},  // 6
  {0x92492493, 2, Divide7},  // 7
  {0, 0, DivideNone},        // 8
  {0x38E38E39, 1, Divide5},  // 9
  {0x66666667, 2, Divide5},  // 10
  {0x2E8BA2E9, 1, Divide5},  // 11
  {0x2AAAAAAB, 1, Divide5},  // 12
  {0x4EC4EC4F, 2, Divide5},  // 13
  {0x92492493, 3, Divide7},  // 14
  {0x88888889, 3, Divide7},  // 15
};

// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
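// For example, magic_table[3] holds M = 0x55555556 = (2^32 + 2) / 3, and the
// Divide3 pattern computes:
//   smull r_lo, r_hi, M, n          // r_hi = floor(M * n / 2^32)
//   sub   result, r_hi, n, asr #31  // adds 1 when n is negative
// Magic values with the top bit set (e.g. 0x92492493 for 7) act as negative
// multipliers in the signed smull, so the Divide7 pattern first adds n back
// into r_hi before shifting.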
bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns
  if (!is_div) {
    return false;
  }

  int r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  int r_hi = AllocTemp();
  int r_lo = AllocTemp();
  NewLIR4(kThumb2Smull, r_lo, r_hi, r_magic, rl_src.low_reg);
  switch (pattern) {
    case Divide3:
      OpRegRegRegShift(kOpSub, rl_result.low_reg, r_hi,
               rl_src.low_reg, EncodeShift(kArmAsr, 31));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, r_lo, rl_src.low_reg, 31);
      OpRegRegRegShift(kOpRsub, rl_result.low_reg, r_lo, r_hi,
               EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    case Divide7:
      OpRegReg(kOpAdd, r_hi, rl_src.low_reg);
      OpRegRegImm(kOpAsr, r_lo, rl_src.low_reg, 31);
      OpRegRegRegShift(kOpRsub, rl_result.low_reg, r_lo, r_hi,
               EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

LIR* ArmMir2Lir::GenRegMemCheck(ConditionCode c_code,
                    int reg1, int base, int offset, ThrowKind kind) {
  LOG(FATAL) << "Unexpected use of GenRegMemCheck for Arm";
  return NULL;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                      RegLocation rl_src2, bool is_div, bool check_zero) {
  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
  return rl_dest;
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
  return rl_dest;
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit,
                                     bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Put the literal in a temp.
  int lit_temp = AllocTemp();
  LoadConstant(lit_temp, lit);
  // Use the generic case for div/rem with arg2 in a register.
  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
  FreeTemp(lit_temp);

  return rl_result;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2,
                                  bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (is_div) {
    // Simple case, use sdiv instruction.
    OpRegRegReg(kOpDiv, rl_result.low_reg, reg1, reg2);
  } else {
    // Remainder case, use the following code:
    // temp = reg1 / reg2      - integer division
    // temp = temp * reg2
    // dest = reg1 - temp

    int temp = AllocTemp();
    OpRegRegReg(kOpDiv, temp, reg1, reg2);
    OpRegReg(kOpMul, temp, reg2);
    OpRegRegReg(kOpSub, rl_result.low_reg, reg1, temp);
    FreeTemp(temp);
  }

  return rl_result;
}

bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg);
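  // ITE: for min, src2 wins when src1 > src2; for max, when src1 < src2.
  // The else arm copies src1 into the result.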
  OpIT((is_min) ? kCondGt : kCondLt, "E");
  OpRegReg(kOpMov, rl_result.low_reg, rl_src2.low_reg);
  OpRegReg(kOpMov, rl_result.low_reg, rl_src1.low_reg);
  GenBarrier();
  StoreValue(rl_dest, rl_result);
  return true;
}

bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address.wide = 0;  // ignore high half in info->args[1]
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == kLong) {
    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
    if (rl_address.low_reg != rl_result.low_reg) {
      LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, kWord, INVALID_SREG);
      LoadBaseDisp(rl_address.low_reg, 4, rl_result.high_reg, kWord, INVALID_SREG);
    } else {
      LoadBaseDisp(rl_address.low_reg, 4, rl_result.high_reg, kWord, INVALID_SREG);
      LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, kWord, INVALID_SREG);
    }
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
    LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address.wide = 0;  // ignore high half in info->args[1]
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  if (size == kLong) {
    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, kWord);
    StoreBaseDisp(rl_address.low_reg, 4, rl_value.high_reg, kWord);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size);
  }
  return true;
}

void ArmMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) {
  LOG(FATAL) << "Unexpected use of OpLea for Arm";
}

void ArmMir2Lir::OpTlsCmp(ThreadOffset offset, int val) {
  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
}

bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset.wide = 0;  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
  // into the same temps, reducing the number of required temps down to 5. We shall work
  // around the potentially locked temp by using LR for r_ptr, unconditionally.
  // TODO: Pass information about the need for more temps to the stack frame generation
  // code so that we can rely on being able to allocate enough temps.
  DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp);
  MarkTemp(rARM_LR);
  FreeTemp(rARM_LR);
  LockTemp(rARM_LR);
  bool load_early = true;
  if (is_long) {
    bool expected_is_core_reg =
        rl_src_expected.location == kLocPhysReg && !IsFpReg(rl_src_expected.low_reg);
    bool new_value_is_core_reg =
        rl_src_new_value.location == kLocPhysReg && !IsFpReg(rl_src_new_value.low_reg);
    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(rl_src_expected.low_reg);
    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(rl_src_new_value.low_reg);

    if (!expected_is_good_reg && !new_value_is_good_reg) {
      // None of expected/new_value is non-temp reg, need to load both late
      load_early = false;
      // Make sure they are not in the temp regs and the load will not be skipped.
      if (expected_is_core_reg) {
        FlushRegWide(rl_src_expected.low_reg, rl_src_expected.high_reg);
        ClobberSReg(rl_src_expected.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
        rl_src_expected.location = kLocDalvikFrame;
      }
      if (new_value_is_core_reg) {
        FlushRegWide(rl_src_new_value.low_reg, rl_src_new_value.high_reg);
        ClobberSReg(rl_src_new_value.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
        rl_src_new_value.location = kLocDalvikFrame;
      }
    }
  }

  // Release store semantics, get the barrier out of the way.  TODO: revisit
  GenMemBarrier(kStoreLoad);

  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
  RegLocation rl_new_value;
  if (!is_long) {
    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
  } else if (load_early) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(rl_new_value.low_reg, rl_object.low_reg);
  }

  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);

  int r_ptr = rARM_LR;
  OpRegRegReg(kOpAdd, r_ptr, rl_object.low_reg, rl_offset.low_reg);

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.low_reg);
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.low_reg);

  RegLocation rl_expected;
  if (!is_long) {
    rl_expected = LoadValue(rl_src_expected, kCoreReg);
  } else if (load_early) {
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    rl_new_value.low_reg = rl_expected.low_reg = AllocTemp();
    rl_new_value.high_reg = rl_expected.high_reg = AllocTemp();
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = (tmp == 0);

  int r_tmp = AllocTemp();
  LIR* target = NewLIR0(kPseudoTargetLabel);

  if (is_long) {
    int r_tmp_high = AllocTemp();
    if (!load_early) {
      LoadValueDirectWide(rl_src_expected, rl_expected.low_reg, rl_expected.high_reg);
    }
    NewLIR3(kThumb2Ldrexd, r_tmp, r_tmp_high, r_ptr);
    OpRegReg(kOpSub, r_tmp, rl_expected.low_reg);
    OpRegReg(kOpSub, r_tmp_high, rl_expected.high_reg);
    if (!load_early) {
      LoadValueDirectWide(rl_src_new_value, rl_new_value.low_reg, rl_new_value.high_reg);
    }
    // Make sure we use ORR that sets the ccode
    if (ARM_LOWREG(r_tmp) && ARM_LOWREG(r_tmp_high)) {
      NewLIR2(kThumbOrr, r_tmp, r_tmp_high);
    } else {
      NewLIR4(kThumb2OrrRRRs, r_tmp, r_tmp, r_tmp_high, 0);
    }
    FreeTemp(r_tmp_high);  // Now unneeded

    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strexd /* eq */, r_tmp, rl_new_value.low_reg, rl_new_value.high_reg, r_ptr);

  } else {
    NewLIR3(kThumb2Ldrex, r_tmp, r_ptr, 0);
    OpRegReg(kOpSub, r_tmp, rl_expected.low_reg);
    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strex /* eq */, r_tmp, rl_new_value.low_reg, r_ptr, 0);
  }

  // Still one conditional left from OpIT(kCondEq, "T") from either branch
  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
  OpCondBranch(kCondEq, target);

  if (!load_early) {
    FreeTemp(rl_expected.low_reg);  // Now unneeded.
    FreeTemp(rl_expected.high_reg);  // Now unneeded.
  }

  // result := (r_tmp != 0) ? 0 : 1;
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpRsub, rl_result.low_reg, r_tmp, 1);
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  OpIT(kCondUlt, "");
  LoadConstant(rl_result.low_reg, 0); /* cc */
  FreeTemp(r_tmp);  // Now unneeded.

  StoreValue(rl_dest, rl_result);

  // Now, restore lr to its non-temp status.
  Clobber(rARM_LR);
  UnmarkTemp(rARM_LR);
  return true;
}

LIR* ArmMir2Lir::OpPcRelLoad(int reg, LIR* target) {
  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg, 0, 0, 0, 0, target);
}

LIR* ArmMir2Lir::OpVldm(int rBase, int count) {
  return NewLIR3(kThumb2Vldms, rBase, fr0, count);
}

LIR* ArmMir2Lir::OpVstm(int rBase, int count) {
  return NewLIR3(kThumb2Vstms, rBase, fr0, count);
}

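// Multiplies by a constant with exactly two bits set:
// lit == (1 << first_bit) + (1 << second_bit), so
//   result = (src + (src << (second_bit - first_bit))) << first_bit
// e.g. lit = 10 (bits 1 and 3) becomes (src + (src << 2)) << 1.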
void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  OpRegRegRegShift(kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg,
                   EncodeShift(kArmLsl, second_bit - first_bit));
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.low_reg, rl_result.low_reg, first_bit);
  }
}

void ArmMir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) {
  int t_reg = AllocTemp();
  NewLIR4(kThumb2OrrRRRs, t_reg, reg_lo, reg_hi, 0);
  FreeTemp(t_reg);
  GenCheck(kCondEq, kThrowDivZero);
}

// Test suspend flag, return target of taken suspend branch
LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
  NewLIR2(kThumbSubRI8, rARM_SUSPEND, 1);
  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
}

// Decrement register and branch on condition
LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) {
  // Combine sub & test using sub setflags encoding here
  NewLIR3(kThumb2SubsRRI12, reg, reg, 1);
  return OpCondBranch(c_code, target);
}

void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  int dmb_flavor;
  // TODO: revisit Arm barrier kinds
  switch (barrier_kind) {
    case kLoadStore: dmb_flavor = kISH; break;
    case kLoadLoad: dmb_flavor = kISH; break;
    case kStoreStore: dmb_flavor = kISHST; break;
    case kStoreLoad: dmb_flavor = kISH; break;
    default:
      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
      dmb_flavor = kSY;  // quiet gcc.
      break;
  }
  LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor);
  dmb->u.m.def_mask = ENCODE_ALL;
#endif
}

void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  int z_reg = AllocTemp();
  LoadConstantNoClobber(z_reg, 0);
  // Check for destructive overlap
  if (rl_result.low_reg == rl_src.high_reg) {
    int t_reg = AllocTemp();
    // Preserve the source high word before the low-word subtract clobbers it.
    OpRegCopy(t_reg, rl_src.high_reg);
    OpRegRegReg(kOpSub, rl_result.low_reg, z_reg, rl_src.low_reg);
    OpRegRegReg(kOpSbc, rl_result.high_reg, z_reg, t_reg);
    FreeTemp(t_reg);
  } else {
    OpRegRegReg(kOpSub, rl_result.low_reg, z_reg, rl_src.low_reg);
    OpRegRegReg(kOpSbc, rl_result.high_reg, z_reg, rl_src.high_reg);
  }
  FreeTemp(z_reg);
  StoreValueWide(rl_dest, rl_result);
}


 /*
  * Check to see if a result pair has a misaligned overlap with an operand pair.  This
  * is not usual for dx to generate, but it is legal (for now).  In a future rev of
  * dex, we'll want to make this case illegal.
  */
bool ArmMir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
  DCHECK(rl_src.wide);
  DCHECK(rl_dest.wide);
  return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
}

void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
    /*
     * To pull off inline multiply, we have a worst-case requirement of 8 temporary
     * registers.  Normally for Arm, we get 5.  We can get to 6 by including
     * lr in the temp set.  The only problematic case is all operands and result are
     * distinct, and none have been promoted.  In that case, we can succeed by aggressively
     * freeing operand temp registers after they are no longer needed.  All other cases
     * can proceed normally.  We'll just punt on the case of the result having a misaligned
     * overlap with either operand and send that case to a runtime handler.
     */
    RegLocation rl_result;
    if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
      ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pLmul);
      FlushAllRegs();
      CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
      rl_result = GetReturnWide(false);
      StoreValueWide(rl_dest, rl_result);
      return;
    }
    // Temporarily add LR to the temp pool, and assign it to tmp1
    MarkTemp(rARM_LR);
    FreeTemp(rARM_LR);
    int tmp1 = rARM_LR;
    LockTemp(rARM_LR);

    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    bool special_case = true;
    // If operands are the same, or any pair has been promoted we're not the special case.
    if ((rl_src1.s_reg_low == rl_src2.s_reg_low) ||
        (!IsTemp(rl_src1.low_reg) && !IsTemp(rl_src1.high_reg)) ||
        (!IsTemp(rl_src2.low_reg) && !IsTemp(rl_src2.high_reg))) {
      special_case = false;
    }
    // Tuning: if rl_dest has been promoted and is *not* either operand, could use directly.
    int res_lo = AllocTemp();
    int res_hi;
    if (rl_src1.low_reg == rl_src2.low_reg) {
      res_hi = AllocTemp();
      NewLIR3(kThumb2MulRRR, tmp1, rl_src1.low_reg, rl_src1.high_reg);
      NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src1.low_reg, rl_src1.low_reg);
      OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
    } else {
      // In the special case, all temps are now allocated
      NewLIR3(kThumb2MulRRR, tmp1, rl_src2.low_reg, rl_src1.high_reg);
      if (special_case) {
        DCHECK_NE(rl_src1.low_reg, rl_src2.low_reg);
        DCHECK_NE(rl_src1.high_reg, rl_src2.high_reg);
        FreeTemp(rl_src1.high_reg);
      }
      res_hi = AllocTemp();

      NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src2.low_reg, rl_src1.low_reg);
      NewLIR4(kThumb2Mla, tmp1, rl_src1.low_reg, rl_src2.high_reg, tmp1);
      NewLIR4(kThumb2AddRRR, res_hi, tmp1, res_hi, 0);
      if (special_case) {
        FreeTemp(rl_src1.low_reg);
        Clobber(rl_src1.low_reg);
        Clobber(rl_src1.high_reg);
      }
    }
    FreeTemp(tmp1);
    rl_result = GetReturnWide(false);  // Just using as a template.
    rl_result.low_reg = res_lo;
    rl_result.high_reg = res_hi;
    StoreValueWide(rl_dest, rl_result);
    // Now, restore lr to its non-temp status.
    Clobber(rARM_LR);
    UnmarkTemp(rARM_LR);
}

void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
}

void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
}

void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
}

void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
}

void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenXorLong for Arm";
}

/*
 * Generate array load
 */
void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;
  RegLocation rl_result;
  bool constant_index = rl_index.is_const;
  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  if (rl_dest.wide) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  /* null object? */
  GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  int reg_len = INVALID_REG;
  if (needs_range_check) {
    reg_len = AllocTemp();
    /* Get len */
    LoadWordDisp(rl_array.low_reg, len_offset, reg_len);
  }
  if (rl_dest.wide || rl_dest.fp || constant_index) {
    int reg_ptr;
    if (constant_index) {
      reg_ptr = rl_array.low_reg;  // NOTE: must not alter reg_ptr in constant case.
    } else {
      // No special indexed operation, lea + load w/ displacement
      reg_ptr = AllocTemp();
      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
                       EncodeShift(kArmLsl, scale));
      FreeTemp(rl_index.low_reg);
    }
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }
    if (rl_dest.wide) {
      LoadBaseDispWide(reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValueWide(rl_dest, rl_result);
    } else {
      LoadBaseDisp(reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValue(rl_dest, rl_result);
    }
  } else {
    // Offset base, then use indexed load
    int reg_ptr = AllocTemp();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
    FreeTemp(rl_array.low_reg);
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.low_reg, reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    LoadBaseIndexed(reg_ptr, rl_index.low_reg, rl_result.low_reg, scale, size);
    FreeTemp(reg_ptr);
    StoreValue(rl_dest, rl_result);
  }
}

/*
 * Generate array store
 *
 */
void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  int data_offset;
  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset.
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  int reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.low_reg;
  } else if (IsTemp(rl_array.low_reg) && !card_mark) {
    Clobber(rl_array.low_reg);
    reg_ptr = rl_array.low_reg;
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTemp();
  }

  /* null object? */
  GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  int reg_len = INVALID_REG;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps(4) here.
    /* Get len */
    LoadWordDisp(rl_array.low_reg, len_offset, reg_len);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide || rl_src.fp || constant_index) {
    if (rl_src.wide) {
      rl_src = LoadValueWide(rl_src, reg_class);
    } else {
      rl_src = LoadValue(rl_src, reg_class);
    }
    if (!constant_index) {
      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
                       EncodeShift(kArmLsl, scale));
    }
    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }

    if (rl_src.wide) {
      StoreBaseDispWide(reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg);
    } else {
      StoreBaseDisp(reg_ptr, data_offset, rl_src.low_reg, size);
    }
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.low_reg, data_offset);
    rl_src = LoadValue(rl_src, reg_class);
    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.low_reg, reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg,
                     scale, size);
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
  }
}


void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    StoreValueWide(rl_dest, rl_src);
    return;
  }
  if (BadOverlap(rl_src, rl_dest)) {
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
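  // Each shift decomposes by range: amount == 32 is a plain register move,
  // amount > 31 shifts a single word, and amount < 32 combines both words,
  // e.g. for SHL: hi = (hi << n) | (lo >> (32 - n)); lo = lo << n.  SHL by 1
  // instead uses ADD/ADC so the carry propagates without the extra OR.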
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      if (shift_amount == 1) {
        OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg);
        OpRegRegReg(kOpAdc, rl_result.high_reg, rl_src.high_reg, rl_src.high_reg);
      } else if (shift_amount == 32) {
        OpRegCopy(rl_result.high_reg, rl_src.low_reg);
        LoadConstant(rl_result.low_reg, 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsl, rl_result.high_reg, rl_src.low_reg, shift_amount - 32);
        LoadConstant(rl_result.low_reg, 0);
      } else {
        OpRegRegImm(kOpLsl, rl_result.high_reg, rl_src.high_reg, shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.high_reg, rl_result.high_reg, rl_src.low_reg,
                         EncodeShift(kArmLsr, 32 - shift_amount));
        OpRegRegImm(kOpLsl, rl_result.low_reg, rl_src.low_reg, shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
        OpRegRegImm(kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpAsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
        OpRegRegImm(kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
      } else {
        int t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.low_reg, shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpAsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
        LoadConstant(rl_result.high_reg, 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
        LoadConstant(rl_result.high_reg, 0);
      } else {
        int t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.low_reg, shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpLsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  StoreValueWide(rl_dest, rl_result);
}

void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
    if (!rl_src2.is_const) {
      // Don't bother with special handling for subtract from immediate.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
      return;
    }
  } else {
    // Normalize
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  if (BadOverlap(rl_src1, rl_dest)) {
    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
    return;
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
  uint32_t val_lo = Low32Bits(val);
  uint32_t val_hi = High32Bits(val);
  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
  int32_t mod_imm_hi = ModifiedImmediate(val_hi);
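  // ModifiedImmediate() yields -1 when a value has no Thumb-2 modified-immediate
  // encoding (an 8-bit value, optionally rotated or replicated across the word);
  // the carry-setting ADD/ADC and SUB/SBC forms below require such an encoding.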

  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
        return;
      }
      break;
    default:
      break;
  }
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      NewLIR3(kThumb2AddRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
      NewLIR3(kThumb2AdcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if ((val_lo != 0) || (rl_result.low_reg != rl_src1.low_reg)) {
        OpRegRegImm(kOpOr, rl_result.low_reg, rl_src1.low_reg, val_lo);
      }
      if ((val_hi != 0) || (rl_result.high_reg != rl_src1.high_reg)) {
        OpRegRegImm(kOpOr, rl_result.high_reg, rl_src1.high_reg, val_hi);
      }
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      OpRegRegImm(kOpXor, rl_result.low_reg, rl_src1.low_reg, val_lo);
      OpRegRegImm(kOpXor, rl_result.high_reg, rl_src1.high_reg, val_hi);
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
      if ((val_lo != 0xffffffff) || (rl_result.low_reg != rl_src1.low_reg)) {
        OpRegRegImm(kOpAnd, rl_result.low_reg, rl_src1.low_reg, val_lo);
      }
      if ((val_hi != 0xffffffff) || (rl_result.high_reg != rl_src1.high_reg)) {
        OpRegRegImm(kOpAnd, rl_result.high_reg, rl_src1.high_reg, val_hi);
      }
      break;
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::SUB_LONG:
      NewLIR3(kThumb2SubRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
      NewLIR3(kThumb2SbcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
      break;
    default:
      LOG(FATAL) << "Unexpected opcode " << opcode;
  }
  StoreValueWide(rl_dest, rl_result);
}

}  // namespace art