int_arm.cc revision dd7624d2b9e599d57762d12031b10b89defc9807
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Thumb2 ISA. */

#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"

namespace art {

LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}

/*
 * Generate a Thumb2 IT instruction, which can nullify up to
 * four subsequent instructions based on a condition and its
 * inverse.  The condition applies to the first instruction, which
 * is executed if the condition is met.  The string "guide" consists
 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
 * A "T" means the instruction is executed if the condition is
 * met, and an "E" means the instruction is executed if the condition
 * is not met.
 */
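// For example, tracing the encoding below, OpIT(kCondEq, "TE") emits "ITTE EQ":
// kArmCondEq is 0x0, so cond_bit == 0 and alt_bit == 1; guide[0] == 'T' gives
// mask3 = 0, guide[1] == 'E' gives mask2 = 1, and the terminating one lands at
// bit (3 - strlen(guide)) == 1, yielding mask == 0b0110.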
LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
  int mask;
  int mask3 = 0;
  int mask2 = 0;
  int mask1 = 0;
  ArmConditionCode code = ArmConditionEncoding(ccode);
  int cond_bit = code & 1;
  int alt_bit = cond_bit ^ 1;

  // Note: case fallthroughs intentional
  switch (strlen(guide)) {
    case 3:
      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
    case 2:
      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
    case 1:
      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
      break;
    case 0:
      break;
    default:
      LOG(FATAL) << "OAT: bad case in OpIT";
  }
  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
         (1 << (3 - strlen(guide)));
  return NewLIR2(kThumb2It, code, mask);
}

/*
 * 64-bit 3-way compare function.
 *     mov   rX, #-1
 *     cmp   op1hi, op2hi
 *     blt   done
 *     bgt   flip
 *     sub   rX, op1lo, op2lo (treat as unsigned)
 *     beq   done
 *     ite   hi
 *     mov(hi)   rX, #-1
 *     mov(!hi)  rX, #1
 * flip:
 *     neg   rX
 * done:
 */
void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LIR* target1;
  LIR* target2;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  RegStorage t_reg = AllocTemp();
  LoadConstant(t_reg, -1);
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  LIR* branch1 = OpCondBranch(kCondLt, NULL);
  LIR* branch2 = OpCondBranch(kCondGt, NULL);
  OpRegRegReg(kOpSub, t_reg, rl_src1.reg, rl_src2.reg);
  LIR* branch3 = OpCondBranch(kCondEq, NULL);

  OpIT(kCondHi, "E");
  NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
  LoadConstant(t_reg, 1);
  GenBarrier();

  target2 = NewLIR0(kPseudoTargetLabel);
  OpRegReg(kOpNeg, t_reg, t_reg);

  target1 = NewLIR0(kPseudoTargetLabel);

  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
  rl_temp.reg.SetReg(t_reg.GetReg());
  StoreValue(rl_dest, rl_temp);
  FreeTemp(t_reg);

  branch1->target = target1;
  branch2->target = target2;
  branch3->target = branch1->target;
}

void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  DCHECK_GE(ModifiedImmediate(val_lo), 0);
  DCHECK_GE(ModifiedImmediate(val_hi), 0);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    RegStorage t_reg = AllocTemp();
    NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

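  // The high words are compared with signed conditions; when they are equal the
  // low words decide the result as unsigned 32-bit quantities, hence the switch
  // to the unsigned counterpart (e.g. kCondLt -> kCondUlt) for the final
  // low-word compare below.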
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}

void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  rl_src = LoadValue(rl_src, kCoreReg);
  ConditionCode ccode = mir->meta.ccode;
  if (mir->ssa_rep->num_uses == 1) {
    // CONST case
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    // Change kCondNe to kCondEq for the special cases below.
    if (ccode == kCondNe) {
      ccode = kCondEq;
      std::swap(true_val, false_val);
    }
    bool cheap_false_val = InexpensiveConstantInt(false_val);
    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
      OpIT(true_val == 0 ? kCondNe : kCondUge, "");
      LoadConstant(rl_result.reg, false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
      OpIT(kCondLs, "");
      LoadConstant(rl_result.reg, false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
      OpRegImm(kOpCmp, rl_src.reg, 0);
      OpIT(ccode, "E");
      LoadConstant(rl_result.reg, true_val);
      LoadConstant(rl_result.reg, false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else {
      // Unlikely case - could be tuned.
      RegStorage t_reg1 = AllocTemp();
      RegStorage t_reg2 = AllocTemp();
      LoadConstant(t_reg1, true_val);
      LoadConstant(t_reg2, false_val);
      OpRegImm(kOpCmp, rl_src.reg, 0);
      OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, t_reg1);
      OpRegCopy(rl_result.reg, t_reg2);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    }
  } else {
    // MOVE case
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
    rl_true = LoadValue(rl_true, kCoreReg);
    rl_false = LoadValue(rl_false, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegImm(kOpCmp, rl_src.reg, 0);
    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
      OpIT(NegateComparison(ccode), "");
      OpRegCopy(rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
      OpIT(ccode, "");
      OpRegCopy(rl_result.reg, rl_true.reg);
    } else {  // Normal - select between the two.
      OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, rl_true.reg);
      OpRegCopy(rl_result.reg, rl_false.reg);
    }
    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
  }
  StoreValue(rl_dest, rl_result);
}

void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    RegLocation rl_temp = UpdateLocWide(rl_src2);
    // Do special compare/branch against simple const operand if not already in registers.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if ((rl_temp.location != kLocPhysReg) &&
        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
      return;
    }
  }
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  switch (ccode) {
    case kCondEq:
      OpCondBranch(kCondNe, not_taken);
      break;
    case kCondNe:
      OpCondBranch(kCondNe, taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
  OpCondBranch(ccode, taken);
}

/*
 * Generate a register comparison to an immediate and branch.  Caller
 * is responsible for setting branch target field.
 */
LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
  LIR* branch;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  /*
   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
   * branch forward to a launch pad, they will frequently not reach - and thus have to
   * be converted to a long form during assembly (which will trigger another assembly
   * pass).  Here we estimate the branch distance for checks, and if large directly
   * generate the long form in an attempt to avoid an extra assembly pass.
   * TODO: consider interspersing launchpads in code following unconditional branches.
   */
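  // Note: CBZ/CBNZ can encode only a short forward offset (0-126 bytes), which is
  // why reach is the concern here.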
  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
  if (!skip && (ARM_LOWREG(reg.GetReg())) && (check_value == 0) &&
      ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
    branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                     reg.GetReg(), 0);
  } else {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kThumbBCond, 0, arm_cond);
  }
  branch->target = target;
  return branch;
}

LIR* ArmMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  LIR* res;
  int opcode;
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (ARM_FPREG(r_dest.GetReg()) || ARM_FPREG(r_src.GetReg()))
    return OpFpRegCopy(r_dest, r_src);
  if (ARM_LOWREG(r_dest.GetReg()) && ARM_LOWREG(r_src.GetReg()))
    opcode = kThumbMovRR;
  else if (!ARM_LOWREG(r_dest.GetReg()) && !ARM_LOWREG(r_src.GetReg()))
    opcode = kThumbMovRR_H2H;
  else if (ARM_LOWREG(r_dest.GetReg()))
    opcode = kThumbMovRR_H2L;
  else
    opcode = kThumbMovRR_L2H;
  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

LIR* ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  LIR* res = OpRegCopyNoInsert(r_dest, r_src);
  AppendLIR(res);
  return res;
}

void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  bool dest_fp = ARM_FPREG(r_dest.GetLowReg());
  bool src_fp = ARM_FPREG(r_src.GetLowReg());
  if (dest_fp) {
    if (src_fp) {
      // FIXME: handle 64-bit solo's here.
      OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
                RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
    } else {
      NewLIR3(kThumb2Fmdrr, S2d(r_dest.GetLowReg(), r_dest.GetHighReg()),
              r_src.GetLowReg(), r_src.GetHighReg());
    }
  } else {
    if (src_fp) {
      NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), S2d(r_src.GetLowReg(),
              r_src.GetHighReg()));
    } else {
      // Handle overlap
      if (r_src.GetHighReg() == r_dest.GetLowReg()) {
        DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
      } else {
        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
      }
    }
  }
}

// Table of magic divisors
struct MagicTable {
  uint32_t magic;
  uint32_t shift;
  DividePattern pattern;
};

static const MagicTable magic_table[] = {
  {0, 0, DivideNone},        // 0
  {0, 0, DivideNone},        // 1
  {0, 0, DivideNone},        // 2
  {0x55555556, 0, Divide3},  // 3
  {0, 0, DivideNone},        // 4
  {0x66666667, 1, Divide5},  // 5
  {0x2AAAAAAB, 0, Divide3},  // 6
  {0x92492493, 2, Divide7},  // 7
  {0, 0, DivideNone},        // 8
  {0x38E38E39, 1, Divide5},  // 9
  {0x66666667, 2, Divide5},  // 10
  {0x2E8BA2E9, 1, Divide5},  // 11
  {0x2AAAAAAB, 1, Divide5},  // 12
  {0x4EC4EC4F, 2, Divide5},  // 13
  {0x92492493, 3, Divide7},  // 14
  {0x88888889, 3, Divide7},  // 15
};

// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
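// For example, the Divide3 entry uses m == 0x55555556 == (2^32 + 2) / 3.  After
// smull, r_hi == (m * x) >> 32, which is the truncated quotient x / 3 for
// non-negative x and one less than it for negative x; subtracting (x >> 31)
// (i.e. adding 1 when x is negative) yields the round-toward-zero quotient.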
bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns
  if (!is_div) {
    return false;
  }

  RegStorage r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage r_hi = AllocTemp();
  RegStorage r_lo = AllocTemp();
  NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(),
                       rl_src.reg.GetReg(), EncodeShift(kArmAsr, 31));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    case Divide7:
      OpRegReg(kOpAdd, r_hi, rl_src.reg);
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

LIR* ArmMir2Lir::GenRegMemCheck(ConditionCode c_code, RegStorage reg1, RegStorage base,
                                int offset, ThrowKind kind) {
  LOG(FATAL) << "Unexpected use of GenRegMemCheck for Arm";
  return NULL;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                      RegLocation rl_src2, bool is_div, bool check_zero) {
  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
  return rl_dest;
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
  return rl_dest;
}

RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Put the literal in a temp.
  RegStorage lit_temp = AllocTemp();
  LoadConstant(lit_temp, lit);
  // Use the generic case for div/rem with arg2 in a register.
  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
  FreeTemp(lit_temp);

  return rl_result;
}

RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
                                  bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (is_div) {
    // Simple case, use sdiv instruction.
    OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
  } else {
    // Remainder case, use the following code:
    // temp = reg1 / reg2      - integer division
    // temp = temp * reg2
    // dest = reg1 - temp
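    // e.g. reg1 == 7, reg2 == 3: temp = 7 / 3 = 2, then temp = 2 * 3 = 6, and
    // dest = 7 - 6 = 1, the expected remainder.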

    RegStorage temp = AllocTemp();
    OpRegRegReg(kOpDiv, temp, reg1, reg2);
    OpRegReg(kOpMul, temp, reg2);
    OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
    FreeTemp(temp);
  }

  return rl_result;
}

bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
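  // For min, the copy of rl_src2 below executes only when src1 > src2 and the
  // copy of rl_src1 only otherwise (the "E" slot), so the smaller value always
  // wins; the conditions are flipped for max.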
  OpIT((is_min) ? kCondGt : kCondLt, "E");
  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
  GenBarrier();
  StoreValue(rl_dest, rl_result);
  return true;
}

bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == kLong) {
    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
    } else {
      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
    }
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  if (size == kLong) {
    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), kWord);
    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), kWord);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
  }
  return true;
}

void ArmMir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  LOG(FATAL) << "Unexpected use of OpLea for Arm";
}

void ArmMir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
}

bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
  // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
  // into the same temps, reducing the number of required temps down to 5. We shall work
  // around the potentially locked temp by using LR for r_ptr, unconditionally.
  // TODO: Pass information about the need for more temps to the stack frame generation
  // code so that we can rely on being able to allocate enough temps.
  DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp);
  MarkTemp(rARM_LR);
  FreeTemp(rARM_LR);
  LockTemp(rARM_LR);
  bool load_early = true;
  if (is_long) {
    int expected_reg = is_long ? rl_src_expected.reg.GetLowReg() : rl_src_expected.reg.GetReg();
    int new_val_reg = is_long ? rl_src_new_value.reg.GetLowReg() : rl_src_new_value.reg.GetReg();
    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !IsFpReg(expected_reg);
    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !IsFpReg(new_val_reg);
    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);

    if (!expected_is_good_reg && !new_value_is_good_reg) {
      // None of expected/new_value is non-temp reg, need to load both late
      load_early = false;
      // Make sure they are not in the temp regs and the load will not be skipped.
      if (expected_is_core_reg) {
        FlushRegWide(rl_src_expected.reg);
        ClobberSReg(rl_src_expected.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
        rl_src_expected.location = kLocDalvikFrame;
      }
      if (new_value_is_core_reg) {
        FlushRegWide(rl_src_new_value.reg);
        ClobberSReg(rl_src_new_value.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
        rl_src_new_value.location = kLocDalvikFrame;
      }
    }
  }

  // Release store semantics, get the barrier out of the way.  TODO: revisit
  GenMemBarrier(kStoreLoad);

  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
  RegLocation rl_new_value;
  if (!is_long) {
    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
  } else if (load_early) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(rl_new_value.reg, rl_object.reg);
  }

  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);

  RegStorage r_ptr = rs_rARM_LR;
  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.reg.GetReg());
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.reg.GetReg());

  RegLocation rl_expected;
  if (!is_long) {
    rl_expected = LoadValue(rl_src_expected, kCoreReg);
  } else if (load_early) {
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
    int low_reg = AllocTemp().GetReg();
    int high_reg = AllocTemp().GetReg();
    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
    rl_expected = rl_new_value;
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  RegStorage r_tmp = AllocTemp();
  LIR* target = NewLIR0(kPseudoTargetLabel);

  if (is_long) {
    RegStorage r_tmp_high = AllocTemp();
    if (!load_early) {
      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
    }
    NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
    if (!load_early) {
      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
    }
    // Make sure we use ORR that sets the ccode
    if (ARM_LOWREG(r_tmp.GetReg()) && ARM_LOWREG(r_tmp_high.GetReg())) {
      NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
    } else {
      NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
    }
    FreeTemp(r_tmp_high);  // Now unneeded

    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(),
            rl_new_value.reg.GetHighReg(), r_ptr.GetReg());

  } else {
    NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
  }

  // Still one conditional left from OpIT(kCondEq, "T") from either branch
  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
  OpCondBranch(kCondEq, target);

  if (!load_early) {
    FreeTemp(rl_expected.reg);  // Now unneeded.
  }

  // result := (tmp1 != 0) ? 0 : 1;
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  OpIT(kCondUlt, "");
  LoadConstant(rl_result.reg, 0); /* cc */
  FreeTemp(r_tmp);  // Now unneeded.

  StoreValue(rl_dest, rl_result);

  // Now, restore lr to its non-temp status.
  Clobber(rARM_LR);
  UnmarkTemp(rARM_LR);
  return true;
}

LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
}

LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
  return NewLIR3(kThumb2Vldms, r_base.GetReg(), fr0, count);
}

LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) {
  return NewLIR3(kThumb2Vstms, r_base.GetReg(), fr0, count);
}

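// Multiply by a literal with exactly two bits set, assumed here to be
// (1 << second_bit) + (1 << first_bit).  For example, lit == 10 gives
// first_bit == 1, second_bit == 3:
//   add   result, src, src, lsl #2    @ result = src * 5
//   lsl   result, result, #1          @ result = src * 10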
void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
                   EncodeShift(kArmLsl, second_bit - first_bit));
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void ArmMir2Lir::GenDivZeroCheck(RegStorage reg) {
  DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
  RegStorage t_reg = AllocTemp();
  NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
  FreeTemp(t_reg);
  GenCheck(kCondEq, kThrowDivZero);
}

// Test suspend flag, return target of taken suspend branch
LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
  NewLIR2(kThumbSubRI8, rARM_SUSPEND, 1);
  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
}

// Decrement register and branch on condition
LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  // Combine sub & test using sub setflags encoding here
  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  return OpCondBranch(c_code, target);
}

void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with the last LIR as the barrier; if it isn't sufficient, generate a new one.
  LIR* barrier = last_lir_insn_;

  int dmb_flavor;
  // TODO: revisit Arm barrier kinds
  switch (barrier_kind) {
    case kLoadStore: dmb_flavor = kISH; break;
    case kLoadLoad: dmb_flavor = kISH; break;
    case kStoreStore: dmb_flavor = kISHST; break;
    case kStoreLoad: dmb_flavor = kISH; break;
    default:
      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
      dmb_flavor = kSY;  // quiet gcc.
      break;
  }

  // If the same barrier already exists, don't generate another.
  if (barrier == nullptr ||
      barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor) {
    barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
  }

  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
  DCHECK(!barrier->flags.use_def_invalid);
  barrier->u.m.def_mask = ENCODE_ALL;
#endif
}

void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage z_reg = AllocTemp();
  LoadConstantNoClobber(z_reg, 0);
  // Check for destructive overlap
  if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
    RegStorage t_reg = AllocTemp();
    OpRegCopy(t_reg, rl_src.reg.GetHigh());  // Save the high word; the sub below clobbers it.
    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
    FreeTemp(t_reg);
  } else {
    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
  }
  FreeTemp(z_reg);
  StoreValueWide(rl_dest, rl_result);
}

void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  /*
   * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
   * dest     = src1.lo * src2.lo;
   * tmp1    += src1.lo * src2.hi;
   * dest.hi += tmp1;
   *
   * To pull off inline multiply, we have a worst-case requirement of 7 temporary
   * registers.  Normally for Arm, we get 5.  We can get to 6 by including
   * lr in the temp set.  The only problematic case is all operands and result are
   * distinct, and none have been promoted.  In that case, we can succeed by aggressively
   * freeing operand temp registers after they are no longer needed.  All other cases
   * can proceed normally.  We'll just punt on the case of the result having a misaligned
   * overlap with either operand and send that case to a runtime handler.
   */
  RegLocation rl_result;
  if (BadOverlap(rl_src1, rl_dest) || BadOverlap(rl_src2, rl_dest)) {
    ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
    FlushAllRegs();
    CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
    rl_result = GetReturnWide(false);
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);

  int reg_status = 0;
  RegStorage res_lo;
  RegStorage res_hi;
  bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
      !IsTemp(rl_dest.reg.GetLowReg()) && !IsTemp(rl_dest.reg.GetHighReg());
  bool src1_promoted = !IsTemp(rl_src1.reg.GetLowReg()) && !IsTemp(rl_src1.reg.GetHighReg());
  bool src2_promoted = !IsTemp(rl_src2.reg.GetLowReg()) && !IsTemp(rl_src2.reg.GetHighReg());
  // Check if rl_dest is *not* either operand and we have enough temp registers.
  if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
      (dest_promoted || src1_promoted || src2_promoted)) {
    // In this case, we do not need to manually allocate temp registers for result.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    res_lo = rl_result.reg.GetLow();
    res_hi = rl_result.reg.GetHigh();
  } else {
    res_lo = AllocTemp();
    if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
      // In this case, we have enough temp registers to be allocated for result.
      res_hi = AllocTemp();
      reg_status = 1;
    } else {
      // In this case, all temps are now allocated.
      // res_hi will be allocated after we can free src1_hi.
      reg_status = 2;
    }
  }

  // Temporarily add LR to the temp pool, and assign it to tmp1
  MarkTemp(rARM_LR);
  FreeTemp(rARM_LR);
  RegStorage tmp1 = rs_rARM_LR;
  LockTemp(rARM_LR);

  if (rl_src1.reg == rl_src2.reg) {
    DCHECK(res_hi.Valid());
    DCHECK(res_lo.Valid());
    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
            rl_src1.reg.GetLowReg());
    OpRegRegRegShift(kOpAdd, res_hi.GetReg(), res_hi.GetReg(), tmp1.GetReg(),
                     EncodeShift(kArmLsl, 1));
  } else {
    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
    if (reg_status == 2) {
      DCHECK(!res_hi.Valid());
      DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
      DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
      FreeTemp(rl_src1.reg.GetHighReg());
      res_hi = AllocTemp();
    }
    DCHECK(res_hi.Valid());
    DCHECK(res_lo.Valid());
    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
            rl_src1.reg.GetLowReg());
    NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
            tmp1.GetReg());
    NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
    if (reg_status == 2) {
      // Clobber rl_src1 since it was corrupted.
      FreeTemp(rl_src1.reg);
      Clobber(rl_src1.reg);
    }
  }

  // Now, restore lr to its non-temp status.
  FreeTemp(tmp1);
  Clobber(rARM_LR);
  UnmarkTemp(rARM_LR);

  if (reg_status != 0) {
    // We had manually allocated registers for rl_result.
    // Now construct a RegLocation.
    rl_result = GetReturnWide(false);  // Just using as a template.
    rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
  }

  StoreValueWide(rl_dest, rl_result);
}

void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
}

void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
}

void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
}

void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
}

void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenXorLong for Arm";
}

/*
 * Generate array load
 */
void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;
  RegLocation rl_result;
  bool constant_index = rl_index.is_const;
  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  if (rl_dest.wide) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    /* Get len */
    LoadWordDisp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  if (rl_dest.wide || rl_dest.fp || constant_index) {
    RegStorage reg_ptr;
    if (constant_index) {
      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
    } else {
      // No special indexed operation, lea + load w/ displacement
      reg_ptr = AllocTemp();
      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
                       EncodeShift(kArmLsl, scale));
      FreeTemp(rl_index.reg.GetReg());
    }
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index),
                      kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.reg, kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }
    if (rl_dest.wide) {
      LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg, INVALID_SREG);
      MarkPossibleNullPointerException(opt_flags);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValueWide(rl_dest, rl_result);
    } else {
      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
      MarkPossibleNullPointerException(opt_flags);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValue(rl_dest, rl_result);
    }
  } else {
    // Offset base, then use indexed load
    RegStorage reg_ptr = AllocTemp();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    FreeTemp(rl_array.reg.GetReg());
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.reg, reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
    MarkPossibleNullPointerException(opt_flags);
    FreeTemp(reg_ptr);
    StoreValue(rl_dest, rl_result);
  }
}

/*
 * Generate array store
 */
void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  int data_offset;
  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset.
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  RegStorage reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.reg;
  } else if (IsTemp(rl_array.reg.GetReg()) && !card_mark) {
    Clobber(rl_array.reg.GetReg());
    reg_ptr = rl_array.reg;
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTemp();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps(4) here.
    /* Get len */
    LoadWordDisp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide || rl_src.fp || constant_index) {
    if (rl_src.wide) {
      rl_src = LoadValueWide(rl_src, reg_class);
    } else {
      rl_src = LoadValue(rl_src, reg_class);
    }
    if (!constant_index) {
      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
                       EncodeShift(kArmLsl, scale));
    }
    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index),
                      kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.reg, kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }

    if (rl_src.wide) {
      StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg);
    } else {
      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
    }
    MarkPossibleNullPointerException(opt_flags);
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    rl_src = LoadValue(rl_src, reg_class);
    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.reg, reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
    MarkPossibleNullPointerException(opt_flags);
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(rl_src.reg, rl_array.reg);
  }
}

void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    StoreValueWide(rl_dest, rl_src);
    return;
  }
  if (BadOverlap(rl_src, rl_dest)) {
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
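  // For 0 < n < 32 the result is composed from both words, e.g. for SHL:
  //   result.hi = (src.hi << n) | (src.lo >> (32 - n));  result.lo = src.lo << n;
  // amounts of 32-63 move one (shifted) word across and fill the other word
  // with zeros or sign bits.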
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      if (shift_amount == 1) {
        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
      } else if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(),
                         rl_src.reg.GetLowReg(), EncodeShift(kArmLsr, 32 - shift_amount));
        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
                         EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  StoreValueWide(rl_dest, rl_result);
}

void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
    if (!rl_src2.is_const) {
      // Don't bother with special handling for subtract from immediate.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
      return;
    }
  } else {
    // Normalize
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  if (BadOverlap(rl_src1, rl_dest)) {
    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
    return;
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
  uint32_t val_lo = Low32Bits(val);
  uint32_t val_hi = High32Bits(val);
  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
  int32_t mod_imm_hi = ModifiedImmediate(val_hi);

  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
        return;
      }
      break;
    default:
      break;
  }
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::SUB_LONG:
      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    default:
      LOG(FATAL) << "Unexpected opcode " << opcode;
  }
  StoreValueWide(rl_dest, rl_result);
}

}  // namespace art