1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* This file contains codegen for the X86 ISA */
18
19#include "codegen_x86.h"
20
21#include "art_method.h"
22#include "base/bit_utils.h"
23#include "base/logging.h"
24#include "dex/quick/mir_to_lir-inl.h"
25#include "dex/reg_storage_eq.h"
26#include "mirror/array-inl.h"
27#include "x86_lir.h"
28
29namespace art {
30
31/*
32 * Compare two 64-bit values
33 *    x = y     return  0
34 *    x < y     return -1
35 *    x > y     return  1
36 */
37void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
38                            RegLocation rl_src2) {
39  if (cu_->target64) {
40    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
41    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
42    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
43    RegStorage temp_reg = AllocTemp();
44    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
45    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);   // result = (src1 > src2) ? 1 : 0
46    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 >= src2) ? 0 : 1
47    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
48    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
49
50    StoreValue(rl_dest, rl_result);
51    FreeTemp(temp_reg);
52    return;
53  }
54
55  // Prepare for explicit register usage
56  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
57  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
58  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
59  LoadValueDirectWideFixed(rl_src1, r_tmp1);
60  LoadValueDirectWideFixed(rl_src2, r_tmp2);
61  // Compute (r1:r0) = (r1:r0) - (r3:r2)
62  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
63  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
64  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
65  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
66  OpReg(kOpNeg, rs_r2);         // r2 = -r2
67  OpRegReg(kOpOr, rs_r0, rs_r1);   // r0 = high | low - sets ZF
68  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
69  NewLIR2(kX86Movzx8RR, r0, r0);
70  OpRegReg(kOpOr, rs_r0, rs_r2);   // r0 = r0 | r2
71  RegLocation rl_result = LocCReturn();
72  StoreValue(rl_dest, rl_result);
73}
74
75X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
76  switch (cond) {
77    case kCondEq: return kX86CondEq;
78    case kCondNe: return kX86CondNe;
79    case kCondCs: return kX86CondC;
80    case kCondCc: return kX86CondNc;
81    case kCondUlt: return kX86CondC;
82    case kCondUge: return kX86CondNc;
83    case kCondMi: return kX86CondS;
84    case kCondPl: return kX86CondNs;
85    case kCondVs: return kX86CondO;
86    case kCondVc: return kX86CondNo;
87    case kCondHi: return kX86CondA;
88    case kCondLs: return kX86CondBe;
89    case kCondGe: return kX86CondGe;
90    case kCondLt: return kX86CondL;
91    case kCondGt: return kX86CondG;
92    case kCondLe: return kX86CondLe;
93    case kCondAl:
94    case kCondNv: LOG(FATAL) << "Should not reach here";
95  }
96  return kX86CondO;
97}
98
99LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
100  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
101  X86ConditionCode cc = X86ConditionEncoding(cond);
102  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ ,
103                        cc);
104  branch->target = target;
105  return branch;
106}
107
108LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
109                                int check_value, LIR* target) {
110  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
111    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
112    NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg());
113  } else {
114    if (reg.Is64Bit()) {
115      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
116    } else {
117      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
118    }
119  }
120  X86ConditionCode cc = X86ConditionEncoding(cond);
121  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
122  branch->target = target;
123  return branch;
124}
125
126LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
127  // If src or dest is a pair, we'll be using low reg.
128  if (r_dest.IsPair()) {
129    r_dest = r_dest.GetLow();
130  }
131  if (r_src.IsPair()) {
132    r_src = r_src.GetLow();
133  }
134  if (r_dest.IsFloat() || r_src.IsFloat())
135    return OpFpRegCopy(r_dest, r_src);
136  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
137                    r_dest.GetReg(), r_src.GetReg());
138  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
139    res->flags.is_nop = true;
140  }
141  return res;
142}
143
144void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
145  if (r_dest != r_src) {
146    LIR *res = OpRegCopyNoInsert(r_dest, r_src);
147    AppendLIR(res);
148  }
149}
150
151void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
152  if (r_dest != r_src) {
153    bool dest_fp = r_dest.IsFloat();
154    bool src_fp = r_src.IsFloat();
155    if (dest_fp) {
156      if (src_fp) {
157        OpRegCopy(r_dest, r_src);
158      } else {
159        // TODO: Prevent this from happening in the code. The result is often
160        // unused or could have been loaded more easily from memory.
161        if (!r_src.IsPair()) {
162          DCHECK(!r_dest.IsPair());
163          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
164        } else {
165          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
166          RegStorage r_tmp = AllocTempDouble();
167          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
168          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
169          FreeTemp(r_tmp);
170        }
171      }
172    } else {
173      if (src_fp) {
174        if (!r_dest.IsPair()) {
175          DCHECK(!r_src.IsPair());
176          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
177        } else {
178          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
179          RegStorage temp_reg = AllocTempDouble();
180          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
181          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
182          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
183        }
184      } else {
185        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
186        if (!r_src.IsPair()) {
187          // Just copy the register directly.
188          OpRegCopy(r_dest, r_src);
189        } else {
190          // Handle overlap
191          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
192              r_src.GetLowReg() == r_dest.GetHighReg()) {
193            // Deal with cycles.
194            RegStorage temp_reg = AllocTemp();
195            OpRegCopy(temp_reg, r_dest.GetHigh());
196            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
197            OpRegCopy(r_dest.GetLow(), temp_reg);
198            FreeTemp(temp_reg);
199          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
200            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
201            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
202          } else {
203            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
204            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
205          }
206        }
207      }
208    }
209  }
210}
211
212void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
213                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
214                                  RegisterClass dest_reg_class) {
215  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
216  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
217
218  // We really need this check for correctness, otherwise we will need to do more checks in
219  // non zero/one case
220  if (true_val == false_val) {
221    LoadConstantNoClobber(rs_dest, true_val);
222    return;
223  }
224
225  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);
226
227  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
228  if (zero_one_case && IsByteRegister(rs_dest)) {
229    if (!dest_intersect) {
230      LoadConstantNoClobber(rs_dest, 0);
231    }
232    OpRegReg(kOpCmp, left_op, right_op);
233    // Set the low byte of the result to 0 or 1 from the compare condition code.
234    NewLIR2(kX86Set8R, rs_dest.GetReg(),
235            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
236    if (dest_intersect) {
237      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
238    }
239  } else {
240    // Be careful rs_dest can be changed only after cmp because it can be the same as one of ops
241    // and it cannot use xor because it makes cc flags to be dirty
242    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
243    if (temp_reg.Valid()) {
244      if (false_val == 0 && dest_intersect) {
245        code = FlipComparisonOrder(code);
246        std::swap(true_val, false_val);
247      }
248      if (!dest_intersect) {
249        LoadConstantNoClobber(rs_dest, false_val);
250      }
251      LoadConstantNoClobber(temp_reg, true_val);
252      OpRegReg(kOpCmp, left_op, right_op);
253      if (dest_intersect) {
254        LoadConstantNoClobber(rs_dest, false_val);
255        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
256      }
257      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
258      FreeTemp(temp_reg);
259    } else {
260      // slow path
261      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
262      LoadConstantNoClobber(rs_dest, false_val);
263      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
264      LIR* true_case = NewLIR0(kPseudoTargetLabel);
265      cmp_branch->target = true_case;
266      LoadConstantNoClobber(rs_dest, true_val);
267      LIR* end = NewLIR0(kPseudoTargetLabel);
268      that_is_it->target = end;
269    }
270  }
271}
272
273void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
274  UNUSED(bb);
275  RegLocation rl_result;
276  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
277  RegLocation rl_dest = mir_graph_->GetDest(mir);
278  // Avoid using float regs here.
279  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
280  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
281  ConditionCode ccode = mir->meta.ccode;
282
283  // The kMirOpSelect has two variants, one for constants and one for moves.
284  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
285
286  if (is_constant_case) {
287    int true_val = mir->dalvikInsn.vB;
288    int false_val = mir->dalvikInsn.vC;
289
290    // simplest strange case
291    if (true_val == false_val) {
292      rl_result = EvalLoc(rl_dest, result_reg_class, true);
293      LoadConstantNoClobber(rl_result.reg, true_val);
294    } else {
295      // TODO: use GenSelectConst32 and handle additional opcode patterns such as
296      // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
297      rl_src = LoadValue(rl_src, src_reg_class);
298      rl_result = EvalLoc(rl_dest, result_reg_class, true);
299      /*
300       * For ccode == kCondEq:
301       *
302       * 1) When the true case is zero and result_reg is not same as src_reg:
303       *     xor result_reg, result_reg
304       *     cmp $0, src_reg
305       *     mov t1, $false_case
306       *     cmovnz result_reg, t1
307       * 2) When the false case is zero and result_reg is not same as src_reg:
308       *     xor result_reg, result_reg
309       *     cmp $0, src_reg
310       *     mov t1, $true_case
311       *     cmovz result_reg, t1
312       * 3) All other cases (we do compare first to set eflags):
313       *     cmp $0, src_reg
314       *     mov result_reg, $false_case
315       *     mov t1, $true_case
316       *     cmovz result_reg, t1
317       */
318      // FIXME: depending on how you use registers you could get a false != mismatch when dealing
319      // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
320      const bool result_reg_same_as_src =
321          (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
322      const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
323      const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
324      const bool catch_all_case = !(true_zero_case || false_zero_case);
325
326      if (true_zero_case || false_zero_case) {
327        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
328      }
329
330      if (true_zero_case || false_zero_case || catch_all_case) {
331        OpRegImm(kOpCmp, rl_src.reg, 0);
332      }
333
334      if (catch_all_case) {
335        OpRegImm(kOpMov, rl_result.reg, false_val);
336      }
337
338      if (true_zero_case || false_zero_case || catch_all_case) {
339        ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
340        int immediateForTemp = true_zero_case ? false_val : true_val;
341        RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
342        OpRegImm(kOpMov, temp1_reg, immediateForTemp);
343
344        OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
345
346        FreeTemp(temp1_reg);
347      }
348    }
349  } else {
350    rl_src = LoadValue(rl_src, src_reg_class);
351    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
352    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
353    rl_true = LoadValue(rl_true, result_reg_class);
354    rl_false = LoadValue(rl_false, result_reg_class);
355    rl_result = EvalLoc(rl_dest, result_reg_class, true);
356
357    /*
358     * For ccode == kCondEq:
359     *
360     * 1) When true case is already in place:
361     *     cmp $0, src_reg
362     *     cmovnz result_reg, false_reg
363     * 2) When false case is already in place:
364     *     cmp $0, src_reg
365     *     cmovz result_reg, true_reg
366     * 3) When neither cases are in place:
367     *     cmp $0, src_reg
368     *     mov result_reg, false_reg
369     *     cmovz result_reg, true_reg
370     */
371
372    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
373    OpRegImm(kOpCmp, rl_src.reg, 0);
374
375    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
376      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
377    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
378      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
379    } else {
380      OpRegCopy(rl_result.reg, rl_false.reg);
381      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
382    }
383  }
384
385  StoreValue(rl_dest, rl_result);
386}
387
388void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
389  LIR* taken = &block_label_list_[bb->taken];
390  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
391  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
392  ConditionCode ccode = mir->meta.ccode;
393
394  if (rl_src1.is_const) {
395    std::swap(rl_src1, rl_src2);
396    ccode = FlipComparisonOrder(ccode);
397  }
398  if (rl_src2.is_const) {
399    // Do special compare/branch against simple const operand
400    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
401    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
402    return;
403  }
404
405  if (cu_->target64) {
406    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
407    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
408
409    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
410    OpCondBranch(ccode, taken);
411    return;
412  }
413
414  // Prepare for explicit register usage
415  ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
416  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
417  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
418  LoadValueDirectWideFixed(rl_src1, r_tmp1);
419  LoadValueDirectWideFixed(rl_src2, r_tmp2);
420
421  // Swap operands and condition code to prevent use of zero flag.
422  if (ccode == kCondLe || ccode == kCondGt) {
423    // Compute (r3:r2) = (r3:r2) - (r1:r0)
424    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
425    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
426  } else {
427    // Compute (r1:r0) = (r1:r0) - (r3:r2)
428    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
429    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
430  }
431  switch (ccode) {
432    case kCondEq:
433    case kCondNe:
434      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
435      break;
436    case kCondLe:
437      ccode = kCondGe;
438      break;
439    case kCondGt:
440      ccode = kCondLt;
441      break;
442    case kCondLt:
443    case kCondGe:
444      break;
445    default:
446      LOG(FATAL) << "Unexpected ccode: " << ccode;
447  }
448  OpCondBranch(ccode, taken);
449}
450
451void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
452                                          int64_t val, ConditionCode ccode) {
453  int32_t val_lo = Low32Bits(val);
454  int32_t val_hi = High32Bits(val);
455  LIR* taken = &block_label_list_[bb->taken];
456  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
457  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
458
459  if (cu_->target64) {
460    if (is_equality_test && val == 0) {
461      // We can simplify of comparing for ==, != to 0.
462      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
463    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
464      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
465    } else {
466      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
467      LoadConstantWide(tmp, val);
468      OpRegReg(kOpCmp, rl_src1.reg, tmp);
469      FreeTemp(tmp);
470    }
471    OpCondBranch(ccode, taken);
472    return;
473  }
474
475  if (is_equality_test && val != 0) {
476    rl_src1 = ForceTempWide(rl_src1);
477  }
478  RegStorage low_reg = rl_src1.reg.GetLow();
479  RegStorage high_reg = rl_src1.reg.GetHigh();
480
481  if (is_equality_test) {
482    // We can simplify of comparing for ==, != to 0.
483    if (val == 0) {
484      if (IsTemp(low_reg)) {
485        OpRegReg(kOpOr, low_reg, high_reg);
486        // We have now changed it; ignore the old values.
487        Clobber(rl_src1.reg);
488      } else {
489        RegStorage t_reg = AllocTemp();
490        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
491        FreeTemp(t_reg);
492      }
493      OpCondBranch(ccode, taken);
494      return;
495    }
496
497    // Need to compute the actual value for ==, !=.
498    OpRegImm(kOpSub, low_reg, val_lo);
499    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
500    OpRegReg(kOpOr, high_reg, low_reg);
501    Clobber(rl_src1.reg);
502  } else if (ccode == kCondLe || ccode == kCondGt) {
503    // Swap operands and condition code to prevent use of zero flag.
504    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
505    LoadConstantWide(tmp, val);
506    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
507    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
508    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
509    FreeTemp(tmp);
510  } else {
511    // We can use a compare for the low word to set CF.
512    OpRegImm(kOpCmp, low_reg, val_lo);
513    if (IsTemp(high_reg)) {
514      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
515      // We have now changed it; ignore the old values.
516      Clobber(rl_src1.reg);
517    } else {
518      // mov temp_reg, high_reg; sbb temp_reg, high_constant
519      RegStorage t_reg = AllocTemp();
520      OpRegCopy(t_reg, high_reg);
521      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
522      FreeTemp(t_reg);
523    }
524  }
525
526  OpCondBranch(ccode, taken);
527}
528
529void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
530  // It does not make sense to calculate magic and shift for zero divisor.
531  DCHECK_NE(divisor, 0);
532
533  /* According to H.S.Warren's Hacker's Delight Chapter 10 and
534   * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
535   * The magic number M and shift S can be calculated in the following way:
536   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
537   * where divisor(d) >=2.
538   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
539   * where divisor(d) <= -2.
540   * Thus nc can be calculated like:
541   * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
542   * nc = -exp + (exp + 1) % d, where d >= 2 and exp = 2^31 for int or 2^63 for long
543   *
544   * So the shift p is the smallest p satisfying
545   * 2^p > nc * (d - 2^p % d), where d >= 2
546   * 2^p > nc * (d + 2^p % d), where d <= -2.
547   *
548   * the magic number M is calcuated by
549   * M = (2^p + d - 2^p % d) / d, where d >= 2
550   * M = (2^p - d - 2^p % d) / d, where d <= -2.
551   *
552   * Notice that p is always bigger than or equal to 32/64, so we just return 32-p/64-p as
553   * the shift number S.
554   */
555
556  int64_t p = (is_long) ? 63 : 31;
557  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;
558
559  // Initialize the computations.
560  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
561  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
562                                    static_cast<uint32_t>(divisor) >> 31);
563  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
564  uint64_t quotient1 = exp / abs_nc;
565  uint64_t remainder1 = exp % abs_nc;
566  uint64_t quotient2 = exp / abs_d;
567  uint64_t remainder2 = exp % abs_d;
568
569  /*
570   * To avoid handling both positive and negative divisor, Hacker's Delight
571   * introduces a method to handle these 2 cases together to avoid duplication.
572   */
573  uint64_t delta;
574  do {
575    p++;
576    quotient1 = 2 * quotient1;
577    remainder1 = 2 * remainder1;
578    if (remainder1 >= abs_nc) {
579      quotient1++;
580      remainder1 = remainder1 - abs_nc;
581    }
582    quotient2 = 2 * quotient2;
583    remainder2 = 2 * remainder2;
584    if (remainder2 >= abs_d) {
585      quotient2++;
586      remainder2 = remainder2 - abs_d;
587    }
588    delta = abs_d - remainder2;
589  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
590
591  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
592
593  if (!is_long) {
594    magic = static_cast<int>(magic);
595  }
596
597  shift = (is_long) ? p - 64 : p - 32;
598}
599
600RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
601  UNUSED(rl_dest, reg_lo, lit, is_div);
602  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
603  UNREACHABLE();
604}
605
606RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
607                                     int imm, bool is_div) {
608  // Use a multiply (and fixup) to perform an int div/rem by a constant.
609  RegLocation rl_result;
610
611  if (imm == 1) {
612    rl_result = EvalLoc(rl_dest, kCoreReg, true);
613    if (is_div) {
614      // x / 1 == x.
615      LoadValueDirectFixed(rl_src, rl_result.reg);
616    } else {
617      // x % 1 == 0.
618      LoadConstantNoClobber(rl_result.reg, 0);
619    }
620  } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
621    rl_result = EvalLoc(rl_dest, kCoreReg, true);
622    if (is_div) {
623      LoadValueDirectFixed(rl_src, rl_result.reg);
624
625      // Check if numerator is 0
626      OpRegImm(kOpCmp, rl_result.reg, 0);
627      LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
628
629      // handle 0x80000000 / -1
630      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
631      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
632
633      // for x != MIN_INT, x / -1 == -x.
634      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());
635
636      // EAX already contains the right value (0x80000000),
637      minint_branch->target = NewLIR0(kPseudoTargetLabel);
638      branch->target = NewLIR0(kPseudoTargetLabel);
639    } else {
640      // x % -1 == 0.
641      LoadConstantNoClobber(rl_result.reg, 0);
642    }
643  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
644    // Division using shifting.
645    rl_src = LoadValue(rl_src, kCoreReg);
646    rl_result = EvalLoc(rl_dest, kCoreReg, true);
647    if (IsSameReg(rl_result.reg, rl_src.reg)) {
648      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
649      rl_result.reg.SetReg(rs_temp.GetReg());
650    }
651
652    // Check if numerator is 0
653    OpRegImm(kOpCmp, rl_src.reg, 0);
654    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
655    LoadConstantNoClobber(rl_result.reg, 0);
656    LIR* done = NewLIR1(kX86Jmp8, 0);
657    branch->target = NewLIR0(kPseudoTargetLabel);
658
659    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
660    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
661    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
662    int shift_amount = CTZ(imm);
663    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
664    if (imm < 0) {
665      OpReg(kOpNeg, rl_result.reg);
666    }
667    done->target = NewLIR0(kPseudoTargetLabel);
668  } else {
669    CHECK(imm <= -2 || imm >= 2);
670
671    // Use H.S.Warren's Hacker's Delight Chapter 10 and
672    // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
673    int64_t magic;
674    int shift;
675    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);
676
677    /*
678     * For imm >= 2,
679     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
680     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
681     * For imm <= -2,
682     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
683     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
684     * We implement this algorithm in the following way:
685     * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
686     * 2. if imm > 0 and magic < 0, add numerator to EDX
687     *    if imm < 0 and magic > 0, sub numerator from EDX
688     * 3. if S !=0, SAR S bits for EDX
689     * 4. add 1 to EDX if EDX < 0
690     * 5. Thus, EDX is the quotient
691     */
692
693    FlushReg(rs_r0);
694    Clobber(rs_r0);
695    LockTemp(rs_r0);
696    FlushReg(rs_r2);
697    Clobber(rs_r2);
698    LockTemp(rs_r2);
699
700    // Assume that the result will be in EDX for divide, and EAX for remainder.
701    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0,
702                 INVALID_SREG, INVALID_SREG};
703
704    // We need the value at least twice.  Load into a temp.
705    rl_src = LoadValue(rl_src, kCoreReg);
706    RegStorage numerator_reg = rl_src.reg;
707
708    // Check if numerator is 0.
709    OpRegImm(kOpCmp, numerator_reg, 0);
710    LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
711    // Return result 0 if numerator was 0.
712    LoadConstantNoClobber(rl_result.reg, 0);
713    LIR* done = NewLIR1(kX86Jmp8, 0);
714    branch->target = NewLIR0(kPseudoTargetLabel);
715
716    // EAX = magic.
717    LoadConstant(rs_r0, magic);
718
719    // EDX:EAX = magic * numerator.
720    NewLIR1(kX86Imul32DaR, numerator_reg.GetReg());
721
722    if (imm > 0 && magic < 0) {
723      // Add numerator to EDX.
724      DCHECK(numerator_reg.Valid());
725      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
726    } else if (imm < 0 && magic > 0) {
727      DCHECK(numerator_reg.Valid());
728      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
729    }
730
731    // Do we need the shift?
732    if (shift != 0) {
733      // Shift EDX by 'shift' bits.
734      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
735    }
736
737    // Add 1 to EDX if EDX < 0.
738
739    // Move EDX to EAX.
740    OpRegCopy(rs_r0, rs_r2);
741
742    // Move sign bit to bit 0, zeroing the rest.
743    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);
744
745    // EDX = EDX + EAX.
746    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());
747
748    // Quotient is in EDX.
749    if (!is_div) {
750      // We need to compute the remainder.
751      // Remainder is divisor - (quotient * imm).
752      DCHECK(numerator_reg.Valid());
753      OpRegCopy(rs_r0, numerator_reg);
754
755      // EAX = numerator * imm.
756      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);
757
758      // EAX -= EDX.
759      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());
760
761      // For this case, return the result in EAX.
762    }
763    done->target = NewLIR0(kPseudoTargetLabel);
764  }
765
766  return rl_result;
767}
768
769RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
770                                  bool is_div) {
771  UNUSED(rl_dest, reg_lo, reg_hi, is_div);
772  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
773  UNREACHABLE();
774}
775
776RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
777                                  RegLocation rl_src2, bool is_div, int flags) {
778  UNUSED(rl_dest);
779  // We have to use fixed registers, so flush all the temps.
780
781  // Prepare for explicit register usage.
782  ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
783
784  // Load LHS into EAX.
785  LoadValueDirectFixed(rl_src1, rs_r0);
786
787  // Load RHS into EBX.
788  LoadValueDirectFixed(rl_src2, rs_r1);
789
790  // Copy LHS sign bit into EDX.
791  NewLIR0(kx86Cdq32Da);
792
793  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
794    // Handle division by zero case.
795    GenDivZeroCheck(rs_r1);
796  }
797
798  // Check if numerator is 0
799  OpRegImm(kOpCmp, rs_r0, 0);
800  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
801
802  // Have to catch 0x80000000/-1 case, or we will get an exception!
803  OpRegImm(kOpCmp, rs_r1, -1);
804  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
805
806  // RHS is -1.
807  OpRegImm(kOpCmp, rs_r0, 0x80000000);
808  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
809
810  branch->target = NewLIR0(kPseudoTargetLabel);
811
812  // In 0x80000000/-1 case.
813  if (!is_div) {
814    // For DIV, EAX is already right. For REM, we need EDX 0.
815    LoadConstantNoClobber(rs_r2, 0);
816  }
817  LIR* done = NewLIR1(kX86Jmp8, 0);
818
819  // Expected case.
820  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
821  minint_branch->target = minus_one_branch->target;
822  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
823  done->target = NewLIR0(kPseudoTargetLabel);
824
825  // Result is in EAX for div and EDX for rem.
826  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
827  if (!is_div) {
828    rl_result.reg.SetReg(r2);
829  }
830  return rl_result;
831}
832
833static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
834  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
835}
836
837bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
838  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
839
840  if (is_long && !cu_->target64) {
841   /*
842    * We want to implement the following algorithm
843    * mov eax, low part of arg1
844    * mov edx, high part of arg1
845    * mov ebx, low part of arg2
846    * mov ecx, high part of arg2
847    * mov edi, eax
848    * sub edi, ebx
849    * mov edi, edx
850    * sbb edi, ecx
851    * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx"
852    * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx"
853    *
854    * The algorithm above needs 5 registers: a pair for the first operand
855    * (which later will be used as result), a pair for the second operand
856    * and a temp register (e.g. 'edi') for intermediate calculations.
857    * Ideally we have 6 GP caller-save registers in 32-bit mode. They are:
858    * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should be
859    * always enough registers to operate on. Practically, there is a pair
860    * of registers 'edi' and 'esi' which holds promoted values and
861    * sometimes should be treated as 'callee save'. If one of the operands
862    * is in the promoted registers then we have enough register to
863    * operate on. Otherwise there is lack of resources and we have to
864    * save 'edi' before calculations and restore after.
865    */
866
867    RegLocation rl_src1 = info->args[0];
868    RegLocation rl_src2 = info->args[2];
869    RegLocation rl_dest = InlineTargetWide(info);
870
871    if (rl_dest.s_reg_low == INVALID_SREG) {
872      // Result is unused, the code is dead. Inlining successful, no code generated.
873      return true;
874    }
875
876    if (PartiallyIntersects(rl_src1, rl_dest) &&
877        PartiallyIntersects(rl_src2, rl_dest)) {
878      // A special case which we don't want to handle.
879      // This is when src1 is mapped on v0 and v1,
880      // src2 is mapped on v2, v3,
881      // result is mapped on v1, v2
882      return false;
883    }
884
885
886    /*
887     * If the result register is the same as the second element, then we
888     * need to be careful. The reason is that the first copy will
889     * inadvertently clobber the second element with the first one thus
890     * yielding the wrong result. Thus we do a swap in that case.
891     */
892    if (Intersects(rl_src2, rl_dest)) {
893      std::swap(rl_src1, rl_src2);
894    }
895
896    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
897    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
898
899    // Pick the first integer as min/max.
900    OpRegCopyWide(rl_result.reg, rl_src1.reg);
901
902    /*
903     * If the integers are both in the same register, then there is
904     * nothing else to do because they are equal and we have already
905     * moved one into the result.
906     */
907    if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
908        mir_graph_->SRegToVReg(rl_src2.s_reg_low)) {
909      StoreValueWide(rl_dest, rl_result);
910      return true;
911    }
912
913    // Free registers to make some room for the second operand.
914    // But don't try to free part of a source which intersects
915    // part of result or promoted registers.
916
917    if (IsTemp(rl_src1.reg.GetLow()) &&
918       (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) &&
919       (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) {
920      // Is low part temporary and doesn't intersect any parts of result?
921      FreeTemp(rl_src1.reg.GetLow());
922    }
923
924    if (IsTemp(rl_src1.reg.GetHigh()) &&
925       (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) &&
926       (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) {
927      // Is high part temporary and doesn't intersect any parts of result?
928      FreeTemp(rl_src1.reg.GetHigh());
929    }
930
931    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
932
933    // Do we have a free register for intermediate calculations?
934    RegStorage tmp = AllocTemp(false);
935    const int kRegSize = cu_->target64 ? 8 : 4;
936    if (tmp == RegStorage::InvalidReg()) {
937       /*
938        * No, will use 'edi'.
939        *
940        * As mentioned above we have 4 temporary and 2 promotable
941        * caller-save registers. Therefore, we assume that a free
942        * register can be allocated only if 'esi' and 'edi' are
943        * already used as operands. If number of promotable registers
944        * increases from 2 to 4 then our assumption fails and operand
945        * data is corrupted.
946        * Let's DCHECK it.
947        */
948       DCHECK(IsTemp(rl_src2.reg.GetLow()) &&
949              IsTemp(rl_src2.reg.GetHigh()) &&
950              IsTemp(rl_result.reg.GetLow()) &&
951              IsTemp(rl_result.reg.GetHigh()));
952       tmp = rs_rDI;
953       NewLIR1(kX86Push32R, tmp.GetReg());
954       cfi_.AdjustCFAOffset(kRegSize);
955       // Record cfi only if it is not already spilled.
956       if (!CoreSpillMaskContains(tmp.GetReg())) {
957         cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0);
958       }
959    }
960
961    // Now we are ready to do calculations.
962    OpRegReg(kOpMov, tmp, rl_result.reg.GetLow());
963    OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow());
964    OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh());
965    OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh());
966
967    // Let's put pop 'edi' here to break a bit the dependency chain.
968    if (tmp == rs_rDI) {
969      NewLIR1(kX86Pop32R, tmp.GetReg());
970      cfi_.AdjustCFAOffset(-kRegSize);
971      if (!CoreSpillMaskContains(tmp.GetReg())) {
972        cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg()));
973      }
974    } else {
975      FreeTemp(tmp);
976    }
977
978    // Conditionally move the other integer into the destination register.
979    ConditionCode cc = is_min ? kCondGe : kCondLt;
980    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
981    OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
982    FreeTemp(rl_src2.reg);
983    StoreValueWide(rl_dest, rl_result);
984    return true;
985  }
986
987  // Get the two arguments to the invoke and place them in GP registers.
988  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
989  if (rl_dest.s_reg_low == INVALID_SREG) {
990    // Result is unused, the code is dead. Inlining successful, no code generated.
991    return true;
992  }
993  RegLocation rl_src1 = info->args[0];
994  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
995  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
996  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
997
998  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
999
1000  /*
1001   * If the result register is the same as the second element, then we need to be careful.
1002   * The reason is that the first copy will inadvertently clobber the second element with
1003   * the first one thus yielding the wrong result. Thus we do a swap in that case.
1004   */
1005  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1006    std::swap(rl_src1, rl_src2);
1007  }
1008
1009  // Pick the first integer as min/max.
1010  OpRegCopy(rl_result.reg, rl_src1.reg);
1011
1012  // If the integers are both in the same register, then there is nothing else to do
1013  // because they are equal and we have already moved one into the result.
1014  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
1015    // It is possible we didn't pick correctly so do the actual comparison now.
1016    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
1017
1018    // Conditionally move the other integer into the destination register.
1019    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
1020    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
1021  }
1022
1023  if (is_long) {
1024    StoreValueWide(rl_dest, rl_result);
1025  } else {
1026    StoreValue(rl_dest, rl_result);
1027  }
1028  return true;
1029}
1030
1031bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
1032  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
1033  if (rl_dest.s_reg_low == INVALID_SREG) {
1034    // Result is unused, the code is dead. Inlining successful, no code generated.
1035    return true;
1036  }
1037  RegLocation rl_src_address = info->args[0];  // long address
1038  RegLocation rl_address;
1039  if (!cu_->target64) {
1040    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
1041    rl_address = LoadValue(rl_src_address, kCoreReg);
1042  } else {
1043    rl_address = LoadValueWide(rl_src_address, kCoreReg);
1044  }
1045  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1046  // Unaligned access is allowed on x86.
1047  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
1048  if (size == k64) {
1049    StoreValueWide(rl_dest, rl_result);
1050  } else {
1051    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
1052    StoreValue(rl_dest, rl_result);
1053  }
1054  return true;
1055}
1056
1057bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
1058  RegLocation rl_src_address = info->args[0];  // long address
1059  RegLocation rl_address;
1060  if (!cu_->target64) {
1061    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
1062    rl_address = LoadValue(rl_src_address, kCoreReg);
1063  } else {
1064    rl_address = LoadValueWide(rl_src_address, kCoreReg);
1065  }
1066  RegLocation rl_src_value = info->args[2];  // [size] value
1067  RegLocation rl_value;
1068  if (size == k64) {
1069    // Unaligned access is allowed on x86.
1070    rl_value = LoadValueWide(rl_src_value, kCoreReg);
1071  } else {
1072    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
1073    // In 32-bit mode the only EAX..EDX registers can be used with Mov8MR.
1074    if (!cu_->target64 && size == kSignedByte) {
1075      rl_src_value = UpdateLocTyped(rl_src_value);
1076      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
1077        RegStorage temp = AllocateByteRegister();
1078        OpRegCopy(temp, rl_src_value.reg);
1079        rl_value.reg = temp;
1080      } else {
1081        rl_value = LoadValue(rl_src_value, kCoreReg);
1082      }
1083    } else {
1084      rl_value = LoadValue(rl_src_value, kCoreReg);
1085    }
1086  }
1087  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
1088  return true;
1089}
1090
1091void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
1092  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
1093}
1094
1095void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
1096  DCHECK_EQ(kX86, cu_->instruction_set);
1097  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
1098}
1099
1100void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
1101  DCHECK_EQ(kX86_64, cu_->instruction_set);
1102  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
1103}
1104
1105static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
1106  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
1107}
1108
1109bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
1110  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
1111  // Unused - RegLocation rl_src_unsafe = info->args[0];
1112  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
1113  RegLocation rl_src_offset = info->args[2];  // long low
1114  if (!cu_->target64) {
1115    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
1116  }
1117  RegLocation rl_src_expected = info->args[4];  // int, long or Object
1118  // If is_long, high half is in info->args[5]
1119  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
1120  // If is_long, high half is in info->args[7]
1121  const int kRegSize = cu_->target64 ? 8 : 4;
1122
1123  if (is_long && cu_->target64) {
1124    // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
1125    FlushReg(rs_r0q);
1126    Clobber(rs_r0q);
1127    LockTemp(rs_r0q);
1128
1129    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1130    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
1131    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1132    LoadValueDirectWide(rl_src_expected, rs_r0q);
1133    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1134            rl_new_value.reg.GetReg());
1135
1136    // After a store we need to insert barrier in case of potential load. Since the
1137    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
1138    GenMemBarrier(kAnyAny);
1139
1140    FreeTemp(rs_r0q);
1141  } else if (is_long) {
1142    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
1143    FlushAllRegs();
1144    LockCallTemps();
1145    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
1146    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
1147    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
1148    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
1149    // FIXME: needs 64-bit update.
1150    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
1151    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
1152    DCHECK(!obj_in_si || !obj_in_di);
1153    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
1154    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
1155    DCHECK(!off_in_si || !off_in_di);
1156    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
1157    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
1158    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
1159    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
1160    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
1161    if (push_di) {
1162      NewLIR1(kX86Push32R, rs_rDI.GetReg());
1163      MarkTemp(rs_rDI);
1164      LockTemp(rs_rDI);
1165      cfi_.AdjustCFAOffset(kRegSize);
1166      // Record cfi only if it is not already spilled.
1167      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
1168        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
1169      }
1170    }
1171    if (push_si) {
1172      NewLIR1(kX86Push32R, rs_rSI.GetReg());
1173      MarkTemp(rs_rSI);
1174      LockTemp(rs_rSI);
1175      cfi_.AdjustCFAOffset(kRegSize);
1176      // Record cfi only if it is not already spilled.
1177      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
1178        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
1179      }
1180    }
1181    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1182    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
1183    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1184    if (!obj_in_si && !obj_in_di) {
1185      LoadWordDisp(rs_rSP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
1186      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
1187      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
1188      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
1189      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1190    }
1191    if (!off_in_si && !off_in_di) {
1192      LoadWordDisp(rs_rSP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
1193      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
1194      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
1195      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
1196      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1197    }
1198    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
1199
1200    // After a store we need to insert barrier to prevent reordering with either
1201    // earlier or later memory accesses.  Since
1202    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1203    // and it will be associated with the cmpxchg instruction, preventing both.
1204    GenMemBarrier(kAnyAny);
1205
1206    if (push_si) {
1207      FreeTemp(rs_rSI);
1208      UnmarkTemp(rs_rSI);
1209      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
1210      cfi_.AdjustCFAOffset(-kRegSize);
1211      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
1212        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
1213      }
1214    }
1215    if (push_di) {
1216      FreeTemp(rs_rDI);
1217      UnmarkTemp(rs_rDI);
1218      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1219      cfi_.AdjustCFAOffset(-kRegSize);
1220      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
1221        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
1222      }
1223    }
1224    FreeCallTemps();
1225  } else {
1226    // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
1227    FlushReg(rs_r0);
1228    Clobber(rs_r0);
1229    LockTemp(rs_r0);
1230
1231    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1232    RegLocation rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
1233
1234    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
1235      // Mark card for object assuming new value is stored.
1236      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
1237      MarkGCCard(0, rl_new_value.reg, rl_object.reg);
1238      LockTemp(rs_r0);
1239    }
1240
1241    RegLocation rl_offset;
1242    if (cu_->target64) {
1243      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1244    } else {
1245      rl_offset = LoadValue(rl_src_offset, kCoreReg);
1246    }
1247    LoadValueDirect(rl_src_expected, rs_r0);
1248    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1249            rl_new_value.reg.GetReg());
1250
1251    // After a store we need to insert barrier to prevent reordering with either
1252    // earlier or later memory accesses.  Since
1253    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1254    // and it will be associated with the cmpxchg instruction, preventing both.
1255    GenMemBarrier(kAnyAny);
1256
1257    FreeTemp(rs_r0);
1258  }
1259
1260  // Convert ZF to boolean
1261  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
1262  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1263  RegStorage result_reg = rl_result.reg;
1264
1265  // For 32-bit, SETcc only works with EAX..EDX.
1266  if (!IsByteRegister(result_reg)) {
1267    result_reg = AllocateByteRegister();
1268  }
1269  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
1270  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
1271  if (IsTemp(result_reg)) {
1272    FreeTemp(result_reg);
1273  }
1274  StoreValue(rl_dest, rl_result);
1275  return true;
1276}
1277
1278void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) {
1279  RegStorage r_temp = AllocTemp();
1280  OpRegCopy(r_temp, result_reg);
1281  OpRegImm(kOpLsr, result_reg, shift);
1282  OpRegImm(kOpAnd, r_temp, value);
1283  OpRegImm(kOpAnd, result_reg, value);
1284  OpRegImm(kOpLsl, r_temp, shift);
1285  OpRegReg(kOpOr, result_reg, r_temp);
1286  FreeTemp(r_temp);
1287}
1288
1289void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) {
1290  RegStorage r_temp = AllocTempWide();
1291  OpRegCopy(r_temp, result_reg);
1292  OpRegImm(kOpLsr, result_reg, shift);
1293  RegStorage r_value = AllocTempWide();
1294  LoadConstantWide(r_value, value);
1295  OpRegReg(kOpAnd, r_temp, r_value);
1296  OpRegReg(kOpAnd, result_reg, r_value);
1297  OpRegImm(kOpLsl, r_temp, shift);
1298  OpRegReg(kOpOr, result_reg, r_temp);
1299  FreeTemp(r_temp);
1300  FreeTemp(r_value);
1301}
1302
1303bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
1304  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
1305  if (rl_dest.s_reg_low == INVALID_SREG) {
1306    // Result is unused, the code is dead. Inlining successful, no code generated.
1307    return true;
1308  }
1309  RegLocation rl_src_i = info->args[0];
1310  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg)
1311                                   : LoadValue(rl_src_i, kCoreReg);
1312  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1313  if (size == k64) {
1314    if (cu_->instruction_set == kX86_64) {
1315      /* Use one bswap instruction to reverse byte order first and then use 3 rounds of
1316         swapping bits to reverse bits in a long number x. Using bswap to save instructions
1317         compared to generic luni implementation which has 5 rounds of swapping bits.
1318         x = bswap x
1319         x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
1320         x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
1321         x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
1322      */
1323      OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
1324      SwapBits64(rl_result.reg, 1, 0x5555555555555555);
1325      SwapBits64(rl_result.reg, 2, 0x3333333333333333);
1326      SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f);
1327      StoreValueWide(rl_dest, rl_result);
1328      return true;
1329    }
1330    RegStorage r_i_low = rl_i.reg.GetLow();
1331    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
1332      // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the second
1333      // REV.
1334      r_i_low = AllocTemp();
1335      OpRegCopy(r_i_low, rl_i.reg);
1336    }
1337    OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh());
1338    OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low);
1339    // Free up at least one input register if it was a temp. Otherwise we may be in the bad
1340    // situation of not having a temp available for SwapBits. Make sure it's not overlapping
1341    // with the output, though.
1342    if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) {
1343      // There's definitely a free temp after this.
1344      FreeTemp(r_i_low);
1345    } else {
1346      // We opportunistically release both here. That saves duplication of the register state
1347      // lookup (to see if it's actually a temp).
1348      if (rl_i.reg.GetLowReg() != rl_result.reg.GetHighReg()) {
1349        FreeTemp(rl_i.reg.GetLow());
1350      }
1351      if (rl_i.reg.GetHighReg() != rl_result.reg.GetLowReg() &&
1352          rl_i.reg.GetHighReg() != rl_result.reg.GetHighReg()) {
1353        FreeTemp(rl_i.reg.GetHigh());
1354      }
1355    }
1356
1357    SwapBits(rl_result.reg.GetLow(), 1, 0x55555555);
1358    SwapBits(rl_result.reg.GetLow(), 2, 0x33333333);
1359    SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f);
1360    SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555);
1361    SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333);
1362    SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f);
1363    StoreValueWide(rl_dest, rl_result);
1364  } else {
1365    OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
1366    SwapBits(rl_result.reg, 1, 0x55555555);
1367    SwapBits(rl_result.reg, 2, 0x33333333);
1368    SwapBits(rl_result.reg, 4, 0x0f0f0f0f);
1369    StoreValue(rl_dest, rl_result);
1370  }
1371  return true;
1372}
1373
1374void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
1375  if (cu_->target64) {
1376    // We can do this directly using RIP addressing.
1377    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1378    LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
1379    res->target = target;
1380    res->flags.fixup = kFixupLoad;
1381    return;
1382  }
1383
1384  // Get the PC to a register and get the anchor.
1385  LIR* anchor;
1386  RegStorage r_pc = GetPcAndAnchor(&anchor);
1387
1388  // Load the proper value from the literal area.
1389  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1390  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
1391  res->operands[4] = WrapPointer(anchor);
1392  res->target = target;
1393  res->flags.fixup = kFixupLoad;
1394}
1395
1396bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
1397  return dex_cache_arrays_layout_.Valid();
1398}
1399
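// Materialize the current PC on 32-bit x86: a call with zero displacement pushes the address of
// the next instruction, which the following pop then moves into r_dest.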
1400LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
1401  DCHECK(!cu_->target64);
1402  LIR* call = NewLIR1(kX86CallI, 0);
1403  call->flags.fixup = kFixupLabel;
1404  LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
1405  pop->flags.fixup = kFixupLabel;
1406  DCHECK(NEXT_LIR(call) == pop);
1407  return call;
1408}
1409
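// Return a register holding the PC and set *anchor to the pop whose address that register holds.
// Reuses the method-wide PC-relative base register if one was set up; otherwise emits a fresh
// call/pop pair, using r_tmp for the result if it is valid.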
1410RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
1411  if (pc_rel_base_reg_.Valid()) {
1412    DCHECK(setup_pc_rel_base_reg_ != nullptr);
1413    *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
1414    DCHECK(*anchor != nullptr);
1415    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
1416    pc_rel_base_reg_used_ = true;
1417    return pc_rel_base_reg_;
1418  } else {
1419    RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
1420    LIR* load_pc = OpLoadPc(r_pc);
1421    *anchor = NEXT_LIR(load_pc);
1422    DCHECK(*anchor != nullptr);
1423    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
1424    return r_pc;
1425  }
1426}
1427
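// Load an entry from the dex cache arrays: a RIP-relative mov on x86-64, or an anchor-relative
// mov on 32-bit x86. The instruction is recorded in dex_cache_access_insns_ so that the dummy
// offset can be patched later.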
1428void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
1429                                          bool wide) {
1430  if (cu_->target64) {
1431    LIR* mov = NewLIR3(wide ? kX86Mov64RM : kX86Mov32RM, r_dest.GetReg(), kRIPReg,
1432        kDummy32BitOffset);
1433    mov->flags.fixup = kFixupLabel;
1434    mov->operands[3] = WrapPointer(dex_file);
1435    mov->operands[4] = offset;
1436    mov->target = mov;  // Used for pc_insn_offset (not used by x86-64 relative patcher).
1437    dex_cache_access_insns_.push_back(mov);
1438  } else {
1439    CHECK(!wide) << "Unsupported";
1440    // Get the PC to a register and get the anchor. Use r_dest for the temp if needed.
1441    LIR* anchor;
1442    RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
1443    LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
1444    mov->flags.fixup = kFixupLabel;
1445    mov->operands[3] = WrapPointer(dex_file);
1446    mov->operands[4] = offset;
1447    mov->target = anchor;  // Used for pc_insn_offset.
1448    dex_cache_access_insns_.push_back(mov);
1449  }
1450}
1451
1452LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
1453  UNUSED(r_base, count);
1454  LOG(FATAL) << "Unexpected use of OpVldm for x86";
1455  UNREACHABLE();
1456}
1457
1458LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
1459  UNUSED(r_base, count);
1460  LOG(FATAL) << "Unexpected use of OpVstm for x86";
1461  UNREACHABLE();
1462}
1463
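// Multiply by a constant with exactly two bits set (first_bit and second_bit):
// x * lit == (x + (x << (second_bit - first_bit))) << first_bit.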
1464void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1465                                               RegLocation rl_result, int lit,
1466                                               int first_bit, int second_bit) {
1467  UNUSED(lit);
1468  RegStorage t_reg = AllocTemp();
1469  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
1470  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
1471  FreeTemp(t_reg);
1472  if (first_bit != 0) {
1473    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1474  }
1475}
1476
1477void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1478  if (cu_->target64) {
1479    DCHECK(reg.Is64Bit());
1480
1481    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
1482  } else {
1483    DCHECK(reg.IsPair());
1484
1485    // We are not supposed to clobber the incoming storage, so allocate a temporary.
1486    RegStorage t_reg = AllocTemp();
1487    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
1488    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
1489    // The temp is no longer needed so free it at this time.
1490    FreeTemp(t_reg);
1491  }
1492
1493  // In case of zero, throw ArithmeticException.
1494  GenDivZeroCheck(kCondEq);
1495}
1496
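// Bounds check with a register index: compare the (unsigned) index against the array length at
// [array_base + len_offset] and branch to a slow path that calls kQuickThrowArrayBounds when
// index >= length.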
1497void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
1498                                     RegStorage array_base,
1499                                     int len_offset) {
1500  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1501   public:
1502    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
1503                             RegStorage index_in, RegStorage array_base_in, int32_t len_offset_in)
1504        : LIRSlowPath(m2l, branch_in),
1505          index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
1506    }
1507
1508    void Compile() OVERRIDE {
1509      m2l_->ResetRegPool();
1510      m2l_->ResetDefTracking();
1511      GenerateTargetLabel(kPseudoThrowTarget);
1512
1513      RegStorage new_index = index_;
1514      // Move index out of kArg1, either directly to kArg0, or to kArg2.
1515      // TODO: clean up to compare by register type rather than by raw register number.
1516      if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
1517        if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
1518          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
1519          new_index = m2l_->TargetReg(kArg2, kNotWide);
1520        } else {
1521          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
1522          new_index = m2l_->TargetReg(kArg0, kNotWide);
1523        }
1524      }
1525      // Load array length to kArg1.
1526      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1527      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1528      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
1529                                       m2l_->TargetReg(kArg1, kNotWide), true);
1530    }
1531
1532   private:
1533    const RegStorage index_;
1534    const RegStorage array_base_;
1535    const int32_t len_offset_;
1536  };
1537
1538  OpRegMem(kOpCmp, index, array_base, len_offset);
1539  MarkPossibleNullPointerException(0);
1540  LIR* branch = OpCondBranch(kCondUge, nullptr);
1541  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1542                                                    index, array_base, len_offset));
1543}
1544
1545void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
1546                                     RegStorage array_base,
1547                                     int32_t len_offset) {
1548  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1549   public:
1550    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch_in,
1551                             int32_t index_in, RegStorage array_base_in, int32_t len_offset_in)
1552        : LIRSlowPath(m2l, branch_in),
1553          index_(index_in), array_base_(array_base_in), len_offset_(len_offset_in) {
1554    }
1555
1556    void Compile() OVERRIDE {
1557      m2l_->ResetRegPool();
1558      m2l_->ResetDefTracking();
1559      GenerateTargetLabel(kPseudoThrowTarget);
1560
1561      // Load array length to kArg1.
1562      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1563      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1564      x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
1565      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
1566                                       m2l_->TargetReg(kArg1, kNotWide), true);
1567    }
1568
1569   private:
1570    const int32_t index_;
1571    const RegStorage array_base_;
1572    const int32_t len_offset_;
1573  };
1574
1575  NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
1576  MarkPossibleNullPointerException(0);
1577  LIR* branch = OpCondBranch(kCondLs, nullptr);
1578  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1579                                                    index, array_base, len_offset));
1580}
1581
1582// Test suspend flag, return target of taken suspend branch
1583LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
1584  if (cu_->target64) {
1585    OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
1586  } else {
1587    OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
1588  }
1589  return OpCondBranch((target == nullptr) ? kCondNe : kCondEq, target);
1590}
1591
1592// Decrement register and branch on condition
1593LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1594  OpRegImm(kOpSub, reg, 1);
1595  return OpCondBranch(c_code, target);
1596}
1597
1598bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
1599                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
1600  UNUSED(dalvik_opcode, is_div, rl_src, rl_dest, lit);
1601  LOG(FATAL) << "Unexpected use of smallLiteralDive in x86";
1602  UNREACHABLE();
1603}
1604
1605bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
1606  UNUSED(rl_src, rl_dest, lit);
1607  LOG(FATAL) << "Unexpected use of easyMultiply in x86";
1608  UNREACHABLE();
1609}
1610
1611LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
1612  UNUSED(cond, guide);
1613  LOG(FATAL) << "Unexpected use of OpIT in x86";
1614  UNREACHABLE();
1615}
1616
1617void X86Mir2Lir::OpEndIT(LIR* it) {
1618  UNUSED(it);
1619  LOG(FATAL) << "Unexpected use of OpEndIT in x86";
1620  UNREACHABLE();
1621}
1622
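// dest = src * val, strength-reduced: xor for 0, a plain copy for 1, imul otherwise.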
1623void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
1624  switch (val) {
1625    case 0:
1626      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1627      break;
1628    case 1:
1629      OpRegCopy(dest, src);
1630      break;
1631    default:
1632      OpRegRegImm(kOpMul, dest, src, val);
1633      break;
1634  }
1635}
1636
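// dest = (Dalvik register slot at [rSP + displacement]) * val, with the same strength reduction
// as GenImulRegImm.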
1637void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
1638  UNUSED(sreg);
1639  // All memory accesses below reference dalvik regs.
1640  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1641
1642  LIR *m;
1643  switch (val) {
1644    case 0:
1645      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1646      break;
1647    case 1: {
1648      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1649      LoadBaseDisp(rs_rSP, displacement, dest, k32, kNotVolatile);
1650      break;
1651    }
1652    default:
1653      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
1654                  rs_rX86_SP_32.GetReg(), displacement, val);
1655      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
1656      break;
1657  }
1658}
1659
1660void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1661                                RegLocation rl_src2, int flags) {
1662  if (!cu_->target64) {
1663    // Some long ops fall back to the generic Mir2Lir implementation on 32-bit x86.
1664    switch (opcode) {
1665      case Instruction::NOT_LONG:
1666      case Instruction::DIV_LONG:
1667      case Instruction::DIV_LONG_2ADDR:
1668      case Instruction::REM_LONG:
1669      case Instruction::REM_LONG_2ADDR:
1670        Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1671        return;
1672
1673      default:
1674        // Everything else we can handle.
1675        break;
1676    }
1677  }
1678
1679  switch (opcode) {
1680    case Instruction::NOT_LONG:
1681      GenNotLong(rl_dest, rl_src2);
1682      return;
1683
1684    case Instruction::ADD_LONG:
1685    case Instruction::ADD_LONG_2ADDR:
1686      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1687      return;
1688
1689    case Instruction::SUB_LONG:
1690    case Instruction::SUB_LONG_2ADDR:
1691      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
1692      return;
1693
1694    case Instruction::MUL_LONG:
1695    case Instruction::MUL_LONG_2ADDR:
1696      GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags);
1697      return;
1698
1699    case Instruction::DIV_LONG:
1700    case Instruction::DIV_LONG_2ADDR:
1701      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
1702      return;
1703
1704    case Instruction::REM_LONG:
1705    case Instruction::REM_LONG_2ADDR:
1706      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
1707      return;
1708
1709    case Instruction::AND_LONG_2ADDR:
1710    case Instruction::AND_LONG:
1711      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1712      return;
1713
1714    case Instruction::OR_LONG:
1715    case Instruction::OR_LONG_2ADDR:
1716      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1717      return;
1718
1719    case Instruction::XOR_LONG:
1720    case Instruction::XOR_LONG_2ADDR:
1721      GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1722      return;
1723
1724    case Instruction::NEG_LONG:
1725      GenNegLong(rl_dest, rl_src2);
1726      return;
1727
1728    default:
1729      LOG(FATAL) << "Invalid long arith op";
1730      return;
1731  }
1732}
1733
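// Try to lower a long multiply by a constant. Handles 0, 1, 2, most power-of-two cases and, on
// 32-bit x86, the general constant multiply; returns false when the caller should fall back to
// the generic multiply.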
1734bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) {
1735  // All memory accesses below reference dalvik regs.
1736  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1737
1738  if (val == 0) {
1739    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1740    if (cu_->target64) {
1741      OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
1742    } else {
1743      OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
1744      OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
1745    }
1746    StoreValueWide(rl_dest, rl_result);
1747    return true;
1748  } else if (val == 1) {
1749    StoreValueWide(rl_dest, rl_src1);
1750    return true;
1751  } else if (val == 2) {
1752    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags);
1753    return true;
1754  } else if (IsPowerOfTwo(val)) {
1755    int shift_amount = CTZ(val);
1756    if (!PartiallyIntersects(rl_src1, rl_dest)) {
1757      rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1758      RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
1759                                                shift_amount, flags);
1760      StoreValueWide(rl_dest, rl_result);
1761      return true;
1762    }
1763  }
1764
1765  // Okay, on 32-bit just bite the bullet and do it; still better than the general case.
1766  if (!cu_->target64) {
1767    int32_t val_lo = Low32Bits(val);
1768    int32_t val_hi = High32Bits(val);
1769    // Prepare for explicit register usage.
1770    ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
1771    rl_src1 = UpdateLocWideTyped(rl_src1);
1772    bool src1_in_reg = rl_src1.location == kLocPhysReg;
1773    int displacement = SRegOffset(rl_src1.s_reg_low);
1774
1775    // ECX <- 1H * 2L
1776    // EAX <- 1L * 2H
1777    if (src1_in_reg) {
1778      GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
1779      GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
1780    } else {
1781      GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
1782      GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
1783    }
1784
1785    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
1786    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1787
1788    // EAX <- 2L
1789    LoadConstantNoClobber(rs_r0, val_lo);
1790
1791    // EDX:EAX <- 2L * 1L (double precision)
1792    if (src1_in_reg) {
1793      NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1794    } else {
1795      LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
1796      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1797                              true /* is_load */, true /* is_64bit */);
1798    }
1799
1800    // EDX <- EDX + ECX (add high words)
1801    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1802
1803    // Result is EDX:EAX
1804    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1805                             RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1806    StoreValueWide(rl_dest, rl_result);
1807    return true;
1808  }
1809  return false;
1810}
1811
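// Long multiply. x86-64 uses a single 64-bit imul; 32-bit x86 builds the low 64 bits of the
// product as (1L * 2L) + ((1H * 2L + 1L * 2H) << 32), taking 1L * 2L from the EDX:EAX result of
// the one-operand mul and computing only one cross term when squaring.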
1812void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1813                            RegLocation rl_src2, int flags) {
1814  if (rl_src1.is_const) {
1815    std::swap(rl_src1, rl_src2);
1816  }
1817
1818  if (rl_src2.is_const) {
1819    if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) {
1820      return;
1821    }
1822  }
1823
1824  // All memory accesses below reference dalvik regs.
1825  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1826
1827  if (cu_->target64) {
1828    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1829    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1830    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1831    if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1832        rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1833      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
1834    } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
1835               rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1836      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
1837    } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1838               rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
1839      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1840    } else {
1841      OpRegCopy(rl_result.reg, rl_src1.reg);
1842      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1843    }
1844    StoreValueWide(rl_dest, rl_result);
1845    return;
1846  }
1847
1848  // Not multiplying by a constant. Do it the hard way.
1849  // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
1850  bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
1851                   mir_graph_->SRegToVReg(rl_src2.s_reg_low);
1852
1853  // Prepare for explicit register usage.
1854  ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2);
1855  rl_src1 = UpdateLocWideTyped(rl_src1);
1856  rl_src2 = UpdateLocWideTyped(rl_src2);
1857
1858  // At this point, the VRs are in their home locations.
1859  bool src1_in_reg = rl_src1.location == kLocPhysReg;
1860  bool src2_in_reg = rl_src2.location == kLocPhysReg;
1861  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
1862
1863  // ECX <- 1H
1864  if (src1_in_reg) {
1865    NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
1866  } else {
1867    LoadBaseDisp(rs_rSP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
1868                 kNotVolatile);
1869  }
1870
1871  if (is_square) {
1872    // Take advantage of the fact that the values are the same.
1873    // ECX <- ECX * 2L  (1H * 2L)
1874    if (src2_in_reg) {
1875      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1876    } else {
1877      int displacement = SRegOffset(rl_src2.s_reg_low);
1878      LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
1879                       displacement + LOWORD_OFFSET);
1880      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1881                              true /* is_load */, true /* is_64bit */);
1882    }
1883
1884    // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
1885    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
1886  } else {
1887    // EAX <- 2H
1888    if (src2_in_reg) {
1889      NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
1890    } else {
1891      LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
1892                   kNotVolatile);
1893    }
1894
1895    // EAX <- EAX * 1L  (2H * 1L)
1896    if (src1_in_reg) {
1897      NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
1898    } else {
1899      int displacement = SRegOffset(rl_src1.s_reg_low);
1900      LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP_32.GetReg(),
1901                       displacement + LOWORD_OFFSET);
1902      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1903                              true /* is_load */, true /* is_64bit */);
1904    }
1905
1906    // ECX <- ECX * 2L  (1H * 2L)
1907    if (src2_in_reg) {
1908      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1909    } else {
1910      int displacement = SRegOffset(rl_src2.s_reg_low);
1911      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP_32.GetReg(),
1912                       displacement + LOWORD_OFFSET);
1913      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1914                              true /* is_load */, true /* is_64bit */);
1915    }
1916
1917    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
1918    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1919  }
1920
1921  // EAX <- 2L
1922  if (src2_in_reg) {
1923    NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
1924  } else {
1925    LoadBaseDisp(rs_rSP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
1926                 kNotVolatile);
1927  }
1928
1929  // EDX:EAX <- 2L * 1L (double precision)
1930  if (src1_in_reg) {
1931    NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1932  } else {
1933    int displacement = SRegOffset(rl_src1.s_reg_low);
1934    LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP_32.GetReg(), displacement + LOWORD_OFFSET);
1935    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1936                            true /* is_load */, true /* is_64bit */);
1937  }
1938
1939  // EDX <- EDX + ECX (add high words)
1940  NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1941
1942  // Result is EDX:EAX
1943  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1944                           RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1945  StoreValueWide(rl_dest, rl_result);
1946}
1947
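// Apply a long binary op to rl_dest (which must already be in physical registers) with an RHS
// that is either in registers or still in its Dalvik frame slot. On 32-bit x86 the operation is
// split into low and high halves so the high half can use the carry-propagating form (adc/sbb).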
1948void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
1949                                   Instruction::Code op) {
1950  DCHECK_EQ(rl_dest.location, kLocPhysReg);
1951  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1952  if (rl_src.location == kLocPhysReg) {
1953    // Both operands are in registers.
1954    // But we must ensure that rl_src is in a register pair.
1955    if (cu_->target64) {
1956      NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1957    } else {
1958      rl_src = LoadValueWide(rl_src, kCoreReg);
1959      if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1960        // rl_dest's low half overlaps rl_src's high half, so it would be clobbered before use.
1961        RegStorage temp_reg = AllocTemp();
1962        OpRegCopy(temp_reg, rl_dest.reg);
1963        rl_src.reg.SetHighReg(temp_reg.GetReg());
1964      }
1965      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1966
1967      x86op = GetOpcode(op, rl_dest, rl_src, true);
1968      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1969    }
1970    return;
1971  }
1972
1973  // RHS is in memory.
1974  DCHECK((rl_src.location == kLocDalvikFrame) ||
1975         (rl_src.location == kLocCompilerTemp));
1976  int r_base = rs_rX86_SP_32.GetReg();
1977  int displacement = SRegOffset(rl_src.s_reg_low);
1978
1979  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1980  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
1981                     r_base, displacement + LOWORD_OFFSET);
1982  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1983                          true /* is_load */, true /* is64bit */);
1984  if (!cu_->target64) {
1985    x86op = GetOpcode(op, rl_dest, rl_src, true);
1986    lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1987    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1988                            true /* is_load */, true /* is64bit */);
1989  }
1990}
1991
1992void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1993  rl_dest = UpdateLocWideTyped(rl_dest);
1994  if (rl_dest.location == kLocPhysReg) {
1995    // Ensure we are in a register pair
1996    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1997
1998    rl_src = UpdateLocWideTyped(rl_src);
1999    GenLongRegOrMemOp(rl_result, rl_src, op);
2000    StoreFinalValueWide(rl_dest, rl_result);
2001    return;
2002  } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) {
2003    // Handle the case where src and dest intersect.
2004    rl_src = LoadValueWide(rl_src, kCoreReg);
2005    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2006    rl_src = UpdateLocWideTyped(rl_src);
2007    GenLongRegOrMemOp(rl_result, rl_src, op);
2008    StoreFinalValueWide(rl_dest, rl_result);
2009    return;
2010  }
2011
2012  // It wasn't in registers, so it better be in memory.
2013  DCHECK((rl_dest.location == kLocDalvikFrame) ||
2014         (rl_dest.location == kLocCompilerTemp));
2015  rl_src = LoadValueWide(rl_src, kCoreReg);
2016
2017  // Operate directly into memory.
2018  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
2019  int r_base = rs_rX86_SP_32.GetReg();
2020  int displacement = SRegOffset(rl_dest.s_reg_low);
2021
2022  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2023  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
2024                     cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
2025  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2026                          true /* is_load */, true /* is64bit */);
2027  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2028                          false /* is_load */, true /* is64bit */);
2029  if (!cu_->target64) {
2030    x86op = GetOpcode(op, rl_dest, rl_src, true);
2031    lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
2032    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2033                            true /* is_load */, true /* is64bit */);
2034    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2035                            false /* is_load */, true /* is64bit */);
2036  }
2037
2038  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
2039  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
2040
2041  // If the left operand is in memory and the right operand is in a register
2042  // and both belong to the same dalvik register then we should clobber the
2043  // right one because it doesn't hold valid data anymore.
2044  if (v_src_reg == v_dst_reg) {
2045    Clobber(rl_src.reg);
2046  }
2047}
2048
2049void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
2050                              RegLocation rl_src2, Instruction::Code op,
2051                              bool is_commutative) {
2052  // Is this really a 2 operand operation?
2053  switch (op) {
2054    case Instruction::ADD_LONG_2ADDR:
2055    case Instruction::SUB_LONG_2ADDR:
2056    case Instruction::AND_LONG_2ADDR:
2057    case Instruction::OR_LONG_2ADDR:
2058    case Instruction::XOR_LONG_2ADDR:
2059      if (GenerateTwoOperandInstructions()) {
2060        GenLongArith(rl_dest, rl_src2, op);
2061        return;
2062      }
2063      break;
2064
2065    default:
2066      break;
2067  }
2068
2069  if (rl_dest.location == kLocPhysReg) {
2070    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
2071
2072    // We are about to clobber the LHS, so it needs to be a temp.
2073    rl_result = ForceTempWide(rl_result);
2074
2075    // Perform the operation using the RHS.
2076    rl_src2 = UpdateLocWideTyped(rl_src2);
2077    GenLongRegOrMemOp(rl_result, rl_src2, op);
2078
2079    // And now record that the result is in the temp.
2080    StoreFinalValueWide(rl_dest, rl_result);
2081    return;
2082  }
2083
2084  // It wasn't in registers, so it better be in memory.
2085  DCHECK((rl_dest.location == kLocDalvikFrame) || (rl_dest.location == kLocCompilerTemp));
2086  rl_src1 = UpdateLocWideTyped(rl_src1);
2087  rl_src2 = UpdateLocWideTyped(rl_src2);
2088
2089  // Get one of the source operands into a temporary register.
2090  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2091  if (cu_->target64) {
2092    if (IsTemp(rl_src1.reg)) {
2093      GenLongRegOrMemOp(rl_src1, rl_src2, op);
2094    } else if (is_commutative) {
2095      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2096      // We need at least one of them to be a temporary.
2097      if (!IsTemp(rl_src2.reg)) {
2098        rl_src1 = ForceTempWide(rl_src1);
2099        GenLongRegOrMemOp(rl_src1, rl_src2, op);
2100      } else {
2101        GenLongRegOrMemOp(rl_src2, rl_src1, op);
2102        StoreFinalValueWide(rl_dest, rl_src2);
2103        return;
2104      }
2105    } else {
2106      // Need LHS to be the temp.
2107      rl_src1 = ForceTempWide(rl_src1);
2108      GenLongRegOrMemOp(rl_src1, rl_src2, op);
2109    }
2110  } else {
2111    if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
2112      GenLongRegOrMemOp(rl_src1, rl_src2, op);
2113    } else if (is_commutative) {
2114      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
2115      // We need at least one of them to be a temporary.
2116      if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
2117        rl_src1 = ForceTempWide(rl_src1);
2118        GenLongRegOrMemOp(rl_src1, rl_src2, op);
2119      } else {
2120        GenLongRegOrMemOp(rl_src2, rl_src1, op);
2121        StoreFinalValueWide(rl_dest, rl_src2);
2122        return;
2123      }
2124    } else {
2125      // Need LHS to be the temp.
2126      rl_src1 = ForceTempWide(rl_src1);
2127      GenLongRegOrMemOp(rl_src1, rl_src2, op);
2128    }
2129  }
2130
2131  StoreFinalValueWide(rl_dest, rl_src1);
2132}
2133
2134void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
2135  if (cu_->target64) {
2136    rl_src = LoadValueWide(rl_src, kCoreReg);
2137    RegLocation rl_result;
2138    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2139    OpRegCopy(rl_result.reg, rl_src.reg);
2140    OpReg(kOpNot, rl_result.reg);
2141    StoreValueWide(rl_dest, rl_result);
2142  } else {
2143    LOG(FATAL) << "Unexpected use GenNotLong()";
2144  }
2145}
2146
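// 64-bit divide/remainder by a constant, reached only on x86-64. Special-cases 0, +/-1 and, for
// division, powers of two; otherwise uses magic-number multiplication (see the comment below).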
2147void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
2148                                  int64_t imm, bool is_div) {
2149  if (imm == 0) {
2150    GenDivZeroException();
2151  } else if (imm == 1) {
2152    if (is_div) {
2153      // x / 1 == x.
2154      StoreValueWide(rl_dest, rl_src);
2155    } else {
2156      // x % 1 == 0.
2157      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2158      LoadConstantWide(rl_result.reg, 0);
2159      StoreValueWide(rl_dest, rl_result);
2160    }
2161  } else if (imm == -1) {  // handle 0x8000000000000000 / -1 special case.
2162    if (is_div) {
2163      rl_src = LoadValueWide(rl_src, kCoreReg);
2164      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2165      RegStorage rs_temp = AllocTempWide();
2166
2167      OpRegCopy(rl_result.reg, rl_src.reg);
2168      LoadConstantWide(rs_temp, 0x8000000000000000);
2169
2170      // If x == MIN_LONG, return MIN_LONG.
2171      OpRegReg(kOpCmp, rl_src.reg, rs_temp);
2172      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
2173
2174      // For x != MIN_LONG, x / -1 == -x.
2175      OpReg(kOpNeg, rl_result.reg);
2176
2177      minint_branch->target = NewLIR0(kPseudoTargetLabel);
2178      FreeTemp(rs_temp);
2179      StoreValueWide(rl_dest, rl_result);
2180    } else {
2181      // x % -1 == 0.
2182      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2183      LoadConstantWide(rl_result.reg, 0);
2184      StoreValueWide(rl_dest, rl_result);
2185    }
2186  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
2187    // Division using shifting.
2188    rl_src = LoadValueWide(rl_src, kCoreReg);
2189    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2190    if (IsSameReg(rl_result.reg, rl_src.reg)) {
2191      RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
2192      rl_result.reg.SetReg(rs_temp.GetReg());
2193    }
2194    LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
2195    OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
2196    NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
2197    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
2198    int shift_amount = CTZ(imm);
2199    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
2200    if (imm < 0) {
2201      OpReg(kOpNeg, rl_result.reg);
2202    }
2203    StoreValueWide(rl_dest, rl_result);
2204  } else {
2205    CHECK(imm <= -2 || imm >= 2);
2206
2207    FlushReg(rs_r0q);
2208    Clobber(rs_r0q);
2209    LockTemp(rs_r0q);
2210    FlushReg(rs_r2q);
2211    Clobber(rs_r2q);
2212    LockTemp(rs_r2q);
2213
2214    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
2215                             is_div ? rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG};
2216
2217    // Use H. S. Warren's Hacker's Delight Chapter 10 and
2218    // T. Granlund, P. L. Montgomery's "Division by Invariant Integers Using Multiplication".
2219    int64_t magic;
2220    int shift;
2221    CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
2222
2223    /*
2224     * For imm >= 2,
2225     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
2226     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
2227     * For imm <= -2,
2228     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
2229     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
2230     * We implement this algorithm as follows:
2231     * 1. multiply the magic number M and the numerator n, keeping the high 64 bits of the result in RDX
2232     * 2. if imm > 0 and magic < 0, add the numerator to RDX
2233     *    if imm < 0 and magic > 0, subtract the numerator from RDX
2234     * 3. if S != 0, arithmetically shift RDX right by S bits
2235     * 4. add 1 to RDX if RDX < 0
2236     * 5. RDX then holds the quotient
2237     */
2238
2239    // RAX = magic.
2240    LoadConstantWide(rs_r0q, magic);
2241
2242    // Multiply by numerator.
2243    RegStorage numerator_reg;
2244    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
2245      // We will need the value later.
2246      rl_src = LoadValueWide(rl_src, kCoreReg);
2247      numerator_reg = rl_src.reg;
2248
2249      // RDX:RAX = magic * numerator.
2250      NewLIR1(kX86Imul64DaR, numerator_reg.GetReg());
2251    } else {
2252      // Only need this once.  Multiply directly from the value.
2253      rl_src = UpdateLocWideTyped(rl_src);
2254      if (rl_src.location != kLocPhysReg) {
2255        // Okay, we can do this from memory.
2256        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2257        int displacement = SRegOffset(rl_src.s_reg_low);
2258        // RDX:RAX = magic * numerator.
2259        LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP_32.GetReg(), displacement);
2260        AnnotateDalvikRegAccess(m, displacement >> 2,
2261                                true /* is_load */, true /* is_64bit */);
2262      } else {
2263        // RDX:RAX = magic * numerator.
2264        NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg());
2265      }
2266    }
2267
2268    if (imm > 0 && magic < 0) {
2269      // Add numerator to RDX.
2270      DCHECK(numerator_reg.Valid());
2271      OpRegReg(kOpAdd, rs_r2q, numerator_reg);
2272    } else if (imm < 0 && magic > 0) {
2273      DCHECK(numerator_reg.Valid());
2274      OpRegReg(kOpSub, rs_r2q, numerator_reg);
2275    }
2276
2277    // Do we need the shift?
2278    if (shift != 0) {
2279      // Shift RDX by 'shift' bits.
2280      OpRegImm(kOpAsr, rs_r2q, shift);
2281    }
2282
2283    // Move RDX to RAX.
2284    OpRegCopyWide(rs_r0q, rs_r2q);
2285
2286    // Move sign bit to bit 0, zeroing the rest.
2287    OpRegImm(kOpLsr, rs_r2q, 63);
2288
2289    // RDX = RDX + RAX.
2290    OpRegReg(kOpAdd, rs_r2q, rs_r0q);
2291
2292    // Quotient is in RDX.
2293    if (!is_div) {
2294      // We need to compute the remainder.
2295      // Remainder is numerator - (quotient * imm).
2296      DCHECK(numerator_reg.Valid());
2297      OpRegCopyWide(rs_r0q, numerator_reg);
2298
2299      // Imul doesn't support 64-bit imms.
2300      if (imm > std::numeric_limits<int32_t>::max() ||
2301          imm < std::numeric_limits<int32_t>::min()) {
2302        RegStorage rs_temp = AllocTempWide();
2303        LoadConstantWide(rs_temp, imm);
2304
2305        // RDX = quotient * imm.
2306        NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
2307
2308        FreeTemp(rs_temp);
2309      } else {
2310        // RDX = quotient * imm.
2311        int short_imm = static_cast<int>(imm);
2312        NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
2313      }
2314
2315      // RAX -= RDX.
2316      OpRegReg(kOpSub, rs_r0q, rs_r2q);
2317
2318      // Result in RAX.
2319    } else {
2320      // Result in RDX.
2321    }
2322    StoreValueWide(rl_dest, rl_result);
2323    FreeTemp(rs_r0q);
2324    FreeTemp(rs_r2q);
2325  }
2326}
2327
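// 64-bit divide/remainder with a non-constant divisor, reached only on x86-64. Sign-extends the
// dividend into RDX:RAX with cqo and divides with idiv; the 0x8000000000000000 / -1 case is
// handled separately because idiv would raise an exception on the overflow.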
2328void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
2329                               RegLocation rl_src2, bool is_div, int flags) {
2330  if (!cu_->target64) {
2331    LOG(FATAL) << "Unexpected use GenDivRemLong()";
2332    return;
2333  }
2334
2335  if (rl_src2.is_const) {
2336    DCHECK(rl_src2.wide);
2337    int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
2338    GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
2339    return;
2340  }
2341
2342  // We have to use fixed registers, so flush all the temps.
2343  // Prepare for explicit register usage.
2344  ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
2345
2346  // Load LHS into RAX.
2347  LoadValueDirectWideFixed(rl_src1, rs_r0q);
2348
2349  // Load RHS into RCX.
2350  LoadValueDirectWideFixed(rl_src2, rs_r1q);
2351
2352  // Copy LHS sign bit into RDX.
2353  NewLIR0(kx86Cqo64Da);
2354
2355  // Handle division by zero case.
2356  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
2357    GenDivZeroCheckWide(rs_r1q);
2358  }
2359
2360  // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
2361  NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
2362  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2363
2364  // RHS is -1.
2365  LoadConstantWide(rs_r6q, 0x8000000000000000);
2366  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
2367  LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
2368
2369  // In 0x8000000000000000/-1 case.
2370  if (!is_div) {
2371    // For DIV, RAX already holds the right result. For REM, we need RDX to be 0.
2372    NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
2373  }
2374  LIR* done = NewLIR1(kX86Jmp8, 0);
2375
2376  // Expected case.
2377  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
2378  minint_branch->target = minus_one_branch->target;
2379  NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
2380  done->target = NewLIR0(kPseudoTargetLabel);
2381
2382  // Result is in RAX for div and RDX for rem.
2383  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
2384  if (!is_div) {
2385    rl_result.reg.SetReg(r2q);
2386  }
2387
2388  StoreValueWide(rl_dest, rl_result);
2389}
2390
2391void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
2392  rl_src = LoadValueWide(rl_src, kCoreReg);
2393  RegLocation rl_result;
2394  if (cu_->target64) {
2395    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2396    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
2397  } else {
2398    rl_result = ForceTempWide(rl_src);
2399    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
2400    OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
2401    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
2402  }
2403  StoreValueWide(rl_dest, rl_result);
2404}
2405
2406void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
2407  DCHECK_EQ(kX86, cu_->instruction_set);
2408  X86OpCode opcode = kX86Bkpt;
2409  switch (op) {
2410  case kOpCmp: opcode = kX86Cmp32RT;  break;
2411  case kOpMov: opcode = kX86Mov32RT;  break;
2412  default:
2413    LOG(FATAL) << "Bad opcode: " << op;
2414    break;
2415  }
2416  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2417}
2418
2419void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
2420  DCHECK_EQ(kX86_64, cu_->instruction_set);
2421  X86OpCode opcode = kX86Bkpt;
2422  if (cu_->target64 && r_dest.Is64BitSolo()) {
2423    switch (op) {
2424    case kOpCmp: opcode = kX86Cmp64RT;  break;
2425    case kOpMov: opcode = kX86Mov64RT;  break;
2426    default:
2427      LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
2428      break;
2429    }
2430  } else {
2431    switch (op) {
2432    case kOpCmp: opcode = kX86Cmp32RT;  break;
2433    case kOpMov: opcode = kX86Mov32RT;  break;
2434    default:
2435      LOG(FATAL) << "Bad opcode: " << op;
2436      break;
2437    }
2438  }
2439  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2440}
2441
2442/*
2443 * Generate array load
2444 */
2445void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
2446                             RegLocation rl_index, RegLocation rl_dest, int scale) {
2447  RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2448  int len_offset = mirror::Array::LengthOffset().Int32Value();
2449  RegLocation rl_result;
2450  rl_array = LoadValue(rl_array, kRefReg);
2451
2452  int data_offset;
2453  if (size == k64 || size == kDouble) {
2454    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2455  } else {
2456    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2457  }
2458
2459  bool constant_index = rl_index.is_const;
2460  int32_t constant_index_value = 0;
2461  if (!constant_index) {
2462    rl_index = LoadValue(rl_index, kCoreReg);
2463  } else {
2464    constant_index_value = mir_graph_->ConstantValue(rl_index);
2465    // If index is constant, just fold it into the data offset
2466    data_offset += constant_index_value << scale;
2467    // Treat as a non-indexed access below.
2468    rl_index.reg = RegStorage::InvalidReg();
2469  }
2470
2471  /* null object? */
2472  GenNullCheck(rl_array.reg, opt_flags);
2473
2474  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2475    if (constant_index) {
2476      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2477    } else {
2478      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2479    }
2480  }
2481  rl_result = EvalLoc(rl_dest, reg_class, true);
2482  LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
2483  if ((size == k64) || (size == kDouble)) {
2484    StoreValueWide(rl_dest, rl_result);
2485  } else {
2486    StoreValue(rl_dest, rl_result);
2487  }
2488}
2489
2490/*
2491 * Generate array store
2492 *
2493 */
2494void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
2495                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
2496  RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
2497  int len_offset = mirror::Array::LengthOffset().Int32Value();
2498  int data_offset;
2499
2500  if (size == k64 || size == kDouble) {
2501    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2502  } else {
2503    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2504  }
2505
2506  rl_array = LoadValue(rl_array, kRefReg);
2507  bool constant_index = rl_index.is_const;
2508  int32_t constant_index_value = 0;
2509  if (!constant_index) {
2510    rl_index = LoadValue(rl_index, kCoreReg);
2511  } else {
2512    // If index is constant, just fold it into the data offset
2513    constant_index_value = mir_graph_->ConstantValue(rl_index);
2514    data_offset += constant_index_value << scale;
2515    // Treat as a non-indexed access below.
2516    rl_index.reg = RegStorage::InvalidReg();
2517  }
2518
2519  /* null object? */
2520  GenNullCheck(rl_array.reg, opt_flags);
2521
2522  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2523    if (constant_index) {
2524      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2525    } else {
2526      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2527    }
2528  }
2529  if ((size == k64) || (size == kDouble)) {
2530    rl_src = LoadValueWide(rl_src, reg_class);
2531  } else {
2532    rl_src = LoadValue(rl_src, reg_class);
2533  }
2534  // If the src reg can't be byte accessed, move it to a temp first.
2535  if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
2536    RegStorage temp = AllocTemp();
2537    OpRegCopy(temp, rl_src.reg);
2538    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags);
2539  } else {
2540    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags);
2541  }
2542  if (card_mark) {
2543    // Free rl_index if it's a temp. This ensures there are 2 free regs for the card mark.
2544    if (!constant_index) {
2545      FreeTemp(rl_index.reg);
2546    }
2547    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
2548  }
2549}
2550
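// Long shift by a constant amount. x86-64 needs a single shift; on 32-bit x86, shifts of 32 or
// more simply move data between the halves, while smaller shifts combine the halves with
// shld/shrd.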
2551RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2552                                          RegLocation rl_src, int shift_amount, int flags) {
2553  UNUSED(flags);
2554  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2555  if (cu_->target64) {
2556    OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
2557    switch (opcode) {
2558      case Instruction::SHL_LONG:
2559      case Instruction::SHL_LONG_2ADDR:
2560        op = kOpLsl;
2561        break;
2562      case Instruction::SHR_LONG:
2563      case Instruction::SHR_LONG_2ADDR:
2564        op = kOpAsr;
2565        break;
2566      case Instruction::USHR_LONG:
2567      case Instruction::USHR_LONG_2ADDR:
2568        op = kOpLsr;
2569        break;
2570      default:
2571        LOG(FATAL) << "Unexpected case";
2572    }
2573    OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
2574  } else {
2575    switch (opcode) {
2576      case Instruction::SHL_LONG:
2577      case Instruction::SHL_LONG_2ADDR:
2578        DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
2579        if (shift_amount == 32) {
2580          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2581          LoadConstant(rl_result.reg.GetLow(), 0);
2582        } else if (shift_amount > 31) {
2583          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2584          NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
2585          LoadConstant(rl_result.reg.GetLow(), 0);
2586        } else {
2587          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2588          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2589          NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
2590                  shift_amount);
2591          NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
2592        }
2593        break;
2594      case Instruction::SHR_LONG:
2595      case Instruction::SHR_LONG_2ADDR:
2596        if (shift_amount == 32) {
2597          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2598          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2599          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2600        } else if (shift_amount > 31) {
2601          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2602          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2603          NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2604          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2605        } else {
2606          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2607          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2608          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2609                  shift_amount);
2610          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
2611        }
2612        break;
2613      case Instruction::USHR_LONG:
2614      case Instruction::USHR_LONG_2ADDR:
2615        if (shift_amount == 32) {
2616          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2617          LoadConstant(rl_result.reg.GetHigh(), 0);
2618        } else if (shift_amount > 31) {
2619          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2620          NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2621          LoadConstant(rl_result.reg.GetHigh(), 0);
2622        } else {
2623          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2624          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2625          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2626                  shift_amount);
2627          NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
2628        }
2629        break;
2630      default:
2631        LOG(FATAL) << "Unexpected case";
2632    }
2633  }
2634  return rl_result;
2635}
2636
2637void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2638                                   RegLocation rl_src, RegLocation rl_shift, int flags) {
2639  // Per spec, we only care about low 6 bits of shift amount.
2640  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
2641  if (shift_amount == 0) {
2642    rl_src = LoadValueWide(rl_src, kCoreReg);
2643    StoreValueWide(rl_dest, rl_src);
2644    return;
2645  } else if (shift_amount == 1 &&
2646            (opcode ==  Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
2647    // Need to handle this here to avoid calling StoreValueWide twice.
2648    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags);
2649    return;
2650  }
2651  if (PartiallyIntersects(rl_src, rl_dest)) {
2652    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
2653    return;
2654  }
2655  rl_src = LoadValueWide(rl_src, kCoreReg);
2656  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags);
2657  StoreValueWide(rl_dest, rl_result);
2658}
2659
2660void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
2661                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
2662                                   int flags) {
2663  bool isConstSuccess = false;
2664  switch (opcode) {
2665    case Instruction::ADD_LONG:
2666    case Instruction::AND_LONG:
2667    case Instruction::OR_LONG:
2668    case Instruction::XOR_LONG:
2669      if (rl_src2.is_const) {
2670        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2671      } else {
2672        DCHECK(rl_src1.is_const);
2673        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2674      }
2675      break;
2676    case Instruction::SUB_LONG:
2677    case Instruction::SUB_LONG_2ADDR:
2678      if (rl_src2.is_const) {
2679        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2680      } else {
2681        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
2682        isConstSuccess = true;
2683      }
2684      break;
2685    case Instruction::ADD_LONG_2ADDR:
2686    case Instruction::OR_LONG_2ADDR:
2687    case Instruction::XOR_LONG_2ADDR:
2688    case Instruction::AND_LONG_2ADDR:
2689      if (rl_src2.is_const) {
2690        if (GenerateTwoOperandInstructions()) {
2691          isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
2692        } else {
2693          isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2694        }
2695      } else {
2696        DCHECK(rl_src1.is_const);
2697        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2698      }
2699      break;
2700    default:
2701      isConstSuccess = false;
2702      break;
2703  }
2704
2705  if (!isConstSuccess) {
2706    // Default - bail to non-const handler.
2707    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
2708  }
2709}
2710
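// Returns true if applying 'op' with this immediate leaves the operand unchanged (AND with -1,
// OR/XOR with 0), so the instruction can be skipped entirely.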
2711bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
2712  switch (op) {
2713    case Instruction::AND_LONG_2ADDR:
2714    case Instruction::AND_LONG:
2715      return value == -1;
2716    case Instruction::OR_LONG:
2717    case Instruction::OR_LONG_2ADDR:
2718    case Instruction::XOR_LONG:
2719    case Instruction::XOR_LONG_2ADDR:
2720      return value == 0;
2721    default:
2722      return false;
2723  }
2724}
2725
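// Pick the x86 opcode for one half of a long binary op, depending on whether the destination and
// the RHS are in registers or memory. On 32-bit x86 the high half of ADD/SUB uses the
// carry-propagating adc/sbb forms.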
2726X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
2727                                bool is_high_op) {
2728  bool rhs_in_mem = rhs.location != kLocPhysReg;
2729  bool dest_in_mem = dest.location != kLocPhysReg;
2730  bool is64Bit = cu_->target64;
2731  DCHECK(!rhs_in_mem || !dest_in_mem);
2732  switch (op) {
2733    case Instruction::ADD_LONG:
2734    case Instruction::ADD_LONG_2ADDR:
2735      if (dest_in_mem) {
2736        return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
2737      } else if (rhs_in_mem) {
2738        return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
2739      }
2740      return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
2741    case Instruction::SUB_LONG:
2742    case Instruction::SUB_LONG_2ADDR:
2743      if (dest_in_mem) {
2744        return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
2745      } else if (rhs_in_mem) {
2746        return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
2747      }
2748      return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
2749    case Instruction::AND_LONG_2ADDR:
2750    case Instruction::AND_LONG:
2751      if (dest_in_mem) {
2752        return is64Bit ? kX86And64MR : kX86And32MR;
2753      }
2754      if (is64Bit) {
2755        return rhs_in_mem ? kX86And64RM : kX86And64RR;
2756      }
2757      return rhs_in_mem ? kX86And32RM : kX86And32RR;
2758    case Instruction::OR_LONG:
2759    case Instruction::OR_LONG_2ADDR:
2760      if (dest_in_mem) {
2761        return is64Bit ? kX86Or64MR : kX86Or32MR;
2762      }
2763      if (is64Bit) {
2764        return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
2765      }
2766      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
2767    case Instruction::XOR_LONG:
2768    case Instruction::XOR_LONG_2ADDR:
2769      if (dest_in_mem) {
2770        return is64Bit ? kX86Xor64MR : kX86Xor32MR;
2771      }
2772      if (is64Bit) {
2773        return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
2774      }
2775      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
2776    default:
2777      LOG(FATAL) << "Unexpected opcode: " << op;
2778      return kX86Add32RR;
2779  }
2780}
2781
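// Pick the x86 opcode for one half of a long binary op with an immediate operand, preferring the
// sign-extended 8-bit immediate encodings when the value fits in a signed byte.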
2782X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
2783                                int32_t value) {
2784  bool in_mem = loc.location != kLocPhysReg;
2785  bool is64Bit = cu_->target64;
2786  bool byte_imm = IS_SIMM8(value);
2787  DCHECK(in_mem || !loc.reg.IsFloat());
  switch (op) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      if (byte_imm) {
        if (in_mem) {
          return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
        }
        return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
      }
      if (in_mem) {
        return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
      }
      return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if (byte_imm) {
        if (in_mem) {
          return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
        }
        return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
      }
      if (in_mem) {
        return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
      }
      return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      if (byte_imm) {
        if (is64Bit) {
          return in_mem ? kX86And64MI8 : kX86And64RI8;
        }
        return in_mem ? kX86And32MI8 : kX86And32RI8;
      }
      if (is64Bit) {
        return in_mem ? kX86And64MI : kX86And64RI;
      }
      return in_mem ? kX86And32MI : kX86And32RI;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if (byte_imm) {
        if (is64Bit) {
          return in_mem ? kX86Or64MI8 : kX86Or64RI8;
        }
        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
      }
      if (is64Bit) {
        return in_mem ? kX86Or64MI : kX86Or64RI;
      }
      return in_mem ? kX86Or32MI : kX86Or32RI;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      if (byte_imm) {
        if (is64Bit) {
          return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
        }
        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
      }
      if (is64Bit) {
        return in_mem ? kX86Xor64MI : kX86Xor64RI;
      }
      return in_mem ? kX86Xor32MI : kX86Xor32RI;
    default:
      LOG(FATAL) << "Unexpected opcode: " << op;
      UNREACHABLE();
  }
}

bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
  DCHECK(rl_src.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src);

  if (cu_->target64) {
    // We can use the immediate form only if the value fits in 32 bits.
    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
      return false;
    }

    rl_dest = UpdateLocWideTyped(rl_dest);

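    // If the destination still lives in the Dalvik frame (or a compiler temp), apply the
    // operation straight to the stack slot, roughly "addq [rsp + disp], imm", instead of
    // loading it into a register first.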
    if ((rl_dest.location == kLocDalvikFrame) ||
        (rl_dest.location == kLocCompilerTemp)) {
      int r_base = rs_rX86_SP_32.GetReg();
      int displacement = SRegOffset(rl_dest.s_reg_low);

      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is64bit */);
      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
      return true;
    }

    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    DCHECK_EQ(rl_result.location, kLocPhysReg);
    DCHECK(!rl_result.reg.IsFloat());

    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
    NewLIR2(x86op, rl_result.reg.GetReg(), val);

    StoreValueWide(rl_dest, rl_result);
    return true;
  }

  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  rl_dest = UpdateLocWideTyped(rl_dest);

  // Can we just do this into memory?
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int r_base = rs_rX86_SP_32.GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
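    // Apply the operation to each 32-bit half of the stack slot separately; halves for which
    // the immediate makes the operation a no-op are skipped entirely (see IsNoOp()).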
    if (!IsNoOp(op, val_lo)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is64bit */);
      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    }
    if (!IsNoOp(op, val_hi)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
      LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                                true /* is_load */, true /* is64bit */);
      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                                false /* is_load */, true /* is64bit */);
    }
    return true;
  }

  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  DCHECK_EQ(rl_result.location, kLocPhysReg);
  DCHECK(!rl_result.reg.IsFloat());

  if (!IsNoOp(op, val_lo)) {
    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
  }
  if (!IsNoOp(op, val_hi)) {
    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
  }
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
                                RegLocation rl_src2, Instruction::Code op) {
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);

  if (cu_->target64) {
    // We can use the immediate form only if the value fits in 32 bits.
    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
      return false;
    }
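    // Fast path: when both operands are already in core registers, copy src1 into dest and
    // apply the immediate-form opcode in place.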
    if (rl_dest.location == kLocPhysReg &&
        rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
      OpRegCopy(rl_dest.reg, rl_src1.reg);
      NewLIR2(x86op, rl_dest.reg.GetReg(), val);
      StoreFinalValueWide(rl_dest, rl_dest);
      return true;
    }

    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    // We need the value to live in a temporary register.
    RegLocation rl_result = ForceTempWide(rl_src1);

    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
    NewLIR2(x86op, rl_result.reg.GetReg(), val);

    StoreFinalValueWide(rl_dest, rl_result);
    return true;
  }

  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  rl_dest = UpdateLocWideTyped(rl_dest);
  rl_src1 = UpdateLocWideTyped(rl_src1);

  // Can we do this directly into the destination registers?
  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
      rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
      rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
    if (!IsNoOp(op, val_lo)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
      NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
    }
    if (!IsNoOp(op, val_hi)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
      NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
    }

    StoreFinalValueWide(rl_dest, rl_dest);
    return true;
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  DCHECK_EQ(rl_src1.location, kLocPhysReg);

  // We need the value to live in temporary registers.
  RegLocation rl_result = ForceTempWide(rl_src1);
  if (!IsNoOp(op, val_lo)) {
    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
  }
  if (!IsNoOp(op, val_hi)) {
    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
  }

  StoreFinalValueWide(rl_dest, rl_result);
  return true;
}

// For final classes there are no sub-classes to check and so we can answer the instance-of
// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                    RegLocation rl_dest, RegLocation rl_src) {
  RegLocation object = LoadValue(rl_src, kRefReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
  if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }
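  // A sketch of the sequence generated below (Intel-style; registers, offsets and exact
  // instruction choices are illustrative only):
  //   mov   result, 0
  //   cmp   object, 0
  //   je    done                        ; null is never an instance of anything
  //   ... load the declaring/resolved class into check_class ...
  //   cmp   check_class, [object + class_offset]
  //   sete  result                      ; 1 iff the classes match exactly
  // done: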

  // Assume that there is no match.
  LoadConstant(result_reg, 0);
  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, nullptr);

  // We will use this register to compare to memory below.
  // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
  // For this reason, force allocation of a 32-bit register, so that the
  // compare against memory is done as a 32-bit comparison.
  // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
  RegStorage check_class = AllocTemp();

  // If Method* is already in a register, we can save a copy.
  RegLocation rl_method = mir_graph_->GetMethodLoc();
  int32_t offset_of_type = mirror::Array::DataOffset(
      sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
      (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);

  if (rl_method.location == kLocPhysReg) {
    if (use_declaring_class) {
      LoadRefDisp(rl_method.reg, ArtMethod::DeclaringClassOffset().Int32Value(),
                  check_class, kNotVolatile);
    } else {
      LoadRefDisp(rl_method.reg, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  check_class, kNotVolatile);
      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
    }
  } else {
    LoadCurrMethodDirect(check_class);
    if (use_declaring_class) {
      LoadRefDisp(check_class, ArtMethod::DeclaringClassOffset().Int32Value(),
                  check_class, kNotVolatile);
    } else {
      LoadRefDisp(check_class, ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  check_class, kNotVolatile);
      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
    }
  }

  // Compare the computed class to the class in the object.
  DCHECK_EQ(object.location, kLocPhysReg);
  OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());

  // Set the low byte of the result to 0 or 1 from the compare condition code.
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);

  LIR* target = NewLIR0(kPseudoTargetLabel);
  null_branchover->target = target;
  FreeTemp(check_class);
  if (IsTemp(result_reg)) {
    OpRegCopy(rl_result.reg, result_reg);
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_lhs, RegLocation rl_rhs, int flags) {
  OpKind op = kOpBkpt;
  bool is_div_rem = false;
  bool unary = false;
  bool shift_op = false;
  bool is_two_addr = false;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::NEG_INT:
      op = kOpNeg;
      unary = true;
      break;
    case Instruction::NOT_INT:
      op = kOpMvn;
      unary = true;
      break;
    case Instruction::ADD_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::ADD_INT:
      op = kOpAdd;
      break;
    case Instruction::SUB_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SUB_INT:
      op = kOpSub;
      break;
    case Instruction::MUL_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::MUL_INT:
      op = kOpMul;
      break;
    case Instruction::DIV_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::DIV_INT:
      op = kOpDiv;
      is_div_rem = true;
      break;
    /* NOTE: returns in kArg1 */
    case Instruction::REM_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::REM_INT:
      op = kOpRem;
      is_div_rem = true;
      break;
    case Instruction::AND_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::AND_INT:
      op = kOpAnd;
      break;
    case Instruction::OR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::OR_INT:
      op = kOpOr;
      break;
    case Instruction::XOR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::XOR_INT:
      op = kOpXor;
      break;
    case Instruction::SHL_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHL_INT:
      shift_op = true;
      op = kOpLsl;
      break;
    case Instruction::SHR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHR_INT:
      shift_op = true;
      op = kOpAsr;
      break;
    case Instruction::USHR_INT_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::USHR_INT:
      shift_op = true;
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Invalid word arith op: " << opcode;
  }

  // Can we convert to a two address instruction?
  if (!is_two_addr &&
        (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
         mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
    is_two_addr = true;
  }

  if (!GenerateTwoOperandInstructions()) {
    is_two_addr = false;
  }

  // Get the div/rem stuff out of the way.
  if (is_div_rem) {
    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags);
    StoreValue(rl_dest, rl_result);
    return;
  }

  // If we generate any memory access below, it will reference a dalvik reg.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
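  // From here on the code tries to fold one operand into a memory reference whenever it is
  // legal: a two-address op whose destination VR is still in the frame can be performed as
  // "op [frame_slot], reg", and a three-address op can often read one source directly from
  // memory, saving a load and a temporary register.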

  if (unary) {
    rl_lhs = LoadValue(rl_lhs, kCoreReg);
    rl_result = UpdateLocTyped(rl_dest);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegReg(op, rl_result.reg, rl_lhs.reg);
  } else {
    if (shift_op) {
      // X86 masks the shift count in hardware, so no explicit masking is needed;
      // the variable shift count must live in ECX.
      RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
      LoadValueDirectFixed(rl_rhs, t_reg);
      if (is_two_addr) {
        // Can we do this directly into memory?
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory
          OpMemReg(op, rl_result, t_reg.GetReg());
          FreeTemp(t_reg);
          return;
        } else if (!rl_result.reg.IsFloat()) {
          // Can do this directly into the result register
          OpRegReg(op, rl_result.reg, t_reg);
          FreeTemp(t_reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        }
      }
      // Three-address form, or we can't do it directly.
      rl_lhs = LoadValue(rl_lhs, kCoreReg);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
      FreeTemp(t_reg);
    } else {
      // Multiply is 3 operand only (sort of).
      if (is_two_addr && op != kOpMul) {
        // Can we do this directly into memory?
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location == kLocPhysReg) {
          // Ensure res is in a core reg
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          // Can we do this from memory directly?
          rl_rhs = UpdateLocTyped(rl_rhs);
          if (rl_rhs.location != kLocPhysReg) {
            OpRegMem(op, rl_result.reg, rl_rhs);
            StoreFinalValue(rl_dest, rl_result);
            return;
          } else if (!rl_rhs.reg.IsFloat()) {
            OpRegReg(op, rl_result.reg, rl_rhs.reg);
            StoreFinalValue(rl_dest, rl_result);
            return;
          }
        }
        rl_rhs = LoadValue(rl_rhs, kCoreReg);
        // rl_rhs and rl_dest may refer to the same VR; in that case rl_dest is now in a
        // register after the LoadValue above, but rl_result has not been refreshed yet,
        // so update it here.
        rl_result = UpdateLocTyped(rl_dest);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory.
          OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
          return;
        } else if (!rl_result.reg.IsFloat()) {
          // Can do this directly into the result register.
          OpRegReg(op, rl_result.reg, rl_rhs.reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        } else {
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        }
      } else {
        // Try to use reg/memory instructions.
        rl_lhs = UpdateLocTyped(rl_lhs);
        rl_rhs = UpdateLocTyped(rl_rhs);
        // We can't optimize with FP registers.
        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
          // Something is difficult, so fall back to the standard case.
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_rhs = LoadValue(rl_rhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        } else {
          // We can optimize by moving to result and using memory operands.
          if (rl_rhs.location != kLocPhysReg) {
            // Force LHS into result.
            // Be careful with the order here: if rl_dest and rl_lhs point to the same VR,
            // load the value first; if they are different, find a register for the
            // destination first.
            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              // No-op if these are the same.
              OpRegCopy(rl_result.reg, rl_lhs.reg);
            } else {
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              LoadValueDirect(rl_lhs, rl_result.reg);
            }
            OpRegMem(op, rl_result.reg, rl_rhs);
          } else if (rl_lhs.location != kLocPhysReg) {
            // RHS is in a register; LHS is in memory.
            if (op != kOpSub) {
              // Force RHS into result and operate on memory.
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegCopy(rl_result.reg, rl_rhs.reg);
              OpRegMem(op, rl_result.reg, rl_lhs);
            } else {
              // Subtraction isn't commutative.
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_rhs = LoadValue(rl_rhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
            }
          } else {
            // Both are in registers.
            rl_lhs = LoadValue(rl_lhs, kCoreReg);
            rl_rhs = LoadValue(rl_rhs, kCoreReg);
            rl_result = EvalLoc(rl_dest, kCoreReg, true);
            OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
          }
        }
      }
    }
  }
  StoreValue(rl_dest, rl_result);
}

bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
  // If either operand is in a floating-point register, the integer reg/mem forms can't be used.
  if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
    return false;
  }
  if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
    return false;
  }

  // Everything will be fine :-).
  return true;
}

void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
  if (!cu_->target64) {
    Mir2Lir::GenIntToLong(rl_dest, rl_src);
    return;
  }
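  // On 64-bit targets a single movsxd sign-extends the 32-bit source into the 64-bit result,
  // reading either from a register or directly from the source's Dalvik frame slot.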
  rl_src = UpdateLocTyped(rl_src);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (rl_src.location == kLocPhysReg) {
    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  } else {
    int displacement = SRegOffset(rl_src.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP_32.GetReg(),
                     displacement + LOWORD_OFFSET);
    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
  }
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = UpdateLocWide(rl_src);
  rl_src = NarrowRegLoc(rl_src);
  StoreValue(rl_dest, rl_src);

  if (cu_->target64) {
    // If src and dest end up in the same physical register, StoreValue generates no code,
    // but we still need an explicit 32-bit mov R, R to clear the upper 32 bits.
    rl_dest = UpdateLoc(rl_dest);
    if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg
           && IsSameReg(rl_src.reg, rl_dest.reg)) {
        LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg);
        // Remove the nop flag set by OpRegCopyNoInsert when src == dest.
        copy_lir->flags.is_nop = false;
        AppendLIR(copy_lir);
    }
  }
}

void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                        RegLocation rl_src1, RegLocation rl_shift) {
  if (!cu_->target64) {
    // Long shift operations in 32-bit mode. Use shld or shrd to fill one 32-bit register from
    // the other half, then shift the other half. If the shift amount is less than 32 we are
    // done; otherwise move one register to the other and fill the vacated register with zero
    // or sign bits.
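    // For example, SHL_LONG roughly becomes (operand in edx:eax, count in cl):
    //   shld  edx, eax, cl     ; high half gets bits shifted in from the low half
    //   sal   eax, cl
    //   test  cl, 32
    //   jz    done             ; count < 32: finished
    //   mov   edx, eax         ; count >= 32: low already holds value << (count - 32)
    //   mov   eax, 0
    // done: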
    LIR* branch;
    FlushAllRegs();
    LockCallTemps();
    LoadValueDirectFixed(rl_shift, rs_rCX);
    RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    LoadValueDirectWideFixed(rl_src1, r_tmp);
    switch (opcode) {
      case Instruction::SHL_LONG:
      case Instruction::SHL_LONG_2ADDR:
        NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg());
        NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow());
        LoadConstant(r_tmp.GetLow(), 0);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      case Instruction::SHR_LONG:
      case Instruction::SHR_LONG_2ADDR:
        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
        NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      case Instruction::USHR_LONG:
      case Instruction::USHR_LONG_2ADDR:
        NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(),
               rs_rCX.GetReg());
        NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
        NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
        branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
        OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
        LoadConstant(r_tmp.GetHigh(), 0);
        branch->target = NewLIR0(kPseudoTargetLabel);
        break;
      default:
        LOG(FATAL) << "Unexpected case: " << opcode;
        return;
    }
    RegLocation rl_result = LocCReturnWide();
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  bool is_two_addr = false;
  OpKind op = kOpBkpt;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::SHL_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHL_LONG:
      op = kOpLsl;
      break;
    case Instruction::SHR_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::SHR_LONG:
      op = kOpAsr;
      break;
    case Instruction::USHR_LONG_2ADDR:
      is_two_addr = true;
      FALLTHROUGH_INTENDED;
    case Instruction::USHR_LONG:
      op = kOpLsr;
      break;
    default:
      op = kOpBkpt;
  }

  // X86 masks the shift count in hardware, so no explicit masking is needed;
  // the variable shift count must live in ECX.
  RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
  LoadValueDirectFixed(rl_shift, t_reg);
  if (is_two_addr) {
    // Can we do this directly into memory?
    rl_result = UpdateLocWideTyped(rl_dest);
    if (rl_result.location != kLocPhysReg) {
      // Okay, we can do this into memory
      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
      OpMemReg(op, rl_result, t_reg.GetReg());
    } else if (!rl_result.reg.IsFloat()) {
      // Can do this directly into the result register
      OpRegReg(op, rl_result.reg, t_reg);
      StoreFinalValueWide(rl_dest, rl_result);
    }
  } else {
    // Three-address form, or we can't do it directly.
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
    StoreFinalValueWide(rl_dest, rl_result);
  }

  FreeTemp(t_reg);
}

}  // namespace art