int_x86.cc revision 021b60f31a4443081e63591e184b5d707bba28c1
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the X86 ISA */

#include "codegen_x86.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "mirror/array.h"
#include "x86_lir.h"

namespace art {

/*
 * Compare two 64-bit values
 *    x = y     return  0
 *    x < y     return -1
 *    x > y     return  1
 */
void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    RegStorage temp_reg = AllocTemp();
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);  // result = (src1 > src2) ? 1 : 0
    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 < src2) ? 1 : 0
    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());

    StoreValue(rl_dest, rl_result);
    FreeTemp(temp_reg);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
  // Compute (r1:r0) = (r1:r0) - (r3:r2)
  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
  OpReg(kOpNeg, rs_r2);  // r2 = -r2
  OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = high | low - sets ZF
  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r0.GetReg(), rs_r0.GetReg());
  OpRegReg(kOpOr, rs_r0, rs_r2);  // r0 = r0 | r2
  RegLocation rl_result = LocCReturn();
  StoreValue(rl_dest, rl_result);
}
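
/*
 * Illustrative sketch (an assumption for clarity, not part of the ART build):
 * the setg/setl/sub sequence in the 64-bit path above computes the same value
 * as this portable C++, mapping x = y to 0, x < y to -1 and x > y to 1:
 *
 *   int32_t CmpLong(int64_t x, int64_t y) {
 *     return static_cast<int32_t>(x > y) - static_cast<int32_t>(x < y);
 *   }
 */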

X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
  switch (cond) {
    case kCondEq: return kX86CondEq;
    case kCondNe: return kX86CondNe;
    case kCondCs: return kX86CondC;
    case kCondCc: return kX86CondNc;
    case kCondUlt: return kX86CondC;
    case kCondUge: return kX86CondNc;
    case kCondMi: return kX86CondS;
    case kCondPl: return kX86CondNs;
    case kCondVs: return kX86CondO;
    case kCondVc: return kX86CondNo;
    case kCondHi: return kX86CondA;
    case kCondLs: return kX86CondBe;
    case kCondGe: return kX86CondGe;
    case kCondLt: return kX86CondL;
    case kCondGt: return kX86CondG;
    case kCondLe: return kX86CondLe;
    case kCondAl:
    case kCondNv: LOG(FATAL) << "Should not reach here";
  }
  return kX86CondO;
}

LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
                                int check_value, LIR* target) {
  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
    NewLIR2(reg.Is64Bit() ? kX86Test64RR : kX86Test32RR, reg.GetReg(), reg.GetReg());
  } else {
    if (reg.Is64Bit()) {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
    } else {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
    }
  }
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (r_dest.IsFloat() || r_src.IsFloat()) {
    return OpFpRegCopy(r_dest, r_src);
  }
  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                    r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        // TODO: Prevent this from happening in the code. The result is often
        // unused or could have been loaded more easily from memory.
        if (!r_src.IsPair()) {
          DCHECK(!r_dest.IsPair());
          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
          RegStorage r_tmp = AllocTempDouble();
          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
          FreeTemp(r_tmp);
        }
      }
    } else {
      if (src_fp) {
        if (!r_dest.IsPair()) {
          DCHECK(!r_src.IsPair());
          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
          RegStorage temp_reg = AllocTempDouble();
          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
        }
      } else {
        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
        if (!r_src.IsPair()) {
          // Just copy the register directly.
          OpRegCopy(r_dest, r_src);
        } else {
          // Handle overlap
          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
              r_src.GetLowReg() == r_dest.GetHighReg()) {
            // Deal with cycles.
            RegStorage temp_reg = AllocTemp();
            OpRegCopy(temp_reg, r_dest.GetHigh());
            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
            OpRegCopy(r_dest.GetLow(), temp_reg);
            FreeTemp(temp_reg);
          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          } else {
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          }
        }
      }
    }
  }
}

void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  rl_src = LoadValue(rl_src, src_reg_class);
  ConditionCode ccode = mir->meta.ccode;

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When the true case is zero and result_reg is not the same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $false_case
     *     cmovnz result_reg, t1
     * 2) When the false case is zero and result_reg is not the same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $true_case
     *     cmovz result_reg, t1
     * 3) All other cases (we do the compare first to set eflags):
     *     cmp $0, src_reg
     *     mov result_reg, $false_case
     *     mov t1, $true_case
     *     cmovz result_reg, t1
     */
    // FIXME: depending on how you use registers you could get a false != mismatch when dealing
    // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
    const bool result_reg_same_as_src =
        (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
    const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
    const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
    const bool catch_all_case = !(true_zero_case || false_zero_case);

    if (true_zero_case || false_zero_case) {
      OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      OpRegImm(kOpCmp, rl_src.reg, 0);
    }

    if (catch_all_case) {
      OpRegImm(kOpMov, rl_result.reg, false_val);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
      int immediate_for_temp = true_zero_case ? false_val : true_val;
      RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
      OpRegImm(kOpMov, temp1_reg, immediate_for_temp);

      OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);

      FreeTemp(temp1_reg);
    }
  } else {
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When the true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When the false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither case is in place:
     *     cmp $0, src_reg
     *     mov result_reg, false_reg
     *     cmovz result_reg, true_reg
     */

    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
    OpRegImm(kOpCmp, rl_src.reg, 0);

    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    } else {
      OpRegCopy(rl_result.reg, rl_false.reg);
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}
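
/*
 * Illustrative sketch (an assumption, not part of the ART build): case 3 of
 * the constant variant above, expressed in portable C++. A compiler typically
 * lowers the conditional assignment to the same cmp/mov/cmov pattern:
 *
 *   int32_t SelectEqZero(int32_t src, int32_t true_case, int32_t false_case) {
 *     int32_t result = false_case;   // mov result_reg, $false_case
 *     if (src == 0) {                // cmp $0, src_reg
 *       result = true_case;          // cmovz result_reg, t1
 *     }
 *     return result;
 *   }
 */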

void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    OpCondBranch(ccode, taken);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);

  // Swap operands and condition code to prevent use of zero flag.
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0)
    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2)
    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
      break;
    case kCondLe:
      ccode = kCondGe;
      break;
    case kCondGt:
      ccode = kCondLt;
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;

  if (cu_->target64) {
    if (is_equality_test && val == 0) {
      // We can simplify the comparison against 0 for ==, != with a register test.
      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
    } else {
      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
      LoadConstantWide(tmp, val);
      OpRegReg(kOpCmp, rl_src1.reg, tmp);
      FreeTemp(tmp);
    }
    OpCondBranch(ccode, taken);
    return;
  }

  if (is_equality_test && val != 0) {
    rl_src1 = ForceTempWide(rl_src1);
  }
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (is_equality_test) {
    // We can simplify the comparison against 0 for ==, != by ORing the two halves.
    if (val == 0) {
      if (IsTemp(low_reg)) {
        OpRegReg(kOpOr, low_reg, high_reg);
        // We have now changed it; ignore the old values.
        Clobber(rl_src1.reg);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
        FreeTemp(t_reg);
      }
      OpCondBranch(ccode, taken);
      return;
    }

    // Need to compute the actual value for ==, !=.
    OpRegImm(kOpSub, low_reg, val_lo);
    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
    OpRegReg(kOpOr, high_reg, low_reg);
    Clobber(rl_src1.reg);
  } else if (ccode == kCondLe || ccode == kCondGt) {
    // Swap operands and condition code to prevent use of zero flag.
    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
    LoadConstantWide(tmp, val);
    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
    FreeTemp(tmp);
  } else {
    // We can use a compare for the low word to set CF.
    OpRegImm(kOpCmp, low_reg, val_lo);
    if (IsTemp(high_reg)) {
      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
      // We have now changed it; ignore the old values.
      Clobber(rl_src1.reg);
    } else {
      // mov temp_reg, high_reg; sbb temp_reg, high_constant
      RegStorage t_reg = AllocTemp();
      OpRegCopy(t_reg, high_reg);
      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
      FreeTemp(t_reg);
    }
  }

  OpCondBranch(ccode, taken);
}
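
/*
 * Illustrative sketch (an assumption, not part of the ART build): the sub/sbb
 * pair used above for the 32-bit signed-less-than test, modeled in C++. After
 * the sbb, the kCondL branch fires exactly when the full 64-bit difference is
 * negative:
 *
 *   bool SignedLess64(int64_t a, int64_t b) {
 *     uint32_t lo_a = static_cast<uint32_t>(a), lo_b = static_cast<uint32_t>(b);
 *     int32_t hi_a = static_cast<int32_t>(a >> 32), hi_b = static_cast<int32_t>(b >> 32);
 *     int64_t borrow = (lo_a < lo_b) ? 1 : 0;  // CF after "sub lo_a, lo_b"
 *     // "sbb hi_a, hi_b": high-word difference including the borrow; kCondL
 *     // (SF != OF) is equivalent to this widened difference being negative.
 *     return (static_cast<int64_t>(hi_a) - hi_b - borrow) < 0;
 *   }
 */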

void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
  // It does not make sense to calculate magic and shift for zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H. S. Warren's Hacker's Delight Chapter 10 and
   * T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication",
   * the magic number M and shift S can be calculated in the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >= 2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated like:
   * nc = 2^31 - 2^31 % d - 1, where d >= 2
   * nc = -2^31 + (2^31 + 1) % d, where d <= -2.
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * The magic number M is calculated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always bigger than or equal to 32, so we just return p - 32 as
   * the shift number S.
   */

  int32_t p = 31;
  const uint32_t two31 = 0x80000000U;

  // Initialize the computations.
  uint32_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31);
  uint32_t abs_nc = tmp - 1 - tmp % abs_d;
  uint32_t quotient1 = two31 / abs_nc;
  uint32_t remainder1 = two31 % abs_nc;
  uint32_t quotient2 = two31 / abs_d;
  uint32_t remainder2 = two31 % abs_d;

  /*
   * To avoid handling both positive and negative divisors, Hacker's Delight
   * introduces a method to handle these 2 cases together to avoid duplication.
   */
  uint32_t delta;
  do {
    p++;
    quotient1 = 2 * quotient1;
    remainder1 = 2 * remainder1;
    if (remainder1 >= abs_nc) {
      quotient1++;
      remainder1 = remainder1 - abs_nc;
    }
    quotient2 = 2 * quotient2;
    remainder2 = 2 * remainder2;
    if (remainder2 >= abs_d) {
      quotient2++;
      remainder2 = remainder2 - abs_d;
    }
    delta = abs_d - remainder2;
  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
  shift = p - 32;
}
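
/*
 * Worked example (an assumption for illustration, not part of the ART build):
 * for divisor d = 7 the loop above yields magic M = 0x92492493 (negative as an
 * int32_t) and shift S = 2. GenDivRemLit() then uses the pair like this:
 *
 *   int32_t MagicDivideBy7(int32_t n) {
 *     int32_t m = static_cast<int32_t>(0x92492493u);          // magic for d = 7
 *     int64_t prod = static_cast<int64_t>(m) * n;
 *     int32_t q = static_cast<int32_t>(prod >> 32);           // high word of imul
 *     q += n;                               // d > 0 && m < 0: add the numerator
 *     q >>= 2;                              // sar by S (arithmetic shift assumed)
 *     q += static_cast<uint32_t>(q) >> 31;  // add 1 if the quotient is negative
 *     return q;                             // equals n / 7 for all int32_t n
 *   }
 */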

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                     int imm, bool is_div) {
  // Use a multiply (and fixup) to perform an int div/rem by a constant.

  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Assume that the result will be in EDX.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};

  // Handle the div/rem by 1 special case.
  if (imm == 1) {
    if (is_div) {
      // x / 1 == x.
      StoreValue(rl_result, rl_src);
    } else {
      // x % 1 == 0.
      LoadConstantNoClobber(rs_r0, 0);
      // For this case, return the result in EAX.
      rl_result.reg.SetReg(r0);
    }
  } else if (imm == -1) {  // Handle the 0x80000000 / -1 special case.
    if (is_div) {
      LIR* minint_branch = nullptr;
      LoadValueDirectFixed(rl_src, rs_r0);
      OpRegImm(kOpCmp, rs_r0, 0x80000000);
      minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // For x != MIN_INT, x / -1 == -x.
      NewLIR1(kX86Neg32R, r0);

      LIR* branch_around = NewLIR1(kX86Jmp8, 0);
      // The target for cmp/jmp above.
      minint_branch->target = NewLIR0(kPseudoTargetLabel);
      // EAX already contains the right value (0x80000000).
      branch_around->target = NewLIR0(kPseudoTargetLabel);
    } else {
      // x % -1 == 0.
      LoadConstantNoClobber(rs_r0, 0);
    }
    // For this case, return the result in EAX.
    rl_result.reg.SetReg(r0);
  } else {
    CHECK(imm <= -2 || imm >= 2);
    // Use H. S. Warren's Hacker's Delight Chapter 10 and
    // T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication".
    int magic, shift;
    CalculateMagicAndShift(imm, magic, shift);

    /*
     * For imm >= 2,
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n < 0.
     * For imm <= -2,
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n > 0
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
     * We implement this algorithm in the following way:
     * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
     * 2. if imm > 0 and magic < 0, add numerator to EDX
     *    if imm < 0 and magic > 0, sub numerator from EDX
     * 3. if S != 0, SAR S bits for EDX
     * 4. add 1 to EDX if EDX < 0
     * 5. Thus, EDX is the quotient
     */

    // Numerator into EAX.
    RegStorage numerator_reg;
    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
      // We will need the value later.
      if (rl_src.location == kLocPhysReg) {
        // We can use it directly.
        DCHECK(rl_src.reg.GetReg() != rs_r0.GetReg() && rl_src.reg.GetReg() != rs_r2.GetReg());
        numerator_reg = rl_src.reg;
      } else {
        numerator_reg = rs_r1;
        LoadValueDirectFixed(rl_src, numerator_reg);
      }
      OpRegCopy(rs_r0, numerator_reg);
    } else {
      // Only need this once.  Just put it into EAX.
      LoadValueDirectFixed(rl_src, rs_r0);
    }

    // EDX = magic.
    LoadConstantNoClobber(rs_r2, magic);

    // EDX:EAX = magic * numerator.
    NewLIR1(kX86Imul32DaR, rs_r2.GetReg());

    if (imm > 0 && magic < 0) {
      // Add numerator to EDX.
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    } else if (imm < 0 && magic > 0) {
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    }

    // Do we need the shift?
    if (shift != 0) {
      // Shift EDX by 'shift' bits.
      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
    }

    // Add 1 to EDX if EDX < 0.

    // Move EDX to EAX.
    OpRegCopy(rs_r0, rs_r2);

    // Move sign bit to bit 0, zeroing the rest.
    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);

    // EDX = EDX + EAX.
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());

    // Quotient is in EDX.
    if (!is_div) {
      // We need to compute the remainder.
      // Remainder is numerator - (quotient * imm).
      DCHECK(numerator_reg.Valid());
      OpRegCopy(rs_r0, numerator_reg);

      // EDX = quotient * imm.
      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);

      // EAX = numerator - quotient * imm, i.e. EAX -= EDX.
      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());

      // For this case, return the result in EAX.
      rl_result.reg.SetReg(r0);
    }
  }

  return rl_result;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                  bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, bool check_zero) {
  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, rs_r0);

  // Load RHS into ECX.
  LoadValueDirectFixed(rl_src2, rs_r1);

  // Copy LHS sign bit into EDX.
  NewLIR0(kx86Cdq32Da);

  if (check_zero) {
    // Handle division by zero case.
    GenDivZeroCheck(rs_r1);
  }

  // Have to catch 0x80000000/-1 case, or we will get an exception!
  OpRegImm(kOpCmp, rs_r1, -1);
  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, rs_r0, 0x80000000);
  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // In the 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX to be 0.
    LoadConstantNoClobber(rs_r2, 0);
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(r2);
  }
  return rl_result;
}
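
/*
 * Illustrative sketch (an assumption, not part of the ART build): why the
 * 0x80000000/-1 guard above exists. x86 idiv raises #DE on INT_MIN / -1
 * because the true quotient 2^31 is unrepresentable, so the generated code
 * skips the divide and keeps the wrapped result, equivalent to:
 *
 *   int32_t SafeDiv(int32_t n, int32_t d) {  // d already checked against 0
 *     if (d == -1) {
 *       return (n == INT32_MIN) ? INT32_MIN : -n;  // avoid the trapping idiv
 *     }
 *     return n / d;                                // plain idiv
 *   }
 */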

bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);

  if (is_long && cu_->instruction_set == kX86) {
    return false;
  }

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);

  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second element, then we need to be careful.
   * The reason is that the first copy will inadvertently clobber the second element with
   * the first one, thus yielding the wrong result. Thus we do a swap in that case.
   */
  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.reg, rl_src1.reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
  }

  if (is_long) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}
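
/*
 * Illustrative sketch (an assumption, not part of the ART build): the
 * branch-free min/max shape generated above, in portable C++. The conditional
 * assignment is exactly what the cmov implements:
 *
 *   int32_t MinMax(int32_t a, int32_t b, bool is_min) {
 *     int32_t result = a;                // pick the first value
 *     if (is_min ? (a > b) : (a < b)) {  // cmp a, b
 *       result = b;                      // cmovg / cmovl result, b
 *     }
 *     return result;
 *   }
 */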

bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Unaligned access is allowed on x86.
  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_value;
  if (size == k64) {
    // Unaligned access is allowed on x86.
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // In 32-bit mode, only the byte-addressable registers EAX..EDX can be used with Mov8MR.
    if (!cu_->target64 && size == kSignedByte) {
      rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
        RegStorage temp = AllocateByteRegister();
        OpRegCopy(temp, rl_src_value.reg);
        rl_value.reg = temp;
      } else {
        rl_value = LoadValue(rl_src_value, kCoreReg);
      }
    } else {
      rl_value = LoadValue(rl_src_value, kCoreReg);
    }
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

static bool IsInReg(X86Mir2Lir* pMir2Lir, const RegLocation& rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
}

bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  if (!cu_->target64) {
    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  }
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]

  if (is_long && cu_->target64) {
    // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
    FlushReg(rs_r0q);
    Clobber(rs_r0q);
    LockTemp(rs_r0q);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    LoadValueDirectWide(rl_src_expected, rs_r0q);
    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert a barrier in case of a potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kStoreLoad);

    FreeTemp(rs_r0q);
  } else if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    // TODO: CFI support.
    FlushAllRegs();
    LockCallTemps();
    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    // FIXME: needs 64-bit update.
    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
    DCHECK(!obj_in_si || !obj_in_di);
    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
    DCHECK(!off_in_si || !off_in_di);
    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
    if (push_di) {
      NewLIR1(kX86Push32R, rs_rDI.GetReg());
      MarkTemp(rs_rDI);
      LockTemp(rs_rDI);
    }
    if (push_si) {
      NewLIR1(kX86Push32R, rs_rSI.GetReg());
      MarkTemp(rs_rSI);
      LockTemp(rs_rSI);
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
    if (!obj_in_si && !obj_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    if (!off_in_si && !off_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);

    // After a store we need to insert a barrier in case of a potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kStoreLoad);

    if (push_si) {
      FreeTemp(rs_rSI);
      UnmarkTemp(rs_rSI);
      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
    }
    if (push_di) {
      FreeTemp(rs_rDI);
      UnmarkTemp(rs_rDI);
      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
    }
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(rl_new_value.reg, rl_object.reg);
      LockTemp(rs_r0);
    }

    RegLocation rl_offset;
    if (cu_->target64) {
      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    } else {
      rl_offset = LoadValue(rl_src_offset, kCoreReg);
    }
    LoadValueDirect(rl_src_expected, rs_r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert a barrier in case of a potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kStoreLoad);

    FreeTemp(rs_r0);
  }

  // Convert ZF to boolean.
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  if (!IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
  if (IsTemp(result_reg)) {
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
  return true;
}
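
/*
 * Illustrative sketch (an assumption, not part of the ART build): the
 * semantics the inlined LOCK CMPXCHG sequence implements, modeled with the
 * C++ standard library. CMPXCHG compares *addr with 'expected' and, on a
 * match, stores 'new_value'; ZF reports success, which the code above turns
 * into a boolean with set8/movzx8:
 *
 *   #include <atomic>
 *   #include <cstdint>
 *
 *   bool CompareAndSwap(std::atomic<int32_t>* addr, int32_t expected, int32_t new_value) {
 *     // Strong CAS: fails only on a real value mismatch, like LOCK CMPXCHG.
 *     return addr->compare_exchange_strong(expected, new_value);
 *   }
 */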

LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  CHECK(base_of_code_ != nullptr);

  // Address the start of the method.
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  if (rl_method.wide) {
    LoadValueDirectWideFixed(rl_method, reg);
  } else {
    LoadValueDirectFixed(rl_method, reg);
  }
  store_method_addr_used_ = true;

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // a 4-byte offset.  We will fix this up in the assembler later to have the right
  // value.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
                    0, 0, target);
  res->target = target;
  res->flags.fixup = kFixupLoad;
  store_method_addr_used_ = true;
  return res;
}

LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVldm for x86";
  return NULL;
}

LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVstm for x86";
  return NULL;
}

void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  RegStorage t_reg = AllocTemp();
  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
  FreeTemp(t_reg);
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}
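
/*
 * Illustrative sketch (an assumption, not part of the ART build): a "two bit
 * multiplier" is a literal with exactly two bits set, so x * lit decomposes
 * into two shifts and an add. For lit = 10 (bits 1 and 3, so first_bit = 1,
 * second_bit = 3):
 *
 *   int32_t MulBy10(int32_t x) {
 *     int32_t t = x << (3 - 1);  // t_reg = x << (second_bit - first_bit)
 *     int32_t r = x + t;         // r = x * 5
 *     return r << 1;             // r = (x * 5) << first_bit = x * 10
 *   }
 */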

void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
  if (cu_->target64) {
    DCHECK(reg.Is64Bit());

    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
  } else {
    DCHECK(reg.IsPair());

    // We are not supposed to clobber the incoming storage, so allocate a temporary.
    RegStorage t_reg = AllocTemp();
    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
    // The temp is no longer needed so free it at this time.
    FreeTemp(t_reg);
  }

  // In case of zero, throw ArithmeticException.
  GenDivZeroCheck(kCondEq);
}
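
/*
 * Illustrative sketch (an assumption, not part of the ART build): the OR trick
 * above tests a 64-bit register pair for zero without clobbering it:
 *
 *   bool IsZero64(uint32_t lo, uint32_t hi) {
 *     return (lo | hi) == 0;  // "or t, lo, hi" sets ZF exactly when both halves are zero
 *   }
 */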

void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
                                     RegStorage array_base,
                                     int len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             RegStorage index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      RegStorage new_index = index_;
      // Move index out of kArg1, either directly to kArg0, or to kArg2.
      // TODO: clean-up to check not a number but with type
      if (index_ == m2l_->TargetReg(kArg1, false)) {
        if (array_base_ == m2l_->TargetRefReg(kArg0)) {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, false), index_);
          new_index = m2l_->TargetReg(kArg2, false);
        } else {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, false), index_);
          new_index = m2l_->TargetReg(kArg0, false);
        }
      }
      // Load array length to kArg1.
      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, false), array_base_, len_offset_);
      if (cu_->target64) {
        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
                                      new_index, m2l_->TargetReg(kArg1, false), true);
      } else {
        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
                                      new_index, m2l_->TargetReg(kArg1, false), true);
      }
    }

   private:
    const RegStorage index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

  OpRegMem(kOpCmp, index, array_base, len_offset);
  LIR* branch = OpCondBranch(kCondUge, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}
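
/*
 * Illustrative sketch (an assumption, not part of the ART build): the bounds
 * check uses an unsigned compare (kCondUge above), so a single branch rejects
 * both negative indices and indices >= length:
 *
 *   bool IndexInBounds(int32_t index, int32_t length) {
 *     // A negative index reinterpreted as uint32_t is huge, so it fails too.
 *     return static_cast<uint32_t>(index) < static_cast<uint32_t>(length);
 *   }
 */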

void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
                                     RegStorage array_base,
                                     int32_t len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             int32_t index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      // Load array length to kArg1.
      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, false), array_base_, len_offset_);
      m2l_->LoadConstant(m2l_->TargetReg(kArg0, false), index_);
      if (cu_->target64) {
        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
                                      m2l_->TargetReg(kArg0, false),
                                      m2l_->TargetReg(kArg1, false), true);
      } else {
        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
                                      m2l_->TargetReg(kArg0, false),
                                      m2l_->TargetReg(kArg1, false), true);
      }
    }

   private:
    const int32_t index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

  NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
  LIR* branch = OpCondBranch(kCondLs, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

// Test suspend flag, return target of taken suspend branch
LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
  if (cu_->target64) {
    OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
  } else {
    OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
  }
  return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
}

// Decrement register and branch on condition
LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  OpRegImm(kOpSub, reg, 1);
  return OpCondBranch(c_code, target);
}

bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
  return false;
}

bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of EasyMultiply in x86";
  return false;
}

LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
  LOG(FATAL) << "Unexpected use of OpIT in x86";
  return NULL;
}

void X86Mir2Lir::OpEndIT(LIR* it) {
  LOG(FATAL) << "Unexpected use of OpEndIT in x86";
}

void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      OpRegCopy(dest, src);
      break;
    default:
      OpRegRegImm(kOpMul, dest, src, val);
      break;
  }
}

void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  LIR* m;
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile);
      break;
    default:
      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
                  rs_rX86_SP.GetReg(), displacement, val);
      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
      break;
  }
}

void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  if (cu_->target64) {
    if (rl_src1.is_const) {
      std::swap(rl_src1, rl_src2);
    }
    // Are we multiplying by a constant?
    if (rl_src2.is_const) {
      int64_t val = mir_graph_->ConstantValueWide(rl_src2);
      if (val == 0) {
        RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
        StoreValueWide(rl_dest, rl_result);
        return;
      } else if (val == 1) {
        StoreValueWide(rl_dest, rl_src1);
        return;
      } else if (val == 2) {
        GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
        return;
      } else if (IsPowerOfTwo(val)) {
        int shift_amount = LowestSetBit(val);
        if (!BadOverlap(rl_src1, rl_dest)) {
          rl_src1 = LoadValueWide(rl_src1, kCoreReg);
          RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
                                                    rl_src1, shift_amount);
          StoreValueWide(rl_dest, rl_result);
          return;
        }
      }
    }
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
        rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
    } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
               rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
               rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    } else {
      OpRegCopy(rl_result.reg, rl_src1.reg);
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    }
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
  }
  // Are we multiplying by a constant?
  if (rl_src2.is_const) {
    // Do special handling for a simple const operand.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if (val == 0) {
      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
      OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
      OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
      StoreValueWide(rl_dest, rl_result);
      return;
    } else if (val == 1) {
      StoreValueWide(rl_dest, rl_src1);
      return;
    } else if (val == 2) {
      GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
      return;
    } else if (IsPowerOfTwo(val)) {
      int shift_amount = LowestSetBit(val);
      if (!BadOverlap(rl_src1, rl_dest)) {
        rl_src1 = LoadValueWide(rl_src1, kCoreReg);
        RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
                                                  rl_src1, shift_amount);
        StoreValueWide(rl_dest, rl_result);
        return;
      }
    }

    // Okay, just bite the bullet and do it.
    int32_t val_lo = Low32Bits(val);
    int32_t val_hi = High32Bits(val);
    FlushAllRegs();
    LockCallTemps();  // Prepare for explicit register usage.
    rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
    bool src1_in_reg = rl_src1.location == kLocPhysReg;
    int displacement = SRegOffset(rl_src1.s_reg_low);

    // ECX <- 1H * 2L
    // EAX <- 1L * 2H
    if (src1_in_reg) {
      GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
      GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
    } else {
      GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
      GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
    }

    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());

    // EAX <- 2L
    LoadConstantNoClobber(rs_r0, val_lo);

    // EDX:EAX <- 2L * 1L (double precision)
    if (src1_in_reg) {
      NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
    } else {
      LIR* m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // EDX <- EDX + ECX (add high words)
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());

    // Result is EDX:EAX
    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
                             RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  // Nope.  Do it the hard way
  // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
  bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
                   mir_graph_->SRegToVReg(rl_src2.s_reg_low);

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.
  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);

  // At this point, the VRs are in their home locations.
  bool src1_in_reg = rl_src1.location == kLocPhysReg;
  bool src2_in_reg = rl_src2.location == kLocPhysReg;

  // ECX <- 1H
  if (src1_in_reg) {
    NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
  } else {
    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
                 kNotVolatile);
  }

  if (is_square) {
    // Take advantage of the fact that the values are the same.
    // ECX <- ECX * 2L  (1H * 2L)
    if (src2_in_reg) {
      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
    } else {
      int displacement = SRegOffset(rl_src2.s_reg_low);
      LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
                       displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
  } else {
    // EAX <- 2H
    if (src2_in_reg) {
      NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
    } else {
      LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
                   kNotVolatile);
    }

    // EAX <- EAX * 1L  (2H * 1L)
    if (src1_in_reg) {
      NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
    } else {
      int displacement = SRegOffset(rl_src1.s_reg_low);
      LIR* m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(),
                       displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- ECX * 2L  (1H * 2L)
    if (src2_in_reg) {
      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
    } else {
      int displacement = SRegOffset(rl_src2.s_reg_low);
      LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
                       displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
  }

  // EAX <- 2L
  if (src2_in_reg) {
    NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
  } else {
    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
                 kNotVolatile);
  }

  // EDX:EAX <- 2L * 1L (double precision)
  if (src1_in_reg) {
    NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
  } else {
    int displacement = SRegOffset(rl_src1.s_reg_low);
    LIR* m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
  }

  // EDX <- EDX + ECX (add high words)
  NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());

  // Result is EDX:EAX
  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
                           RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
  StoreValueWide(rl_dest, rl_result);
}
1436
1437void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
1438                                   Instruction::Code op) {
1439  DCHECK_EQ(rl_dest.location, kLocPhysReg);
1440  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1441  if (rl_src.location == kLocPhysReg) {
1442    // Both operands are in registers.
1443    // But we must ensure that rl_src is in a register pair.
1444    if (cu_->target64) {
1445      NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1446    } else {
1447      rl_src = LoadValueWide(rl_src, kCoreReg);
1448      if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1449        // The registers are the same, so we would clobber it before the use.
1450        RegStorage temp_reg = AllocTemp();
1451        OpRegCopy(temp_reg, rl_dest.reg);
1452        rl_src.reg.SetHighReg(temp_reg.GetReg());
1453      }
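      // Example (comment only): if dest is EDX:EAX and src.high is EAX,
      // the low-word op below would clobber EAX before the high-word op
      // reads it, so the shared register is copied aside and src.high is
      // redirected to the temp.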
1454      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1455
1456      x86op = GetOpcode(op, rl_dest, rl_src, true);
1457      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1458      FreeTemp(rl_src.reg);  // ???
1459    }
1460    return;
1461  }
1462
1463  // RHS is in memory.
1464  DCHECK((rl_src.location == kLocDalvikFrame) ||
1465         (rl_src.location == kLocCompilerTemp));
1466  int r_base = rs_rX86_SP.GetReg();
1467  int displacement = SRegOffset(rl_src.s_reg_low);
1468
1469  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1470  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
                     r_base, displacement + LOWORD_OFFSET);
1471  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1472                          true /* is_load */, true /* is64bit */);
1473  if (!cu_->target64) {
1474    x86op = GetOpcode(op, rl_dest, rl_src, true);
1475    lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1476    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1477                            true /* is_load */, true /* is64bit */);
1478  }
1479}
1480
1481void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1482  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
1483  if (rl_dest.location == kLocPhysReg) {
1484    // Ensure we are in a register pair
1485    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1486
1487    rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
1488    GenLongRegOrMemOp(rl_result, rl_src, op);
1489    StoreFinalValueWide(rl_dest, rl_result);
1490    return;
1491  }
1492
1493  // It wasn't in registers, so it better be in memory.
1494  DCHECK((rl_dest.location == kLocDalvikFrame) ||
1495         (rl_dest.location == kLocCompilerTemp));
1496  rl_src = LoadValueWide(rl_src, kCoreReg);
1497
1498  // Operate directly into memory.
1499  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1500  int r_base = rs_rX86_SP.GetReg();
1501  int displacement = SRegOffset(rl_dest.s_reg_low);
1502
1503  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1504  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
1505                     cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
1506  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1507                          true /* is_load */, true /* is64bit */);
1508  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1509                          false /* is_load */, true /* is64bit */);
1510  if (!cu_->target64) {
1511    x86op = GetOpcode(op, rl_dest, rl_src, true);
1512    lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
1513    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1514                            true /* is_load */, true /* is64bit */);
1515    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1516                            false /* is_load */, true /* is64bit */);
1517  }
1518  FreeTemp(rl_src.reg);
1519}
1520
1521void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
1522                              RegLocation rl_src2, Instruction::Code op,
1523                              bool is_commutative) {
1524  // Is this really a 2 operand operation?
1525  switch (op) {
1526    case Instruction::ADD_LONG_2ADDR:
1527    case Instruction::SUB_LONG_2ADDR:
1528    case Instruction::AND_LONG_2ADDR:
1529    case Instruction::OR_LONG_2ADDR:
1530    case Instruction::XOR_LONG_2ADDR:
1531      if (GenerateTwoOperandInstructions()) {
1532        GenLongArith(rl_dest, rl_src2, op);
1533        return;
1534      }
1535      break;
1536
1537    default:
1538      break;
1539  }
1540
1541  if (rl_dest.location == kLocPhysReg) {
1542    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
1543
1544    // We are about to clobber the LHS, so it needs to be a temp.
1545    rl_result = ForceTempWide(rl_result);
1546
1547    // Perform the operation using the RHS.
1548    rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1549    GenLongRegOrMemOp(rl_result, rl_src2, op);
1550
1551    // And now record that the result is in the temp.
1552    StoreFinalValueWide(rl_dest, rl_result);
1553    return;
1554  }
1555
1556  // It wasn't in registers, so it better be in memory.
1557  DCHECK((rl_dest.location == kLocDalvikFrame) ||
1558         (rl_dest.location == kLocCompilerTemp));
1559  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1560  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1561
1562  // Get one of the source operands into temporary register.
1563  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1564  if (cu_->target64) {
1565    if (IsTemp(rl_src1.reg)) {
1566      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1567    } else if (is_commutative) {
1568      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1569      // We need at least one of them to be a temporary.
1570      if (!IsTemp(rl_src2.reg)) {
1571        rl_src1 = ForceTempWide(rl_src1);
1572        GenLongRegOrMemOp(rl_src1, rl_src2, op);
1573      } else {
1574        GenLongRegOrMemOp(rl_src2, rl_src1, op);
1575        StoreFinalValueWide(rl_dest, rl_src2);
1576        return;
1577      }
1578    } else {
1579      // Need LHS to be the temp.
1580      rl_src1 = ForceTempWide(rl_src1);
1581      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1582    }
1583  } else {
1584    if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
1585      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1586    } else if (is_commutative) {
1587      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1588      // We need at least one of them to be a temporary.
1589      if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
1590        rl_src1 = ForceTempWide(rl_src1);
1591        GenLongRegOrMemOp(rl_src1, rl_src2, op);
1592      } else {
1593        GenLongRegOrMemOp(rl_src2, rl_src1, op);
1594        StoreFinalValueWide(rl_dest, rl_src2);
1595        return;
1596      }
1597    } else {
1598      // Need LHS to be the temp.
1599      rl_src1 = ForceTempWide(rl_src1);
1600      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1601    }
1602  }
1603
1604  StoreFinalValueWide(rl_dest, rl_src1);
1605}
1606
1607void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
1608                            RegLocation rl_src1, RegLocation rl_src2) {
1609  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1610}
1611
1612void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
1613                            RegLocation rl_src1, RegLocation rl_src2) {
1614  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
1615}
1616
1617void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
1618                            RegLocation rl_src1, RegLocation rl_src2) {
1619  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1620}
1621
1622void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
1623                           RegLocation rl_src1, RegLocation rl_src2) {
1624  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1625}
1626
1627void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
1628                            RegLocation rl_src1, RegLocation rl_src2) {
1629  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1630}
1631
1632void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
1633  if (cu_->target64) {
1634    rl_src = LoadValueWide(rl_src, kCoreReg);
1635    RegLocation rl_result;
1636    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1637    OpRegCopy(rl_result.reg, rl_src.reg);
1638    OpReg(kOpNot, rl_result.reg);
1639    StoreValueWide(rl_dest, rl_result);
1640  } else {
1641    LOG(FATAL) << "Unexpected use of GenNotLong()";
1642  }
1643}
1644
1645void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1646                           RegLocation rl_src2, bool is_div) {
1647  if (!cu_->target64) {
1648    LOG(FATAL) << "Unexpected use of GenDivRemLong()";
1649    return;
1650  }
1651
1652  // We have to use fixed registers, so flush all the temps.
1653  FlushAllRegs();
1654  LockCallTemps();  // Prepare for explicit register usage.
1655
1656  // Load LHS into RAX.
1657  LoadValueDirectWideFixed(rl_src1, rs_r0q);
1658
1659  // Load RHS into RCX.
1660  LoadValueDirectWideFixed(rl_src2, rs_r1q);
1661
1662  // Copy LHS sign bit into RDX.
1663  NewLIR0(kx86Cqo64Da);
1664
1665  // Handle division by zero case.
1666  GenDivZeroCheckWide(rs_r1q);
1667
1668  // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
1669  NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
1670  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
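  // (Comment only) idiv produces the quotient and remainder together;
  // INT64_MIN / -1 overflows the quotient (+2^63 is unrepresentable) and
  // raises #DE, so that pair is special-cased: the quotient stays
  // INT64_MIN (already in RAX) and the remainder is forced to 0.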
1671
1672  // RHS is -1.
1673  LoadConstantWide(rs_r6q, 0x8000000000000000);
1674  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
1675  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
1676
1677  // In 0x8000000000000000/-1 case.
1678  if (!is_div) {
1679    // For DIV, RAX is already right. For REM, we need RDX to be 0.
1680    NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
1681  }
1682  LIR* done = NewLIR1(kX86Jmp8, 0);
1683
1684  // Expected case.
1685  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
1686  minint_branch->target = minus_one_branch->target;
1687  NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
1688  done->target = NewLIR0(kPseudoTargetLabel);
1689
1690  // Result is in RAX for div and RDX for rem.
1691  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
1692  if (!is_div) {
1693    rl_result.reg.SetReg(r2q);
1694  }
1695
1696  StoreValueWide(rl_dest, rl_result);
1697}
1698
1699void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1700  rl_src = LoadValueWide(rl_src, kCoreReg);
1701  RegLocation rl_result;
1702  if (cu_->target64) {
1703    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1704    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
1705  } else {
1706    rl_result = ForceTempWide(rl_src);
1707    if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
1708        ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
1709      // The registers are the same, so we would clobber it before the use.
1710      RegStorage temp_reg = AllocTemp();
1711      OpRegCopy(temp_reg, rl_result.reg);
1712      rl_result.reg.SetHighReg(temp_reg.GetReg());
1713    }
1714    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
1715    OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
1716    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
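    // (Comment only) This computes -(H:L) as L' = -L, then H' = -(H + CF),
    // where NEG sets CF exactly when L != 0; e.g. -(0:1) yields
    // 0xFFFFFFFF:0xFFFFFFFF and -(1:0) yields 0xFFFFFFFF:0x00000000.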
1717  }
1718  StoreValueWide(rl_dest, rl_result);
1719}
1720
1721void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
1722  DCHECK_EQ(kX86, cu_->instruction_set);
1723  X86OpCode opcode = kX86Bkpt;
1724  switch (op) {
1725  case kOpCmp: opcode = kX86Cmp32RT;  break;
1726  case kOpMov: opcode = kX86Mov32RT;  break;
1727  default:
1728    LOG(FATAL) << "Bad opcode: " << op;
1729    break;
1730  }
1731  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
1732}
1733
1734void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
1735  DCHECK_EQ(kX86_64, cu_->instruction_set);
1736  X86OpCode opcode = kX86Bkpt;
1737  if (cu_->target64 && r_dest.Is64BitSolo()) {
1738    switch (op) {
1739    case kOpCmp: opcode = kX86Cmp64RT;  break;
1740    case kOpMov: opcode = kX86Mov64RT;  break;
1741    default:
1742      LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
1743      break;
1744    }
1745  } else {
1746    switch (op) {
1747    case kOpCmp: opcode = kX86Cmp32RT;  break;
1748    case kOpMov: opcode = kX86Mov32RT;  break;
1749    default:
1750      LOG(FATAL) << "Bad opcode: " << op;
1751      break;
1752    }
1753  }
1754  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
1755}
1756
1757/*
1758 * Generate array load
1759 */
1760void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
1761                             RegLocation rl_index, RegLocation rl_dest, int scale) {
1762  RegisterClass reg_class = RegClassBySize(size);
1763  int len_offset = mirror::Array::LengthOffset().Int32Value();
1764  RegLocation rl_result;
1765  rl_array = LoadValue(rl_array, kRefReg);
1766
1767  int data_offset;
1768  if (size == k64 || size == kDouble) {
1769    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1770  } else {
1771    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1772  }
1773
1774  bool constant_index = rl_index.is_const;
1775  int32_t constant_index_value = 0;
1776  if (!constant_index) {
1777    rl_index = LoadValue(rl_index, kCoreReg);
1778  } else {
1779    constant_index_value = mir_graph_->ConstantValue(rl_index);
1780    // If index is constant, just fold it into the data offset
1781    data_offset += constant_index_value << scale;
1782    // Treat as a non-array access below.
1783    rl_index.reg = RegStorage::InvalidReg();
1784  }
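  // Example (comment only): an int load at constant index 3 with scale 2
  // folds to a single [array + data_offset + 12] access and needs no
  // index register at all.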
1785
1786  /* null object? */
1787  GenNullCheck(rl_array.reg, opt_flags);
1788
1789  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
1790    if (constant_index) {
1791      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
1792    } else {
1793      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
1794    }
1795  }
1796  rl_result = EvalLoc(rl_dest, reg_class, true);
1797  LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
1798  if ((size == k64) || (size == kDouble)) {
1799    StoreValueWide(rl_dest, rl_result);
1800  } else {
1801    StoreValue(rl_dest, rl_result);
1802  }
1803}
1804
1805/*
1806 * Generate array store
1807 *
1808 */
1809void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
1810                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
1811  RegisterClass reg_class = RegClassBySize(size);
1812  int len_offset = mirror::Array::LengthOffset().Int32Value();
1813  int data_offset;
1814
1815  if (size == k64 || size == kDouble) {
1816    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1817  } else {
1818    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1819  }
1820
1821  rl_array = LoadValue(rl_array, kRefReg);
1822  bool constant_index = rl_index.is_const;
1823  int32_t constant_index_value = 0;
1824  if (!constant_index) {
1825    rl_index = LoadValue(rl_index, kCoreReg);
1826  } else {
1827    // If index is constant, just fold it into the data offset
1828    constant_index_value = mir_graph_->ConstantValue(rl_index);
1829    data_offset += constant_index_value << scale;
1830    // Treat as a non-array access below.
1831    rl_index.reg = RegStorage::InvalidReg();
1832  }
1833
1834  /* null object? */
1835  GenNullCheck(rl_array.reg, opt_flags);
1836
1837  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
1838    if (constant_index) {
1839      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
1840    } else {
1841      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
1842    }
1843  }
1844  if ((size == k64) || (size == kDouble)) {
1845    rl_src = LoadValueWide(rl_src, reg_class);
1846  } else {
1847    rl_src = LoadValue(rl_src, reg_class);
1848  }
1849  // If the src reg can't be byte accessed, move it to a temp first.
1850  if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
1851    RegStorage temp = AllocTemp();
1852    OpRegCopy(temp, rl_src.reg);
1853    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
1854  } else {
1855    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
1856  }
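  // (Comment only) On 32-bit x86 only AL/CL/DL/BL are byte-addressable,
  // so a byte store from a register such as ESI must bounce through one
  // of those; 64-bit mode can byte-address any register via a REX prefix.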
1857  if (card_mark) {
1858    // Free rl_index if it's a temp. Ensures there are 2 free regs for the card mark.
1859    if (!constant_index) {
1860      FreeTemp(rl_index.reg);
1861    }
1862    MarkGCCard(rl_src.reg, rl_array.reg);
1863  }
1864}
1865
1866RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
1867                                          RegLocation rl_src, int shift_amount) {
1868  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1869  if (cu_->target64) {
1870    OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
1871    switch (opcode) {
1872      case Instruction::SHL_LONG:
1873      case Instruction::SHL_LONG_2ADDR:
1874        op = kOpLsl;
1875        break;
1876      case Instruction::SHR_LONG:
1877      case Instruction::SHR_LONG_2ADDR:
1878        op = kOpAsr;
1879        break;
1880      case Instruction::USHR_LONG:
1881      case Instruction::USHR_LONG_2ADDR:
1882        op = kOpLsr;
1883        break;
1884      default:
1885        LOG(FATAL) << "Unexpected case";
1886    }
1887    OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
1888  } else {
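    // (Comment only) For 32-bit register pairs there are three regimes:
    //   shift == 32: a pure word move between the halves;
    //   shift >  32: move one word, then shift it by (amount - 32);
    //   shift <  32: shld/shrd funnels bits across the word boundary,
    //                then the remaining word is shifted normally.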
1889    switch (opcode) {
1890      case Instruction::SHL_LONG:
1891      case Instruction::SHL_LONG_2ADDR:
1892        DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
1893        if (shift_amount == 32) {
1894          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
1895          LoadConstant(rl_result.reg.GetLow(), 0);
1896        } else if (shift_amount > 31) {
1897          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
1898          NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
1899          LoadConstant(rl_result.reg.GetLow(), 0);
1900        } else {
1901          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
1902          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
1903          NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
1904                  shift_amount);
1905          NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
1906        }
1907        break;
1908      case Instruction::SHR_LONG:
1909      case Instruction::SHR_LONG_2ADDR:
1910        if (shift_amount == 32) {
1911          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
1912          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
1913          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
1914        } else if (shift_amount > 31) {
1915          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
1916          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
1917          NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
1918          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
1919        } else {
1920          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
1921          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
1922          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
1923                  shift_amount);
1924          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
1925        }
1926        break;
1927      case Instruction::USHR_LONG:
1928      case Instruction::USHR_LONG_2ADDR:
1929        if (shift_amount == 32) {
1930          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
1931          LoadConstant(rl_result.reg.GetHigh(), 0);
1932        } else if (shift_amount > 31) {
1933          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
1934          NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
1935          LoadConstant(rl_result.reg.GetHigh(), 0);
1936        } else {
1937          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
1938          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
1939          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
1940                  shift_amount);
1941          NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
1942        }
1943        break;
1944      default:
1945        LOG(FATAL) << "Unexpected case";
1946    }
1947  }
1948  return rl_result;
1949}
1950
1951void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
1952                                   RegLocation rl_src, RegLocation rl_shift) {
1953  // Per spec, we only care about low 6 bits of shift amount.
1954  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
1955  if (shift_amount == 0) {
1956    rl_src = LoadValueWide(rl_src, kCoreReg);
1957    StoreValueWide(rl_dest, rl_src);
1958    return;
1959  } else if (shift_amount == 1 &&
1960            (opcode ==  Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
1961    // Need to handle this here to avoid calling StoreValueWide twice.
1962    GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
1963    return;
1964  }
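  // (Comment only) The shift-by-1 SHL case above relies on x << 1 == x + x;
  // GenAddLong already propagates the carry between the 32-bit halves.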
1965  if (BadOverlap(rl_src, rl_dest)) {
1966    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
1967    return;
1968  }
1969  rl_src = LoadValueWide(rl_src, kCoreReg);
1970  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
1971  StoreValueWide(rl_dest, rl_result);
1972}
1973
1974void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
1975                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
1976  bool isConstSuccess = false;
1977  switch (opcode) {
1978    case Instruction::ADD_LONG:
1979    case Instruction::AND_LONG:
1980    case Instruction::OR_LONG:
1981    case Instruction::XOR_LONG:
1982      if (rl_src2.is_const) {
1983        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
1984      } else {
1985        DCHECK(rl_src1.is_const);
1986        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
1987      }
1988      break;
1989    case Instruction::SUB_LONG:
1990    case Instruction::SUB_LONG_2ADDR:
1991      if (rl_src2.is_const) {
1992        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
1993      } else {
1994        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
1995        isConstSuccess = true;
1996      }
1997      break;
1998    case Instruction::ADD_LONG_2ADDR:
1999    case Instruction::OR_LONG_2ADDR:
2000    case Instruction::XOR_LONG_2ADDR:
2001    case Instruction::AND_LONG_2ADDR:
2002      if (rl_src2.is_const) {
2003        if (GenerateTwoOperandInstructions()) {
2004          isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
2005        } else {
2006          isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2007        }
2008      } else {
2009        DCHECK(rl_src1.is_const);
2010        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2011      }
2012      break;
2013    default:
2014      isConstSuccess = false;
2015      break;
2016  }
2017
2018  if (!isConstSuccess) {
2019    // Default - bail to non-const handler.
2020    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
2021  }
2022}
2023
2024bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
2025  switch (op) {
2026    case Instruction::AND_LONG_2ADDR:
2027    case Instruction::AND_LONG:
2028      return value == -1;
2029    case Instruction::OR_LONG:
2030    case Instruction::OR_LONG_2ADDR:
2031    case Instruction::XOR_LONG:
2032    case Instruction::XOR_LONG_2ADDR:
2033      return value == 0;
2034    default:
2035      return false;
2036  }
2037}
2038
2039X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
2040                                bool is_high_op) {
2041  bool rhs_in_mem = rhs.location != kLocPhysReg;
2042  bool dest_in_mem = dest.location != kLocPhysReg;
2043  bool is64Bit = cu_->target64;
2044  DCHECK(!rhs_in_mem || !dest_in_mem);
2045  switch (op) {
2046    case Instruction::ADD_LONG:
2047    case Instruction::ADD_LONG_2ADDR:
2048      if (dest_in_mem) {
2049        return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
2050      } else if (rhs_in_mem) {
2051        return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
2052      }
2053      return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
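    // (Comment only) In 32-bit mode a long add splits into ADD on the low
    // word and ADC on the high word so the carry propagates; SUB pairs
    // with SBB the same way below. 64-bit mode needs no such pairing.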
2054    case Instruction::SUB_LONG:
2055    case Instruction::SUB_LONG_2ADDR:
2056      if (dest_in_mem) {
2057        return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
2058      } else if (rhs_in_mem) {
2059        return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
2060      }
2061      return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
2062    case Instruction::AND_LONG_2ADDR:
2063    case Instruction::AND_LONG:
2064      if (dest_in_mem) {
2065        return is64Bit ? kX86And64MR : kX86And32MR;
2066      }
2067      if (is64Bit) {
2068        return rhs_in_mem ? kX86And64RM : kX86And64RR;
2069      }
2070      return rhs_in_mem ? kX86And32RM : kX86And32RR;
2071    case Instruction::OR_LONG:
2072    case Instruction::OR_LONG_2ADDR:
2073      if (dest_in_mem) {
2074        return is64Bit ? kX86Or64MR : kX86Or32MR;
2075      }
2076      if (is64Bit) {
2077        return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
2078      }
2079      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
2080    case Instruction::XOR_LONG:
2081    case Instruction::XOR_LONG_2ADDR:
2082      if (dest_in_mem) {
2083        return is64Bit ? kX86Xor64MR : kX86Xor32MR;
2084      }
2085      if (is64Bit) {
2086        return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
2087      }
2088      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
2089    default:
2090      LOG(FATAL) << "Unexpected opcode: " << op;
2091      return kX86Add32RR;
2092  }
2093}
2094
2095X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
2096                                int32_t value) {
2097  bool in_mem = loc.location != kLocPhysReg;
2098  bool is64Bit = cu_->target64;
2099  bool byte_imm = IS_SIMM8(value);
2100  DCHECK(in_mem || !loc.reg.IsFloat());
2101  switch (op) {
2102    case Instruction::ADD_LONG:
2103    case Instruction::ADD_LONG_2ADDR:
2104      if (byte_imm) {
2105        if (in_mem) {
2106          return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
2107        }
2108        return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
2109      }
2110      if (in_mem) {
2111        return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
2112      }
2113      return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
2114    case Instruction::SUB_LONG:
2115    case Instruction::SUB_LONG_2ADDR:
2116      if (byte_imm) {
2117        if (in_mem) {
2118          return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
2119        }
2120        return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
2121      }
2122      if (in_mem) {
2123        return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
2124      }
2125      return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
2126    case Instruction::AND_LONG_2ADDR:
2127    case Instruction::AND_LONG:
2128      if (byte_imm) {
2129        if (is64Bit) {
2130          return in_mem ? kX86And64MI8 : kX86And64RI8;
2131        }
2132        return in_mem ? kX86And32MI8 : kX86And32RI8;
2133      }
2134      if (is64Bit) {
2135        return in_mem ? kX86And64MI : kX86And64RI;
2136      }
2137      return in_mem ? kX86And32MI : kX86And32RI;
2138    case Instruction::OR_LONG:
2139    case Instruction::OR_LONG_2ADDR:
2140      if (byte_imm) {
2141        if (is64Bit) {
2142          return in_mem ? kX86Or64MI8 : kX86Or64RI8;
2143        }
2144        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
2145      }
2146      if (is64Bit) {
2147        return in_mem ? kX86Or64MI : kX86Or64RI;
2148      }
2149      return in_mem ? kX86Or32MI : kX86Or32RI;
2150    case Instruction::XOR_LONG:
2151    case Instruction::XOR_LONG_2ADDR:
2152      if (byte_imm) {
2153        if (is64Bit) {
2154          return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
2155        }
2156        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
2157      }
2158      if (is64Bit) {
2159        return in_mem ? kX86Xor64MI : kX86Xor64RI;
2160      }
2161      return in_mem ? kX86Xor32MI : kX86Xor32RI;
2162    default:
2163      LOG(FATAL) << "Unexpected opcode: " << op;
2164      return kX86Add32MI;
2165  }
2166}
2167
2168bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
2169  DCHECK(rl_src.is_const);
2170  int64_t val = mir_graph_->ConstantValueWide(rl_src);
2171
2172  if (cu_->target64) {
2173    // We can use an immediate only if it fits in 32 bits.
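    // (Comment only) x86-64 ALU immediates are sign-extended from 32 to
    // 64 bits, so only values equal to the sign-extension of their low
    // word can be encoded directly.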
2174    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2175      return false;
2176    }
2177
2178    rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2179
2180    if ((rl_dest.location == kLocDalvikFrame) ||
2181        (rl_dest.location == kLocCompilerTemp)) {
2182      int r_base = rs_rX86_SP.GetReg();
2183      int displacement = SRegOffset(rl_dest.s_reg_low);
2184
2185      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2186      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2187      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
2188      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2189                              true /* is_load */, true /* is64bit */);
2190      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2191                              false /* is_load */, true /* is64bit */);
2192      return true;
2193    }
2194
2195    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2196    DCHECK_EQ(rl_result.location, kLocPhysReg);
2197    DCHECK(!rl_result.reg.IsFloat());
2198
2199    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2200    NewLIR2(x86op, rl_result.reg.GetReg(), val);
2201
2202    StoreValueWide(rl_dest, rl_result);
2203    return true;
2204  }
2205
2206  int32_t val_lo = Low32Bits(val);
2207  int32_t val_hi = High32Bits(val);
2208  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2209
2210  // Can we just do this into memory?
2211  if ((rl_dest.location == kLocDalvikFrame) ||
2212      (rl_dest.location == kLocCompilerTemp)) {
2213    int r_base = rs_rX86_SP.GetReg();
2214    int displacement = SRegOffset(rl_dest.s_reg_low);
2215
2216    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2217    if (!IsNoOp(op, val_lo)) {
2218      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2219      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
2220      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2221                              true /* is_load */, true /* is64bit */);
2222      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2223                              false /* is_load */, true /* is64bit */);
2224    }
2225    if (!IsNoOp(op, val_hi)) {
2226      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2227      LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
2228      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2229                                true /* is_load */, true /* is64bit */);
2230      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2231                                false /* is_load */, true /* is64bit */);
2232    }
2233    return true;
2234  }
2235
2236  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2237  DCHECK_EQ(rl_result.location, kLocPhysReg);
2238  DCHECK(!rl_result.reg.IsFloat());
2239
2240  if (!IsNoOp(op, val_lo)) {
2241    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2242    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2243  }
2244  if (!IsNoOp(op, val_hi)) {
2245    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2246    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2247  }
2248  StoreValueWide(rl_dest, rl_result);
2249  return true;
2250}
2251
2252bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
2253                                RegLocation rl_src2, Instruction::Code op) {
2254  DCHECK(rl_src2.is_const);
2255  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
2256
2257  if (cu_->target64) {
2258    // We can use an immediate only if it fits in 32 bits.
2259    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2260      return false;
2261    }
2262    if (rl_dest.location == kLocPhysReg &&
2263        rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
2264      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2265      OpRegCopy(rl_dest.reg, rl_src1.reg);
2266      NewLIR2(x86op, rl_dest.reg.GetReg(), val);
2267      StoreFinalValueWide(rl_dest, rl_dest);
2268      return true;
2269    }
2270
2271    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2272    // We need the values to be in a temporary
2273    RegLocation rl_result = ForceTempWide(rl_src1);
2274
2275    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2276    NewLIR2(x86op, rl_result.reg.GetReg(), val);
2277
2278    StoreFinalValueWide(rl_dest, rl_result);
2279    return true;
2280  }
2281
2282  int32_t val_lo = Low32Bits(val);
2283  int32_t val_hi = High32Bits(val);
2284  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2285  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
2286
2287  // Can we do this directly into the destination registers?
2288  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
2289      rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
2290      rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
2291    if (!IsNoOp(op, val_lo)) {
2292      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2293      NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
2294    }
2295    if (!IsNoOp(op, val_hi)) {
2296      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2297      NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
2298    }
2299
2300    StoreFinalValueWide(rl_dest, rl_dest);
2301    return true;
2302  }
2303
2304  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2305  DCHECK_EQ(rl_src1.location, kLocPhysReg);
2306
2307  // We need the values to be in a temporary
2308  RegLocation rl_result = ForceTempWide(rl_src1);
2309  if (!IsNoOp(op, val_lo)) {
2310    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2311    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2312  }
2313  if (!IsNoOp(op, val_hi)) {
2314    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2315    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2316  }
2317
2318  StoreFinalValueWide(rl_dest, rl_result);
2319  return true;
2320}
2321
2322// For final classes there are no sub-classes to check and so we can answer the instance-of
2323// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
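// Rough shape of the generated sequence (illustrative comment only):
//   result = 0;
//   if (obj != null) result = (obj->klass_ == resolved_class);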
2324void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
2325                                    RegLocation rl_dest, RegLocation rl_src) {
2326  RegLocation object = LoadValue(rl_src, kRefReg);
2327  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2328  RegStorage result_reg = rl_result.reg;
2329
2330  // For 32-bit, SETcc only works with EAX..EDX.
2331  RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
2332  if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
2333    result_reg = AllocateByteRegister();
2334  }
2335
2336  // Assume that there is no match.
2337  LoadConstant(result_reg, 0);
2338  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
2339
2340  // We will use this register to compare to memory below.
2341  // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
2342  // For this reason, force allocation of a 32 bit register to use, so that the
2343    // compare to memory will be done using a 32 bit comparison.
2344  // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
2345  RegStorage check_class = AllocTemp();
2346
2347  // If Method* is already in a register, we can save a copy.
2348  RegLocation rl_method = mir_graph_->GetMethodLoc();
2349  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
2350    (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
2351
2352  if (rl_method.location == kLocPhysReg) {
2353    if (use_declaring_class) {
2354      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2355                  check_class, kNotVolatile);
2356    } else {
2357      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2358                  check_class, kNotVolatile);
2359      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2360    }
2361  } else {
2362    LoadCurrMethodDirect(check_class);
2363    if (use_declaring_class) {
2364      LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2365                  check_class, kNotVolatile);
2366    } else {
2367      LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2368                  check_class, kNotVolatile);
2369      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2370    }
2371  }
2372
2373  // Compare the computed class to the class in the object.
2374  DCHECK_EQ(object.location, kLocPhysReg);
2375  OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
2376
2377  // Set the low byte of the result to 0 or 1 from the compare condition code.
2378  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
2379
2380  LIR* target = NewLIR0(kPseudoTargetLabel);
2381  null_branchover->target = target;
2382  FreeTemp(check_class);
2383  if (IsTemp(result_reg)) {
2384    OpRegCopy(rl_result.reg, result_reg);
2385    FreeTemp(result_reg);
2386  }
2387  StoreValue(rl_dest, rl_result);
2388}
2389
2390void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
2391                                            bool type_known_abstract, bool use_declaring_class,
2392                                            bool can_assume_type_is_in_dex_cache,
2393                                            uint32_t type_idx, RegLocation rl_dest,
2394                                            RegLocation rl_src) {
2395  FlushAllRegs();
2396  // May generate a call - use explicit registers.
2397  LockCallTemps();
2398  RegStorage method_reg = TargetRefReg(kArg1);  // kArg1 gets current Method*.
2399  LoadCurrMethodDirect(method_reg);
2400  RegStorage class_reg = TargetRefReg(kArg2);  // kArg2 will hold the Class*.
2401  RegStorage ref_reg = TargetRefReg(kArg0);  // kArg0 will hold the ref.
2402  // Reference must end up in kArg0.
2403  if (needs_access_check) {
2404    // Check that we have access to type_idx and, if not, throw IllegalAccessError.
2405    // The called helper returns the Class* in kArg0.
2406    if (cu_->target64) {
2407      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
2408                           type_idx, true);
2409    } else {
2410      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
2411                           type_idx, true);
2412    }
2413    OpRegCopy(class_reg, TargetRefReg(kRet0));
2414    LoadValueDirectFixed(rl_src, ref_reg);
2415  } else if (use_declaring_class) {
2416    LoadValueDirectFixed(rl_src, ref_reg);
2417    LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2418                class_reg, kNotVolatile);
2419  } else {
2420    // Load dex cache entry into class_reg (kArg2).
2421    LoadValueDirectFixed(rl_src, ref_reg);
2422    LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2423                class_reg, kNotVolatile);
2424    int32_t offset_of_type =
2425        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
2426        (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
2427    LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
2428    if (!can_assume_type_is_in_dex_cache) {
2429      // Need to test presence of type in dex cache at runtime.
2430      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
2431      // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
2432      if (cu_->target64) {
2433        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
2434      } else {
2435        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
2436      }
2437      OpRegCopy(class_reg, TargetRefReg(kRet0));  // Align usage with fast path.
2438      LoadValueDirectFixed(rl_src, ref_reg);  /* Reload Ref. */
2439      // Rejoin code paths
2440      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
2441      hop_branch->target = hop_target;
2442    }
2443  }
2444  /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
2445  RegLocation rl_result = GetReturn(kRefReg);
2446
2447  // On x86-64 kArg0 is not EAX, so we have to copy ref from kArg0 to EAX.
2448  if (cu_->target64) {
2449    OpRegCopy(rl_result.reg, ref_reg);
2450  }
2451
2452  // For 32-bit, SETcc only works with EAX..EDX.
2453  DCHECK_LT(rl_result.reg.GetRegNum(), 4);
2454
2455  // Is the class NULL?
2456  LIR* branch1 = OpCmpImmBranch(kCondEq, ref_reg, 0, NULL);
2457
2458  RegStorage ref_class_reg = TargetRefReg(kArg1);  // kArg1 will hold ref->klass_.
2459  /* Load object->klass_. */
2460  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
2461  LoadRefDisp(ref_reg,  mirror::Object::ClassOffset().Int32Value(), ref_class_reg,
2462              kNotVolatile);
2463  /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
2464  LIR* branchover = nullptr;
2465  if (type_known_final) {
2466    // Ensure top 3 bytes of result are 0.
2467    LoadConstant(rl_result.reg, 0);
2468    OpRegReg(kOpCmp, ref_class_reg, class_reg);
2469    // Set the low byte of the result to 0 or 1 from the compare condition code.
2470    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondEq);
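    // (Comment only) SETcc writes only the low byte of its operand, which
    // is why the register was zeroed beforehand: the upper 24 bits of the
    // boolean stay clean.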
2471  } else {
2472    if (!type_known_abstract) {
2473      LoadConstant(rl_result.reg, 1);     // Assume result succeeds.
2474      branchover = OpCmpBranch(kCondEq, ref_class_reg, class_reg, NULL);
2475    }
2476    OpRegCopy(TargetRefReg(kArg0), class_reg);
2477    if (cu_->target64) {
2478      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
2479    } else {
2480      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
2481    }
2482  }
2483  // TODO: only clobber when type isn't final?
2484  ClobberCallerSave();
2485  /* Branch targets here. */
2486  LIR* target = NewLIR0(kPseudoTargetLabel);
2487  StoreValue(rl_dest, rl_result);
2488  branch1->target = target;
2489  if (branchover != nullptr) {
2490    branchover->target = target;
2491  }
2492}
2493
2494void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
2495                            RegLocation rl_lhs, RegLocation rl_rhs) {
2496  OpKind op = kOpBkpt;
2497  bool is_div_rem = false;
2498  bool unary = false;
2499  bool shift_op = false;
2500  bool is_two_addr = false;
2501  RegLocation rl_result;
2502  switch (opcode) {
2503    case Instruction::NEG_INT:
2504      op = kOpNeg;
2505      unary = true;
2506      break;
2507    case Instruction::NOT_INT:
2508      op = kOpMvn;
2509      unary = true;
2510      break;
2511    case Instruction::ADD_INT_2ADDR:
2512      is_two_addr = true;
2513      // Fallthrough
2514    case Instruction::ADD_INT:
2515      op = kOpAdd;
2516      break;
2517    case Instruction::SUB_INT_2ADDR:
2518      is_two_addr = true;
2519      // Fallthrough
2520    case Instruction::SUB_INT:
2521      op = kOpSub;
2522      break;
2523    case Instruction::MUL_INT_2ADDR:
2524      is_two_addr = true;
2525      // Fallthrough
2526    case Instruction::MUL_INT:
2527      op = kOpMul;
2528      break;
2529    case Instruction::DIV_INT_2ADDR:
2530      is_two_addr = true;
2531      // Fallthrough
2532    case Instruction::DIV_INT:
2533      op = kOpDiv;
2534      is_div_rem = true;
2535      break;
2536    /* NOTE: returns in kArg1 */
2537    case Instruction::REM_INT_2ADDR:
2538      is_two_addr = true;
2539      // Fallthrough
2540    case Instruction::REM_INT:
2541      op = kOpRem;
2542      is_div_rem = true;
2543      break;
2544    case Instruction::AND_INT_2ADDR:
2545      is_two_addr = true;
2546      // Fallthrough
2547    case Instruction::AND_INT:
2548      op = kOpAnd;
2549      break;
2550    case Instruction::OR_INT_2ADDR:
2551      is_two_addr = true;
2552      // Fallthrough
2553    case Instruction::OR_INT:
2554      op = kOpOr;
2555      break;
2556    case Instruction::XOR_INT_2ADDR:
2557      is_two_addr = true;
2558      // Fallthrough
2559    case Instruction::XOR_INT:
2560      op = kOpXor;
2561      break;
2562    case Instruction::SHL_INT_2ADDR:
2563      is_two_addr = true;
2564      // Fallthrough
2565    case Instruction::SHL_INT:
2566      shift_op = true;
2567      op = kOpLsl;
2568      break;
2569    case Instruction::SHR_INT_2ADDR:
2570      is_two_addr = true;
2571      // Fallthrough
2572    case Instruction::SHR_INT:
2573      shift_op = true;
2574      op = kOpAsr;
2575      break;
2576    case Instruction::USHR_INT_2ADDR:
2577      is_two_addr = true;
2578      // Fallthrough
2579    case Instruction::USHR_INT:
2580      shift_op = true;
2581      op = kOpLsr;
2582      break;
2583    default:
2584      LOG(FATAL) << "Invalid word arith op: " << opcode;
2585  }
2586
2587  // Can we convert to a two address instruction?
2588  if (!is_two_addr &&
2589        (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2590         mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
2591    is_two_addr = true;
2592  }
2593
2594  if (!GenerateTwoOperandInstructions()) {
2595    is_two_addr = false;
2596  }
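  // Example (comment only): for "v0 = v0 + v1" the dest VR matches the
  // LHS VR, so the two-operand x86 form "add r_v0, r_v1" can be used
  // instead of a copy followed by a three-operand sequence.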
2597
2598  // Get the div/rem stuff out of the way.
2599  if (is_div_rem) {
2600    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
2601    StoreValue(rl_dest, rl_result);
2602    return;
2603  }
2604
2605  // If we generate any memory access below, it will reference a dalvik reg.
2606  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2607
2608  if (unary) {
2609    rl_lhs = LoadValue(rl_lhs, kCoreReg);
2610    rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2611    rl_result = EvalLoc(rl_dest, kCoreReg, true);
2612    OpRegReg(op, rl_result.reg, rl_lhs.reg);
2613  } else {
2614    if (shift_op) {
2615      // X86 doesn't require masking and must use ECX.
2616      RegStorage t_reg = TargetReg(kCount, false);  // rCX
2617      LoadValueDirectFixed(rl_rhs, t_reg);
2618      if (is_two_addr) {
2619        // Can we do this directly into memory?
2620        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2621        rl_rhs = LoadValue(rl_rhs, kCoreReg);
2622        if (rl_result.location != kLocPhysReg) {
2623          // Okay, we can do this into memory
2624          OpMemReg(op, rl_result, t_reg.GetReg());
2625          FreeTemp(t_reg);
2626          return;
2627        } else if (!rl_result.reg.IsFloat()) {
2628          // Can do this directly into the result register
2629          OpRegReg(op, rl_result.reg, t_reg);
2630          FreeTemp(t_reg);
2631          StoreFinalValue(rl_dest, rl_result);
2632          return;
2633        }
2634      }
2635      // Three address form, or we can't do directly.
2636      rl_lhs = LoadValue(rl_lhs, kCoreReg);
2637      rl_result = EvalLoc(rl_dest, kCoreReg, true);
2638      OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
2639      FreeTemp(t_reg);
2640    } else {
2641      // Multiply is 3 operand only (sort of).
2642      if (is_two_addr && op != kOpMul) {
2643        // Can we do this directly into memory?
2644        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2645        if (rl_result.location == kLocPhysReg) {
2646          // Ensure res is in a core reg
2647          rl_result = EvalLoc(rl_dest, kCoreReg, true);
2648          // Can we do this from memory directly?
2649          rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2650          if (rl_rhs.location != kLocPhysReg) {
2651            OpRegMem(op, rl_result.reg, rl_rhs);
2652            StoreFinalValue(rl_dest, rl_result);
2653            return;
2654          } else if (!rl_rhs.reg.IsFloat()) {
2655            OpRegReg(op, rl_result.reg, rl_rhs.reg);
2656            StoreFinalValue(rl_dest, rl_result);
2657            return;
2658          }
2659        }
2660        rl_rhs = LoadValue(rl_rhs, kCoreReg);
2661        // rl_rhs and rl_dest may be the same VR; in that case rl_dest is
2662        // already in a register after LoadValue, but rl_result has not been
2663        // updated yet, so refresh it.
2664        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2665        if (rl_result.location != kLocPhysReg) {
2666          // Okay, we can do this into memory.
2667          OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
2668          return;
2669        } else if (!rl_result.reg.IsFloat()) {
2670          // Can do this directly into the result register.
2671          OpRegReg(op, rl_result.reg, rl_rhs.reg);
2672          StoreFinalValue(rl_dest, rl_result);
2673          return;
2674        } else {
2675          rl_lhs = LoadValue(rl_lhs, kCoreReg);
2676          rl_result = EvalLoc(rl_dest, kCoreReg, true);
2677          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2678        }
2679      } else {
2680        // Try to use reg/memory instructions.
2681        rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
2682        rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2683        // We can't optimize with FP registers.
2684        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
2685          // Something is difficult, so fall back to the standard case.
2686          rl_lhs = LoadValue(rl_lhs, kCoreReg);
2687          rl_rhs = LoadValue(rl_rhs, kCoreReg);
2688          rl_result = EvalLoc(rl_dest, kCoreReg, true);
2689          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2690        } else {
2691          // We can optimize by moving to result and using memory operands.
2692          if (rl_rhs.location != kLocPhysReg) {
2693            // Force LHS into result.
2694            // We should be careful with the order here:
2695            // if rl_dest and rl_lhs point to the same VR, we should load first;
2696            // if they are different, we should find a register for the dest first.
2697            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2698                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
2699              rl_lhs = LoadValue(rl_lhs, kCoreReg);
2700              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2701              // No-op if these are the same.
2702              OpRegCopy(rl_result.reg, rl_lhs.reg);
2703            } else {
2704              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2705              LoadValueDirect(rl_lhs, rl_result.reg);
2706            }
2707            OpRegMem(op, rl_result.reg, rl_rhs);
2708          } else if (rl_lhs.location != kLocPhysReg) {
2709            // RHS is in a register; LHS is in memory.
2710            if (op != kOpSub) {
2711              // Force RHS into result and operate on memory.
2712              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2713              OpRegCopy(rl_result.reg, rl_rhs.reg);
2714              OpRegMem(op, rl_result.reg, rl_lhs);
2715            } else {
2716              // Subtraction isn't commutative.
2717              rl_lhs = LoadValue(rl_lhs, kCoreReg);
2718              rl_rhs = LoadValue(rl_rhs, kCoreReg);
2719              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2720              OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2721            }
2722          } else {
2723            // Both are in registers.
2724            rl_lhs = LoadValue(rl_lhs, kCoreReg);
2725            rl_rhs = LoadValue(rl_rhs, kCoreReg);
2726            rl_result = EvalLoc(rl_dest, kCoreReg, true);
2727            OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2728          }
2729        }
2730      }
2731    }
2732  }
2733  StoreValue(rl_dest, rl_result);
2734}
2735
2736bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
2737  // If we have non-core registers, then we can't do good things.
2738  if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
2739    return false;
2740  }
2741  if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
2742    return false;
2743  }
2744
2745  // Everything will be fine :-).
2746  return true;
2747}
2748
2749void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
2750  if (!cu_->target64) {
2751    Mir2Lir::GenIntToLong(rl_dest, rl_src);
2752    return;
2753  }
2754  rl_src = UpdateLocTyped(rl_src, kCoreReg);
2755  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2756  if (rl_src.location == kLocPhysReg) {
2757    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
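    // (Comment only) movsxd sign-extends the 32-bit source into the full
    // 64-bit register, e.g. 0xFFFFFFFF (-1) becomes 0xFFFFFFFFFFFFFFFF.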
2758  } else {
2759    int displacement = SRegOffset(rl_src.s_reg_low);
2760    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2761    LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
2762                     displacement + LOWORD_OFFSET);
2763    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
2764                            true /* is_load */, true /* is_64bit */);
2765  }
2766  StoreValueWide(rl_dest, rl_result);
2767}
2768
2769void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
2770                        RegLocation rl_src1, RegLocation rl_shift) {
2771  if (!cu_->target64) {
2772    Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
2773    return;
2774  }
2775
2776  bool is_two_addr = false;
2777  OpKind op = kOpBkpt;
2778  RegLocation rl_result;
2779
2780  switch (opcode) {
2781    case Instruction::SHL_LONG_2ADDR:
2782      is_two_addr = true;
2783      // Fallthrough
2784    case Instruction::SHL_LONG:
2785      op = kOpLsl;
2786      break;
2787    case Instruction::SHR_LONG_2ADDR:
2788      is_two_addr = true;
2789      // Fallthrough
2790    case Instruction::SHR_LONG:
2791      op = kOpAsr;
2792      break;
2793    case Instruction::USHR_LONG_2ADDR:
2794      is_two_addr = true;
2795      // Fallthrough
2796    case Instruction::USHR_LONG:
2797      op = kOpLsr;
2798      break;
2799    default:
2800      op = kOpBkpt;
2801  }
2802
2803  // X86 doesn't require masking and must use ECX.
2804  RegStorage t_reg = TargetReg(kCount, false);  // rCX
2805  LoadValueDirectFixed(rl_shift, t_reg);
2806  if (is_two_addr) {
2807    // Can we do this directly into memory?
2808    rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
2809    if (rl_result.location != kLocPhysReg) {
2810      // Okay, we can do this into memory
2811      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2812      OpMemReg(op, rl_result, t_reg.GetReg());
2813    } else if (!rl_result.reg.IsFloat()) {
2814      // Can do this directly into the result register
2815      OpRegReg(op, rl_result.reg, t_reg);
2816      StoreFinalValueWide(rl_dest, rl_result);
2817    }
2818  } else {
2819    // Three address form, or we can't do directly.
2820    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2821    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2822    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
2823    StoreFinalValueWide(rl_dest, rl_result);
2824  }
2825
2826  FreeTemp(t_reg);
2827}
2828
2829}  // namespace art
2830