int_x86.cc revision 6bbf0967d217ab2b7bdbb78bfd076b8fb07a44e8
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the X86 ISA */

#include "codegen_x86.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "mirror/art_method.h"
#include "mirror/array.h"
#include "x86_lir.h"

namespace art {

/*
 * Compare two 64-bit values
 *    x = y     return  0
 *    x < y     return -1
 *    x > y     return  1
 */
void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    RegStorage temp_reg = AllocTemp();
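    // Branchless compare: compute (src1 > src2) - (src1 < src2) in the low byte,
    // then sign-extend it to 64 bits.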
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG);  // result = (src1 > src2) ? 1 : 0
    NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL);  // temp = (src1 >= src2) ? 0 : 1
    NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
    NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());

    StoreValue(rl_dest, rl_result);
    FreeTemp(temp_reg);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
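  // Assemble the result as ((x != y) ? 1 : 0) | ((x < y) ? -1 : 0),
  // which yields -1, 0 or 1 without branching.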
  // Compute (r1:r0) = (r1:r0) - (r3:r2)
  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
  OpReg(kOpNeg, rs_r2);  // r2 = -r2
  OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = high | low - sets ZF
  NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, rs_r0.GetReg(), rs_r0.GetReg());
  OpRegReg(kOpOr, rs_r0, rs_r2);  // r0 = r0 | r2
  RegLocation rl_result = LocCReturn();
  StoreValue(rl_dest, rl_result);
}

X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
  switch (cond) {
    case kCondEq: return kX86CondEq;
    case kCondNe: return kX86CondNe;
    case kCondCs: return kX86CondC;
    case kCondCc: return kX86CondNc;
    case kCondUlt: return kX86CondC;
    case kCondUge: return kX86CondNc;
    case kCondMi: return kX86CondS;
    case kCondPl: return kX86CondNs;
    case kCondVs: return kX86CondO;
    case kCondVc: return kX86CondNo;
    case kCondHi: return kX86CondA;
    case kCondLs: return kX86CondBe;
    case kCondGe: return kX86CondGe;
    case kCondLt: return kX86CondL;
    case kCondGt: return kX86CondG;
    case kCondLe: return kX86CondLe;
    case kCondAl:
    case kCondNv: LOG(FATAL) << "Should not reach here";
  }
  return kX86CondO;
}

LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
                                int check_value, LIR* target) {
  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
    NewLIR2(reg.Is64Bit() ? kX86Test64RR : kX86Test32RR, reg.GetReg(), reg.GetReg());
  } else {
    if (reg.Is64Bit()) {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
    } else {
      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
    }
  }
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */, cc);
  branch->target = target;
  return branch;
}

LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (r_dest.IsFloat() || r_src.IsFloat()) {
    return OpFpRegCopy(r_dest, r_src);
  }
  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                    r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    bool dest_fp = r_dest.IsFloat();
    bool src_fp = r_src.IsFloat();
    if (dest_fp) {
      if (src_fp) {
        OpRegCopy(r_dest, r_src);
      } else {
        // TODO: Prevent this from happening in the code. The result is often
        // unused or could have been loaded more easily from memory.
        if (!r_src.IsPair()) {
          DCHECK(!r_dest.IsPair());
          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
          RegStorage r_tmp = AllocTempDouble();
          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
          FreeTemp(r_tmp);
        }
      }
    } else {
      if (src_fp) {
        if (!r_dest.IsPair()) {
          DCHECK(!r_src.IsPair());
          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
        } else {
          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
          RegStorage temp_reg = AllocTempDouble();
          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
        }
      } else {
        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
        if (!r_src.IsPair()) {
          // Just copy the register directly.
          OpRegCopy(r_dest, r_src);
        } else {
          // Handle overlap
          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
              r_src.GetLowReg() == r_dest.GetHighReg()) {
            // Deal with cycles.
            RegStorage temp_reg = AllocTemp();
            OpRegCopy(temp_reg, r_dest.GetHigh());
            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
            OpRegCopy(r_dest.GetLow(), temp_reg);
            FreeTemp(temp_reg);
          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
          } else {
            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
          }
        }
      }
    }
  }
}

void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                  int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                  int dest_reg_class) {
  DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
  DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
  // We really need this check for correctness; otherwise we would need extra
  // checks in the non-zero/one case below.
  if (true_val == false_val) {
    LoadConstantNoClobber(rs_dest, true_val);
    return;
  }

  const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);

  const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
  if (zero_one_case && IsByteRegister(rs_dest)) {
    if (!dest_intersect) {
      LoadConstantNoClobber(rs_dest, 0);
    }
    OpRegReg(kOpCmp, left_op, right_op);
    // Set the low byte of the result to 0 or 1 from the compare condition code.
    NewLIR2(kX86Set8R, rs_dest.GetReg(),
            X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
    if (dest_intersect) {
      NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
    }
  } else {
    // Be careful: rs_dest may only be written after the cmp, since it can alias one
    // of the operands, and we cannot zero it with xor because that dirties the flags.
    RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
    if (temp_reg.Valid()) {
      if (false_val == 0 && dest_intersect) {
        code = FlipComparisonOrder(code);
        std::swap(true_val, false_val);
      }
      if (!dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
      }
      LoadConstantNoClobber(temp_reg, true_val);
      OpRegReg(kOpCmp, left_op, right_op);
      if (dest_intersect) {
        LoadConstantNoClobber(rs_dest, false_val);
        DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
      }
      OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
      FreeTemp(temp_reg);
    } else {
      // slow path
      LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
      LoadConstantNoClobber(rs_dest, false_val);
      LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
      LIR* true_case = NewLIR0(kPseudoTargetLabel);
      cmp_branch->target = true_case;
      LoadConstantNoClobber(rs_dest, true_val);
      LIR* end = NewLIR0(kPseudoTargetLabel);
      that_is_it->target = end;
    }
  }
}

void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  // Avoid using float regs here.
  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
  rl_src = LoadValue(rl_src, src_reg_class);
  ConditionCode ccode = mir->meta.ccode;

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When the true case is zero and result_reg is not same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $false_case
     *     cmovnz result_reg, t1
     * 2) When the false case is zero and result_reg is not same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $true_case
     *     cmovz result_reg, t1
     * 3) All other cases (we do compare first to set eflags):
     *     cmp $0, src_reg
     *     mov result_reg, $false_case
     *     mov t1, $true_case
     *     cmovz result_reg, t1
     */
    // FIXME: depending on how you use registers you could get a false != mismatch when dealing
    // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
    const bool result_reg_same_as_src =
        (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
    const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
    const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
    const bool catch_all_case = !(true_zero_case || false_zero_case);

    if (true_zero_case || false_zero_case) {
      OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      OpRegImm(kOpCmp, rl_src.reg, 0);
    }

    if (catch_all_case) {
      OpRegImm(kOpMov, rl_result.reg, false_val);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
      int immediateForTemp = true_zero_case ? false_val : true_val;
      RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
      OpRegImm(kOpMov, temp1_reg, immediateForTemp);

      OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);

      FreeTemp(temp1_reg);
    }
  } else {
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    rl_result = EvalLoc(rl_dest, result_reg_class, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When the true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When the false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither case is in place:
     *     cmp $0, src_reg
     *     mov result_reg, false_reg
     *     cmovz result_reg, true_reg
     */

    // kMirOpSelect is generated only for conditional cases where the comparison is against zero.
    OpRegImm(kOpCmp, rl_src.reg, 0);

    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    } else {
      OpRegCopy(rl_result.reg, rl_false.reg);
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  if (cu_->target64) {
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);

    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
    OpCondBranch(ccode, taken);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
  RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);

  // Swap operands and condition code to prevent use of zero flag.
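  // After the sub/sbc pair below, only SF, OF and CF reflect the full 64-bit
  // result; ZF reflects just the high word. kCondLe/kCondGt consult ZF, so they
  // are rewritten as kCondGe/kCondLt on swapped subtraction operands.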
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0)
    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2)
    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
      break;
    case kCondLe:
      ccode = kCondGe;
      break;
    case kCondGt:
      ccode = kCondLt;
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;

  if (cu_->target64) {
    if (is_equality_test && val == 0) {
      // A comparison against 0 for ==/!= simplifies to a register self-test.
      NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
      OpRegImm(kOpCmp, rl_src1.reg, val_lo);
    } else {
      RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
      LoadConstantWide(tmp, val);
      OpRegReg(kOpCmp, rl_src1.reg, tmp);
      FreeTemp(tmp);
    }
    OpCondBranch(ccode, taken);
    return;
  }

  if (is_equality_test && val != 0) {
    rl_src1 = ForceTempWide(rl_src1);
  }
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (is_equality_test) {
    // A comparison against 0 for ==/!= reduces to OR-ing the two halves.
    if (val == 0) {
      if (IsTemp(low_reg)) {
        OpRegReg(kOpOr, low_reg, high_reg);
        // We have now changed it; ignore the old values.
        Clobber(rl_src1.reg);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
        FreeTemp(t_reg);
      }
      OpCondBranch(ccode, taken);
      return;
    }

    // Need to compute the actual value for ==, !=.
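    // After the sub/sbb pair, OR-ing the two halves is zero exactly when
    // (high_reg:low_reg) == val, so ZF is set correctly for ==/!=.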
    OpRegImm(kOpSub, low_reg, val_lo);
    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
    OpRegReg(kOpOr, high_reg, low_reg);
    Clobber(rl_src1.reg);
  } else if (ccode == kCondLe || ccode == kCondGt) {
    // Swap operands and condition code to prevent use of zero flag.
    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
    LoadConstantWide(tmp, val);
    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
    FreeTemp(tmp);
  } else {
    // We can use a compare for the low word to set CF.
    OpRegImm(kOpCmp, low_reg, val_lo);
    if (IsTemp(high_reg)) {
      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
      // We have now changed it; ignore the old values.
      Clobber(rl_src1.reg);
    } else {
      // mov temp_reg, high_reg; sbb temp_reg, high_constant
      RegStorage t_reg = AllocTemp();
      OpRegCopy(t_reg, high_reg);
      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
      FreeTemp(t_reg);
    }
  }

  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
  // It does not make sense to calculate magic and shift for zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H. S. Warren's Hacker's Delight Chapter 10 and
   * T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication",
   * the magic number M and shift S can be calculated in the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >= 2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated like:
   * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * The magic number M is calculated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always bigger than or equal to 32/64, so we just return
   * p - 32 (or p - 64 for long) as the shift number S.
   */
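  // Worked example (int case, a standard Hacker's Delight result): divisor == 7
  // gives magic == 0x92492493 (negative) and shift == 2. Since magic < 0 and the
  // divisor > 0, the numerator is added back by the caller:
  //   n / 7 == ((hi32(magic * n) + n) >> 2) + ((uint32_t)n >> 31).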

  int64_t p = (is_long) ? 63 : 31;
  const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;

  // Initialize the computations.
  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
                                    static_cast<uint32_t>(divisor) >> 31);
  uint64_t abs_nc = tmp - 1 - tmp % abs_d;
  uint64_t quotient1 = exp / abs_nc;
  uint64_t remainder1 = exp % abs_nc;
  uint64_t quotient2 = exp / abs_d;
  uint64_t remainder2 = exp % abs_d;

  /*
   * Rather than handling positive and negative divisors separately, Hacker's
   * Delight folds the two cases into a single loop to avoid duplication.
   */
  uint64_t delta;
  do {
    p++;
    quotient1 = 2 * quotient1;
    remainder1 = 2 * remainder1;
    if (remainder1 >= abs_nc) {
      quotient1++;
      remainder1 = remainder1 - abs_nc;
    }
    quotient2 = 2 * quotient2;
    remainder2 = 2 * remainder2;
    if (remainder2 >= abs_d) {
      quotient2++;
      remainder2 = remainder2 - abs_d;
    }
    delta = abs_d - remainder2;
  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);

  if (!is_long) {
    magic = static_cast<int>(magic);
  }

  shift = (is_long) ? p - 64 : p - 32;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                     int imm, bool is_div) {
  // Use a multiply (and fixup) to perform an int div/rem by a constant.
  RegLocation rl_result;

  if (imm == 1) {
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      // x / 1 == x.
      LoadValueDirectFixed(rl_src, rl_result.reg);
    } else {
      // x % 1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (is_div) {
      LoadValueDirectFixed(rl_src, rl_result.reg);
      OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
      LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // for x != MIN_INT, x / -1 == -x.
      NewLIR1(kX86Neg32R, rl_result.reg.GetReg());

      // rl_result.reg already contains the right value (0x80000000).
      minint_branch->target = NewLIR0(kPseudoTargetLabel);
    } else {
      // x % -1 == 0.
      LoadConstantNoClobber(rl_result.reg, 0);
    }
  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
    // Division using shifting.
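    // An arithmetic shift alone rounds negative dividends toward -infinity, so
    // bias them by (2^k - 1) first: the lea computes src + (2^k - 1), and the
    // cmov keeps the unbiased src when it is non-negative (sign flag clear).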
    rl_src = LoadValue(rl_src, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    if (IsSameReg(rl_result.reg, rl_src.reg)) {
      RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
      rl_result.reg.SetReg(rs_temp.GetReg());
    }
    NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
    NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
    int shift_amount = LowestSetBit(imm);
    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
    if (imm < 0) {
      OpReg(kOpNeg, rl_result.reg);
    }
  } else {
    CHECK(imm <= -2 || imm >= 2);

    // Use H. S. Warren's Hacker's Delight Chapter 10 and T. Granlund and
    // P. L. Montgomery's "Division by invariant integers using multiplication".
    int64_t magic;
    int shift;
    CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);

    /*
     * For imm >= 2,
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
     * For imm <= -2,
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
     * We implement this algorithm in the following way:
     * 1. multiply magic number M and numerator n; the upper 32 bits end up in EDX
     * 2. if imm > 0 and magic < 0, add numerator to EDX
     *    if imm < 0 and magic > 0, sub numerator from EDX
     * 3. if S != 0, SAR EDX by S bits
     * 4. add 1 to EDX if EDX < 0
     * 5. Thus, EDX is the quotient
     */

    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);
    FlushReg(rs_r2);
    Clobber(rs_r2);
    LockTemp(rs_r2);

    // Assume that the result will be in EDX.
    rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};

    // Numerator into EAX.
    RegStorage numerator_reg;
    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
      // We will need the value later.
      rl_src = LoadValue(rl_src, kCoreReg);
      numerator_reg = rl_src.reg;
      OpRegCopy(rs_r0, numerator_reg);
    } else {
      // Only need this once.  Just put it into EAX.
      LoadValueDirectFixed(rl_src, rs_r0);
    }

    // EDX = magic.
    LoadConstantNoClobber(rs_r2, magic);

    // EDX:EAX = magic * dividend.
    NewLIR1(kX86Imul32DaR, rs_r2.GetReg());

    if (imm > 0 && magic < 0) {
      // Add numerator to EDX.
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    } else if (imm < 0 && magic > 0) {
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
    }

    // Do we need the shift?
    if (shift != 0) {
      // Shift EDX by 'shift' bits.
      NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
    }

    // Add 1 to EDX if EDX < 0.

    // Move EDX to EAX.
    OpRegCopy(rs_r0, rs_r2);

    // Move sign bit to bit 0, zeroing the rest.
    NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);

    // EDX = EDX + EAX.
    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());

    // Quotient is in EDX.
    if (!is_div) {
      // We need to compute the remainder.
      // Remainder is numerator - (quotient * imm).
      DCHECK(numerator_reg.Valid());
      OpRegCopy(rs_r0, numerator_reg);

      // EDX = quotient * imm.
      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);

      // EAX = numerator - (quotient * imm), i.e. the remainder.
      NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());

      // For this case, return the result in EAX.
      rl_result.reg.SetReg(r0);
    }
  }

  return rl_result;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                  bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, bool check_zero) {
  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, rs_r0);

  // Load RHS into ECX.
  LoadValueDirectFixed(rl_src2, rs_r1);

  // Copy LHS sign bit into EDX.
  NewLIR0(kx86Cdq32Da);

  if (check_zero) {
    // Handle division by zero case.
    GenDivZeroCheck(rs_r1);
  }

  // Have to catch 0x80000000/-1 case, or we will get an exception!
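  // (idiv raises a divide error (#DE) when the quotient overflows, and
  // 0x80000000 / -1 == +2^31 is not representable in 32 bits.)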
  OpRegImm(kOpCmp, rs_r1, -1);
  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, rs_r0, 0x80000000);
  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // In 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX to be 0.
    LoadConstantNoClobber(rs_r2, 0);
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(r2);
  }
  return rl_result;
}

bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);

  if (is_long && cu_->instruction_set == kX86) {
    return false;
  }

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);

  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second operand, we must be careful:
   * copying the first operand into it would clobber the second operand before it
   * is read, yielding the wrong result. Swap the operands in that case.
   */
  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.reg, rl_src1.reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
  }

  if (is_long) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Unaligned access is allowed on x86.
  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_address;
  if (!cu_->target64) {
    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
    rl_address = LoadValue(rl_src_address, kCoreReg);
  } else {
    rl_address = LoadValueWide(rl_src_address, kCoreReg);
  }
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_value;
  if (size == k64) {
    // Unaligned access is allowed on x86.
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
    if (!cu_->target64 && size == kSignedByte) {
      rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
      if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
        RegStorage temp = AllocateByteRegister();
        OpRegCopy(temp, rl_src_value.reg);
        rl_value.reg = temp;
      } else {
        rl_value = LoadValue(rl_src_value, kCoreReg);
      }
    } else {
      rl_value = LoadValue(rl_src_value, kCoreReg);
    }
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

static bool IsInReg(X86Mir2Lir* pMir2Lir, const RegLocation& rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
}

bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  if (!cu_->target64) {
    rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  }
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]

  if (is_long && cu_->target64) {
    // RAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in RAX.
    FlushReg(rs_r0q);
    Clobber(rs_r0q);
    LockTemp(rs_r0q);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    LoadValueDirectWide(rl_src_expected, rs_r0q);
    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());
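    // LOCK CMPXCHG compares RAX with the memory operand; on a match it stores
    // rl_new_value and sets ZF, otherwise it loads the current value into RAX
    // and clears ZF. ZF is converted to the boolean result at the end.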

    // After a store we need to insert barrier in case of potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0q);
  } else if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    // TODO: CFI support.
    FlushAllRegs();
    LockCallTemps();
    RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
    RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    // FIXME: needs 64-bit update.
    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
    DCHECK(!obj_in_si || !obj_in_di);
    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
    DCHECK(!off_in_si || !off_in_di);
    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
    if (push_di) {
      NewLIR1(kX86Push32R, rs_rDI.GetReg());
      MarkTemp(rs_rDI);
      LockTemp(rs_rDI);
    }
    if (push_si) {
      NewLIR1(kX86Push32R, rs_rSI.GetReg());
      MarkTemp(rs_rSI);
      LockTemp(rs_rSI);
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
    if (!obj_in_si && !obj_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    if (!off_in_si && !off_in_di) {
      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
    }
    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);

    // After a store we need to insert barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    if (push_si) {
      FreeTemp(rs_rSI);
      UnmarkTemp(rs_rSI);
      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
    }
    if (push_di) {
      FreeTemp(rs_rDI);
      UnmarkTemp(rs_rDI);
      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
    }
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value nor r_ptr may be in EAX.
    FlushReg(rs_r0);
    Clobber(rs_r0);
    LockTemp(rs_r0);

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(rs_r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(rl_new_value.reg, rl_object.reg);
      LockTemp(rs_r0);
    }

    RegLocation rl_offset;
    if (cu_->target64) {
      rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
    } else {
      rl_offset = LoadValue(rl_src_offset, kCoreReg);
    }
    LoadValueDirect(rl_src_expected, rs_r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
            rl_new_value.reg.GetReg());

    // After a store we need to insert barrier to prevent reordering with either
    // earlier or later memory accesses.  Since
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
    // and it will be associated with the cmpxchg instruction, preventing both.
    GenMemBarrier(kAnyAny);

    FreeTemp(rs_r0);
  }

  // Convert ZF to boolean
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage result_reg = rl_result.reg;

  // For 32-bit, SETcc only works with EAX..EDX.
  if (!IsByteRegister(result_reg)) {
    result_reg = AllocateByteRegister();
  }
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
  if (IsTemp(result_reg)) {
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  CHECK(base_of_code_ != nullptr);

  // Address the start of the method
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  if (rl_method.wide) {
    LoadValueDirectWideFixed(rl_method, reg);
  } else {
    LoadValueDirectFixed(rl_method, reg);
  }
  store_method_addr_used_ = true;

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // 4 byte offset.  We will fix this up in the assembler later to have the right
  // value.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
                    0, 0, target);
  res->target = target;
  res->flags.fixup = kFixupLoad;
  return res;
}

LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVldm for x86";
  return NULL;
}

LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVstm for x86";
  return NULL;
}

void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
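  // lit has exactly two bits set (first_bit and second_bit), so
  // src * lit == (src + (src << (second_bit - first_bit))) << first_bit.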
  RegStorage t_reg = AllocTemp();
  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
  FreeTemp(t_reg);
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
  if (cu_->target64) {
    DCHECK(reg.Is64Bit());

    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
  } else {
    DCHECK(reg.IsPair());

    // We are not supposed to clobber the incoming storage, so allocate a temporary.
    RegStorage t_reg = AllocTemp();
    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
    // The temp is no longer needed so free it at this time.
    FreeTemp(t_reg);
  }

  // In case of zero, throw ArithmeticException.
  GenDivZeroCheck(kCondEq);
}

void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
                                     RegStorage array_base,
                                     int len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             RegStorage index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      RegStorage new_index = index_;
      // Move index out of kArg1, either directly to kArg0, or to kArg2.
      // TODO: clean-up to check not a number but with type
      if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
        if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg2, kNotWide);
        } else {
          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
          new_index = m2l_->TargetReg(kArg0, kNotWide);
        }
      }
      // Load array length to kArg1.
      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
                                       m2l_->TargetReg(kArg1, kNotWide), true);
    }

   private:
    const RegStorage index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

  OpRegMem(kOpCmp, index, array_base, len_offset);
  MarkPossibleNullPointerException(0);
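  // A single unsigned index >= length check also rejects negative indices,
  // since they wrap to large unsigned values.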
  LIR* branch = OpCondBranch(kCondUge, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
                                     RegStorage array_base,
                                     int32_t len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             int32_t index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      // Load array length to kArg1.
      X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
      x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
      x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
                                       m2l_->TargetReg(kArg1, kNotWide), true);
    }

   private:
    const int32_t index_;
    const RegStorage array_base_;
    const int32_t len_offset_;
  };

  NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
  MarkPossibleNullPointerException(0);
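  // Branch if length <= index (unsigned); a negative constant index compares
  // as a huge unsigned value and always throws.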
  LIR* branch = OpCondBranch(kCondLs, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

// Test suspend flag, return target of taken suspend branch
LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
  if (cu_->target64) {
    OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
  } else {
    OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
  }
  return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
}

// Decrement register and branch on condition
LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  OpRegImm(kOpSub, reg, 1);
  return OpCondBranch(c_code, target);
}

bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
  return false;
}

bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of EasyMultiply in x86";
  return false;
}

LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
  LOG(FATAL) << "Unexpected use of OpIT in x86";
  return NULL;
}

void X86Mir2Lir::OpEndIT(LIR* it) {
  LOG(FATAL) << "Unexpected use of OpEndIT in x86";
}

void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      OpRegCopy(dest, src);
      break;
    default:
      OpRegRegImm(kOpMul, dest, src, val);
      break;
  }
}

void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  LIR* m;
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile);
      break;
    default:
      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
                  rs_rX86_SP.GetReg(), displacement, val);
      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
      break;
  }
}

void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  if (cu_->target64) {
    if (rl_src1.is_const) {
      std::swap(rl_src1, rl_src2);
    }
    // Are we multiplying by a constant?
    if (rl_src2.is_const) {
      int64_t val = mir_graph_->ConstantValueWide(rl_src2);
      if (val == 0) {
        RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
        OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
        StoreValueWide(rl_dest, rl_result);
        return;
      } else if (val == 1) {
        StoreValueWide(rl_dest, rl_src1);
        return;
      } else if (val == 2) {
        GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
        return;
      } else if (IsPowerOfTwo(val)) {
        int shift_amount = LowestSetBit(val);
        if (!BadOverlap(rl_src1, rl_dest)) {
          rl_src1 = LoadValueWide(rl_src1, kCoreReg);
          RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
                                                    rl_src1, shift_amount);
          StoreValueWide(rl_dest, rl_result);
          return;
        }
      }
    }
    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
        rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
    } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
               rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
    } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
               rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    } else {
      OpRegCopy(rl_result.reg, rl_src1.reg);
      NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    }
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
  }
  // Are we multiplying by a constant?
  if (rl_src2.is_const) {
    // Do special handling for multiplication by a simple constant operand.
1346    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
1347    if (val == 0) {
1348      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1349      OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
1350      OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
1351      StoreValueWide(rl_dest, rl_result);
1352      return;
1353    } else if (val == 1) {
1354      StoreValueWide(rl_dest, rl_src1);
1355      return;
1356    } else if (val == 2) {
1357      GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
1358      return;
1359    } else if (IsPowerOfTwo(val)) {
1360      int shift_amount = LowestSetBit(val);
1361      if (!BadOverlap(rl_src1, rl_dest)) {
1362        rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1363        RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
1364                                                  rl_src1, shift_amount);
1365        StoreValueWide(rl_dest, rl_result);
1366        return;
1367      }
1368    }
1369
1370    // Okay, just bite the bullet and do it.
1371    int32_t val_lo = Low32Bits(val);
1372    int32_t val_hi = High32Bits(val);
1373    FlushAllRegs();
1374    LockCallTemps();  // Prepare for explicit register usage.
1375    rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1376    bool src1_in_reg = rl_src1.location == kLocPhysReg;
1377    int displacement = SRegOffset(rl_src1.s_reg_low);
1378
1379    // ECX <- 1H * 2L
1380    // EAX <- 1L * 2H
1381    if (src1_in_reg) {
1382      GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
1383      GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
1384    } else {
1385      GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
1386      GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
1387    }
1388
1389    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
1390    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1391
1392    // EAX <- 2L
1393    LoadConstantNoClobber(rs_r0, val_lo);
1394
1395    // EDX:EAX <- 2L * 1L (double precision)
1396    if (src1_in_reg) {
1397      NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1398    } else {
1399      LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
1400      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1401                              true /* is_load */, true /* is_64bit */);
1402    }
1403
1404    // EDX <- EDX + ECX (add high words)
1405    NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1406
1407    // Result is EDX:EAX
1408    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1409                             RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1410    StoreValueWide(rl_dest, rl_result);
1411    return;
1412  }
1413
1414  // Nope.  Do it the hard way.
1415  // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
1416  bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
1417                   mir_graph_->SRegToVReg(rl_src2.s_reg_low);
1418
1419  FlushAllRegs();
1420  LockCallTemps();  // Prepare for explicit register usage.
1421  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1422  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1423
1424  // At this point, the VRs are in their home locations.
1425  bool src1_in_reg = rl_src1.location == kLocPhysReg;
1426  bool src2_in_reg = rl_src2.location == kLocPhysReg;
1427
1428  // ECX <- 1H
1429  if (src1_in_reg) {
1430    NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
1431  } else {
1432    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
1433                 kNotVolatile);
1434  }
1435
1436  if (is_square) {
1437    // Take advantage of the fact that the values are the same.
1438    // ECX <- ECX * 2L  (1H * 2L)
1439    if (src2_in_reg) {
1440      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1441    } else {
1442      int displacement = SRegOffset(rl_src2.s_reg_low);
1443      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
1444                       displacement + LOWORD_OFFSET);
1445      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1446                              true /* is_load */, true /* is_64bit */);
1447    }
1448
1449    // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
1450    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
1451  } else {
1452    // EAX <- 2H
1453    if (src2_in_reg) {
1454      NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
1455    } else {
1456      LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
1457                   kNotVolatile);
1458    }
1459
1460    // EAX <- EAX * 1L  (2H * 1L)
1461    if (src1_in_reg) {
1462      NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
1463    } else {
1464      int displacement = SRegOffset(rl_src1.s_reg_low);
1465      LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(),
1466                       displacement + LOWORD_OFFSET);
1467      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1468                              true /* is_load */, true /* is_64bit */);
1469    }
1470
1471    // ECX <- ECX * 2L  (1H * 2L)
1472    if (src2_in_reg) {
1473      NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1474    } else {
1475      int displacement = SRegOffset(rl_src2.s_reg_low);
1476      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
1477                       displacement + LOWORD_OFFSET);
1478      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1479                              true /* is_load */, true /* is_64bit */);
1480    }
1481
1482    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
1483    NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1484  }
1485
1486  // EAX <- 2L
1487  if (src2_in_reg) {
1488    NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
1489  } else {
1490    LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
1491                 kNotVolatile);
1492  }
1493
1494  // EDX:EAX <- 2L * 1L (double precision)
1495  if (src1_in_reg) {
1496    NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1497  } else {
1498    int displacement = SRegOffset(rl_src1.s_reg_low);
1499    LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
1500    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1501                            true /* is_load */, true /* is_64bit */);
1502  }
1503
1504  // EDX <- EDX + ECX (add high words)
1505  NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1506
1507  // Result is EDX:EAX
1508  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1509                           RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1510  StoreValueWide(rl_dest, rl_result);
1511}
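
// A minimal sketch of the same decomposition in plain C++ (illustrative only;
// the helper name is ad hoc and this is not compiler code):
//
//   static uint64_t MulLongVia32(uint64_t x, uint64_t y) {
//     uint32_t x_lo = static_cast<uint32_t>(x), x_hi = static_cast<uint32_t>(x >> 32);
//     uint32_t y_lo = static_cast<uint32_t>(y), y_hi = static_cast<uint32_t>(y >> 32);
//     // Only the low 32 bits of the cross terms survive the shift below.
//     uint64_t cross = static_cast<uint64_t>(x_hi) * y_lo +
//                      static_cast<uint64_t>(x_lo) * y_hi;
//     return static_cast<uint64_t>(x_lo) * y_lo + (cross << 32);
//   }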
1512
1513void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
1514                                   Instruction::Code op) {
1515  DCHECK_EQ(rl_dest.location, kLocPhysReg);
1516  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1517  if (rl_src.location == kLocPhysReg) {
1518    // Both operands are in registers.
1519    // But we must ensure that rl_src is in a register pair.
1520    if (cu_->target64) {
1521      NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1522    } else {
1523      rl_src = LoadValueWide(rl_src, kCoreReg);
1524      if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1525        // The source high register equals the destination low register, so it would be clobbered before use.
1526        RegStorage temp_reg = AllocTemp();
1527        OpRegCopy(temp_reg, rl_dest.reg);
1528        rl_src.reg.SetHighReg(temp_reg.GetReg());
1529      }
1530      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1531
1532      x86op = GetOpcode(op, rl_dest, rl_src, true);
1533      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1534      FreeTemp(rl_src.reg);  // ???
1535    }
1536    return;
1537  }
1538
1539  // RHS is in memory.
1540  DCHECK((rl_src.location == kLocDalvikFrame) ||
1541         (rl_src.location == kLocCompilerTemp));
1542  int r_base = rs_rX86_SP.GetReg();
1543  int displacement = SRegOffset(rl_src.s_reg_low);
1544
1545  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1546  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
1547                     r_base, displacement + LOWORD_OFFSET);
1548  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1549                          true /* is_load */, true /* is64bit */);
1550  if (!cu_->target64) {
1551    x86op = GetOpcode(op, rl_dest, rl_src, true);
1552    lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1553    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1554                            true /* is_load */, true /* is64bit */);
1555  }
1556}
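
// Note: on 32-bit targets the two halves are linked through the flags
// register. A long ADD, for example, is emitted as ADD on the low words
// followed by ADC on the high words, which is why GetOpcode() takes an
// is_high_op flag to select the carry-consuming variant.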
1557
1558void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1559  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
1560  if (rl_dest.location == kLocPhysReg) {
1561    // Ensure we are in a register pair
1562    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1563
1564    rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
1565    GenLongRegOrMemOp(rl_result, rl_src, op);
1566    StoreFinalValueWide(rl_dest, rl_result);
1567    return;
1568  }
1569
1570  // It wasn't in registers, so it better be in memory.
1571  DCHECK((rl_dest.location == kLocDalvikFrame) ||
1572         (rl_dest.location == kLocCompilerTemp));
1573  rl_src = LoadValueWide(rl_src, kCoreReg);
1574
1575  // Operate directly into memory.
1576  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1577  int r_base = rs_rX86_SP.GetReg();
1578  int displacement = SRegOffset(rl_dest.s_reg_low);
1579
1580  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1581  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
1582                     cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
1583  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1584                          true /* is_load */, true /* is64bit */);
1585  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1586                          false /* is_load */, true /* is64bit */);
1587  if (!cu_->target64) {
1588    x86op = GetOpcode(op, rl_dest, rl_src, true);
1589    lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
1590    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1591                            true /* is_load */, true /* is64bit */);
1592    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1593                            false /* is_load */, true /* is64bit */);
1594  }
1595  FreeTemp(rl_src.reg);
1596}
1597
1598void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
1599                              RegLocation rl_src2, Instruction::Code op,
1600                              bool is_commutative) {
1601  // Is this really a 2 operand operation?
1602  switch (op) {
1603    case Instruction::ADD_LONG_2ADDR:
1604    case Instruction::SUB_LONG_2ADDR:
1605    case Instruction::AND_LONG_2ADDR:
1606    case Instruction::OR_LONG_2ADDR:
1607    case Instruction::XOR_LONG_2ADDR:
1608      if (GenerateTwoOperandInstructions()) {
1609        GenLongArith(rl_dest, rl_src2, op);
1610        return;
1611      }
1612      break;
1613
1614    default:
1615      break;
1616  }
1617
1618  if (rl_dest.location == kLocPhysReg) {
1619    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
1620
1621    // We are about to clobber the LHS, so it needs to be a temp.
1622    rl_result = ForceTempWide(rl_result);
1623
1624    // Perform the operation using the RHS.
1625    rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1626    GenLongRegOrMemOp(rl_result, rl_src2, op);
1627
1628    // And now record that the result is in the temp.
1629    StoreFinalValueWide(rl_dest, rl_result);
1630    return;
1631  }
1632
1633  // It wasn't in registers, so it better be in memory.
1634  DCHECK((rl_dest.location == kLocDalvikFrame) ||
1635         (rl_dest.location == kLocCompilerTemp));
1636  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1637  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1638
1639  // Get one of the source operands into temporary register.
1640  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1641  if (cu_->target64) {
1642    if (IsTemp(rl_src1.reg)) {
1643      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1644    } else if (is_commutative) {
1645      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1646      // We need at least one of them to be a temporary.
1647      if (!IsTemp(rl_src2.reg)) {
1648        rl_src1 = ForceTempWide(rl_src1);
1649        GenLongRegOrMemOp(rl_src1, rl_src2, op);
1650      } else {
1651        GenLongRegOrMemOp(rl_src2, rl_src1, op);
1652        StoreFinalValueWide(rl_dest, rl_src2);
1653        return;
1654      }
1655    } else {
1656      // Need LHS to be the temp.
1657      rl_src1 = ForceTempWide(rl_src1);
1658      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1659    }
1660  } else {
1661    if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
1662      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1663    } else if (is_commutative) {
1664      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1665      // We need at least one of them to be a temporary.
1666      if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
1667        rl_src1 = ForceTempWide(rl_src1);
1668        GenLongRegOrMemOp(rl_src1, rl_src2, op);
1669      } else {
1670        GenLongRegOrMemOp(rl_src2, rl_src1, op);
1671        StoreFinalValueWide(rl_dest, rl_src2);
1672        return;
1673      }
1674    } else {
1675      // Need LHS to be the temp.
1676      rl_src1 = ForceTempWide(rl_src1);
1677      GenLongRegOrMemOp(rl_src1, rl_src2, op);
1678    }
1679  }
1680
1681  StoreFinalValueWide(rl_dest, rl_src1);
1682}
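
// In both paths above, a source is operated on in place only if it is already
// a compiler temp: clobbering a non-temp register would corrupt the cached
// home copy of that virtual register, hence the ForceTempWide() calls.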
1683
1684void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
1685                            RegLocation rl_src1, RegLocation rl_src2) {
1686  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1687}
1688
1689void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
1690                            RegLocation rl_src1, RegLocation rl_src2) {
1691  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
1692}
1693
1694void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
1695                            RegLocation rl_src1, RegLocation rl_src2) {
1696  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1697}
1698
1699void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
1700                           RegLocation rl_src1, RegLocation rl_src2) {
1701  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1702}
1703
1704void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
1705                            RegLocation rl_src1, RegLocation rl_src2) {
1706  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1707}
1708
1709void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
1710  if (cu_->target64) {
1711    rl_src = LoadValueWide(rl_src, kCoreReg);
1712    RegLocation rl_result;
1713    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1714    OpRegCopy(rl_result.reg, rl_src.reg);
1715    OpReg(kOpNot, rl_result.reg);
1716    StoreValueWide(rl_dest, rl_result);
1717  } else {
1718    LOG(FATAL) << "Unexpected use of GenNotLong()";
1719  }
1720}
1721
1722void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
1723                                  int64_t imm, bool is_div) {
1724  if (imm == 0) {
1725    GenDivZeroException();
1726  } else if (imm == 1) {
1727    if (is_div) {
1728      // x / 1 == x.
1729      StoreValueWide(rl_dest, rl_src);
1730    } else {
1731      // x % 1 == 0.
1732      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1733      LoadConstantWide(rl_result.reg, 0);
1734      StoreValueWide(rl_dest, rl_result);
1735    }
1736  } else if (imm == -1) {  // handle 0x8000000000000000 / -1 special case.
1737    if (is_div) {
1738      rl_src = LoadValueWide(rl_src, kCoreReg);
1739      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1740      RegStorage rs_temp = AllocTempWide();
1741
1742      OpRegCopy(rl_result.reg, rl_src.reg);
1743      LoadConstantWide(rs_temp, 0x8000000000000000);
1744
1745      // If x == MIN_LONG, return MIN_LONG.
1746      OpRegReg(kOpCmp, rl_src.reg, rs_temp);
1747      LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
1748
1749      // For x != MIN_LONG, x / -1 == -x.
1750      OpReg(kOpNeg, rl_result.reg);
1751
1752      minint_branch->target = NewLIR0(kPseudoTargetLabel);
1753      FreeTemp(rs_temp);
1754      StoreValueWide(rl_dest, rl_result);
1755    } else {
1756      // x % -1 == 0.
1757      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1758      LoadConstantWide(rl_result.reg, 0);
1759      StoreValueWide(rl_dest, rl_result);
1760    }
1761  } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
1762    // Division using shifting.
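    // For signed n and power-of-two divisor k = |imm|, n / k is an arithmetic
    // shift with a bias: the CMOV below keeps n itself when n is non-negative
    // and (n + k - 1) when n is negative, so the shift rounds toward zero.
    // For example, -7 / 4 becomes (-7 + 3) >> 2 == -1, where a plain
    // -7 >> 2 would give -2.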
1763    rl_src = LoadValueWide(rl_src, kCoreReg);
1764    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1765    if (IsSameReg(rl_result.reg, rl_src.reg)) {
1766      RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
1767      rl_result.reg.SetReg(rs_temp.GetReg());
1768    }
1769    LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
1770    OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
1771    NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
1772    OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
1773    int shift_amount = LowestSetBit(imm);
1774    OpRegImm(kOpAsr, rl_result.reg, shift_amount);
1775    if (imm < 0) {
1776      OpReg(kOpNeg, rl_result.reg);
1777    }
1778    StoreValueWide(rl_dest, rl_result);
1779  } else {
1780    CHECK(imm <= -2 || imm >= 2);
1781
1782    FlushReg(rs_r0q);
1783    Clobber(rs_r0q);
1784    LockTemp(rs_r0q);
1785    FlushReg(rs_r2q);
1786    Clobber(rs_r2q);
1787    LockTemp(rs_r2q);
1788
1789    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r2q, INVALID_SREG, INVALID_SREG};
1790
1791    // Use H. S. Warren's Hacker's Delight, Chapter 10, and
1792    // T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication".
1793    int64_t magic;
1794    int shift;
1795    CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
1796
1797    /*
1798     * For imm >= 2,
1799     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
1800     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n < 0.
1801     * For imm <= -2,
1802     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, while n > 0
1803     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
1804     * We implement this algorithm in the following way:
1805     * 1. multiply the magic number M by the numerator n, getting the high 64 bits of the product in RDX
1806     * 2. if imm > 0 and magic < 0, add the numerator to RDX
1807     *    if imm < 0 and magic > 0, subtract the numerator from RDX
1808     * 3. if S != 0, arithmetically shift RDX right by S bits
1809     * 4. add 1 to RDX if RDX < 0
1810     * 5. RDX now holds the quotient
1811     */
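    // Worked example (a sketch; the constants are those CalculateMagicAndShift
    // is assumed to produce): for imm = 3 the magic number is
    // M = ceil(2^64 / 3) = 0x5555555555555556 with S = 0. For n = 7,
    // floor(M * 7 / 2^64) = 2; the intermediate is non-negative, so step 4
    // adds nothing and the quotient is 2 == 7 / 3.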
1812
1813    // Numerator into RAX.
1814    RegStorage numerator_reg;
1815    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
1816      // We will need the value later.
1817      rl_src = LoadValueWide(rl_src, kCoreReg);
1818      numerator_reg = rl_src.reg;
1819      OpRegCopyWide(rs_r0q, numerator_reg);
1820    } else {
1821      // Only need this once.  Just put it into RAX.
1822      LoadValueDirectWideFixed(rl_src, rs_r0q);
1823    }
1824
1825    // RDX = magic.
1826    LoadConstantWide(rs_r2q, magic);
1827
1828    // RDX:RAX = magic * dividend.
1829    NewLIR1(kX86Imul64DaR, rs_r2q.GetReg());
1830
1831    if (imm > 0 && magic < 0) {
1832      // Add numerator to RDX.
1833      DCHECK(numerator_reg.Valid());
1834      OpRegReg(kOpAdd, rs_r2q, numerator_reg);
1835    } else if (imm < 0 && magic > 0) {
1836      DCHECK(numerator_reg.Valid());
1837      OpRegReg(kOpSub, rs_r2q, numerator_reg);
1838    }
1839
1840    // Do we need the shift?
1841    if (shift != 0) {
1842      // Shift RDX by 'shift' bits.
1843      OpRegImm(kOpAsr, rs_r2q, shift);
1844    }
1845
1846    // Move RDX to RAX.
1847    OpRegCopyWide(rs_r0q, rs_r2q);
1848
1849    // Move sign bit to bit 0, zeroing the rest.
1850    OpRegImm(kOpLsr, rs_r2q, 63);
1851
1852    // RDX = RDX + RAX.
1853    OpRegReg(kOpAdd, rs_r2q, rs_r0q);
1854
1855    // Quotient is in RDX.
1856    if (!is_div) {
1857      // We need to compute the remainder.
1858      // Remainder is numerator - (quotient * imm).
1859      DCHECK(numerator_reg.Valid());
1860      OpRegCopyWide(rs_r0q, numerator_reg);
1861
1862      // Imul doesn't support 64-bit imms.
1863      if (imm > std::numeric_limits<int32_t>::max() ||
1864          imm < std::numeric_limits<int32_t>::min()) {
1865        RegStorage rs_temp = AllocTempWide();
1866        LoadConstantWide(rs_temp, imm);
1867
1868        // RDX = quotient * imm.
1869        NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
1870
1871        FreeTemp(rs_temp);
1872      } else {
1873        // RDX = quotient * imm.
1874        int short_imm = static_cast<int>(imm);
1875        NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
1876      }
1877
1878      // RAX -= RDX, leaving the remainder in RAX.
1879      OpRegReg(kOpSub, rs_r0q, rs_r2q);
1880
1881      // Store result.
1882      OpRegCopyWide(rl_result.reg, rs_r0q);
1883    } else {
1884      // Store result.
1885      OpRegCopyWide(rl_result.reg, rs_r2q);
1886    }
1887    StoreValueWide(rl_dest, rl_result);
1888    FreeTemp(rs_r0q);
1889    FreeTemp(rs_r2q);
1890  }
1891}
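
// For reference, the main path above (imm > 0, magic > 0, no correction step)
// matches this illustrative C++ sketch; __int128 is a GCC/Clang extension and
// the helper name is hypothetical, not part of the compiler:
//
//   static int64_t MagicDiv(int64_t n, int64_t magic, int shift) {
//     int64_t q = static_cast<int64_t>((static_cast<__int128>(magic) * n) >> 64);
//     q >>= shift;                                  // step 3
//     return q + (static_cast<uint64_t>(q) >> 63);  // step 4: add 1 if q < 0
//   }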
1892
1893void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1894                               RegLocation rl_src2, bool is_div) {
1895  if (!cu_->target64) {
1896    LOG(FATAL) << "Unexpected use of GenDivRemLong()";
1897    return;
1898  }
1899
1900  if (rl_src2.is_const) {
1901    DCHECK(rl_src2.wide);
1902    int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
1903    GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
1904    return;
1905  }
1906
1907  // We have to use fixed registers, so flush all the temps.
1908  FlushAllRegs();
1909  LockCallTemps();  // Prepare for explicit register usage.
1910
1911  // Load LHS into RAX.
1912  LoadValueDirectWideFixed(rl_src1, rs_r0q);
1913
1914  // Load RHS into RCX.
1915  LoadValueDirectWideFixed(rl_src2, rs_r1q);
1916
1917  // Copy LHS sign bit into RDX.
1918  NewLIR0(kx86Cqo64Da);
1919
1920  // Handle division by zero case.
1921  GenDivZeroCheckWide(rs_r1q);
1922
1923  // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
1924  NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
1925  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
1926
1927  // RHS is -1.
1928  LoadConstantWide(rs_r6q, 0x8000000000000000);
1929  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
1930  LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
1931
1932  // In the 0x8000000000000000/-1 case.
1933  if (!is_div) {
1934    // For DIV, RAX is already correct. For REM, we need RDX to be 0.
1935    NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
1936  }
1937  LIR* done = NewLIR1(kX86Jmp8, 0);
1938
1939  // Expected case.
1940  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
1941  minint_branch->target = minus_one_branch->target;
1942  NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
1943  done->target = NewLIR0(kPseudoTargetLabel);
1944
1945  // Result is in RAX for div and RDX for rem.
1946  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
1947  if (!is_div) {
1948    rl_result.reg.SetReg(r2q);
1949  }
1950
1951  StoreValueWide(rl_dest, rl_result);
1952}
1953
1954void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1955  rl_src = LoadValueWide(rl_src, kCoreReg);
1956  RegLocation rl_result;
1957  if (cu_->target64) {
1958    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1959    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
1960  } else {
1961    rl_result = ForceTempWide(rl_src);
1962    if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
1963        ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
1964      // The source high register equals the destination low register, so it would be clobbered before use.
1965      RegStorage temp_reg = AllocTemp();
1966      OpRegCopy(temp_reg, rl_result.reg);
1967      rl_result.reg.SetHighReg(temp_reg.GetReg());
1968    }
1969    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
1970    OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
1971    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
1972  }
1973  StoreValueWide(rl_dest, rl_result);
1974}
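
// The 32-bit pair negation above computes -(hi:lo) as lo' = -lo followed by
// hi' = -(hi + carry): NEG sets the carry flag exactly when lo != 0, ADC folds
// that borrow into the high word, and the final NEG completes the two's
// complement across the pair.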
1975
1976void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
1977  DCHECK_EQ(kX86, cu_->instruction_set);
1978  X86OpCode opcode = kX86Bkpt;
1979  switch (op) {
1980  case kOpCmp: opcode = kX86Cmp32RT;  break;
1981  case kOpMov: opcode = kX86Mov32RT;  break;
1982  default:
1983    LOG(FATAL) << "Bad opcode: " << op;
1984    break;
1985  }
1986  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
1987}
1988
1989void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
1990  DCHECK_EQ(kX86_64, cu_->instruction_set);
1991  X86OpCode opcode = kX86Bkpt;
1992  if (cu_->target64 && r_dest.Is64BitSolo()) {
1993    switch (op) {
1994    case kOpCmp: opcode = kX86Cmp64RT;  break;
1995    case kOpMov: opcode = kX86Mov64RT;  break;
1996    default:
1997      LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
1998      break;
1999    }
2000  } else {
2001    switch (op) {
2002    case kOpCmp: opcode = kX86Cmp32RT;  break;
2003    case kOpMov: opcode = kX86Mov32RT;  break;
2004    default:
2005      LOG(FATAL) << "Bad opcode: " << op;
2006      break;
2007    }
2008  }
2009  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2010}
2011
2012/*
2013 * Generate array load
2014 */
2015void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
2016                             RegLocation rl_index, RegLocation rl_dest, int scale) {
2017  RegisterClass reg_class = RegClassBySize(size);
2018  int len_offset = mirror::Array::LengthOffset().Int32Value();
2019  RegLocation rl_result;
2020  rl_array = LoadValue(rl_array, kRefReg);
2021
2022  int data_offset;
2023  if (size == k64 || size == kDouble) {
2024    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2025  } else {
2026    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2027  }
2028
2029  bool constant_index = rl_index.is_const;
2030  int32_t constant_index_value = 0;
2031  if (!constant_index) {
2032    rl_index = LoadValue(rl_index, kCoreReg);
2033  } else {
2034    constant_index_value = mir_graph_->ConstantValue(rl_index);
2035    // If index is constant, just fold it into the data offset
2036    data_offset += constant_index_value << scale;
2037    // Treat as a non-indexed access below.
2038    rl_index.reg = RegStorage::InvalidReg();
2039  }
2040
2041  /* null object? */
2042  GenNullCheck(rl_array.reg, opt_flags);
2043
2044  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2045    if (constant_index) {
2046      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2047    } else {
2048      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2049    }
2050  }
2051  rl_result = EvalLoc(rl_dest, reg_class, true);
2052  LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
2053  if ((size == k64) || (size == kDouble)) {
2054    StoreValueWide(rl_dest, rl_result);
2055  } else {
2056    StoreValue(rl_dest, rl_result);
2057  }
2058}
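
// The load above resolves to a single x86 scaled-index address,
//   result = [array + index * (1 << scale) + data_offset],
// with the index register elided when the index is constant, since its scaled
// value was folded into data_offset earlier.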
2059
2060/*
2061 * Generate array store
2062 *
2063 */
2064void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
2065                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
2066  RegisterClass reg_class = RegClassBySize(size);
2067  int len_offset = mirror::Array::LengthOffset().Int32Value();
2068  int data_offset;
2069
2070  if (size == k64 || size == kDouble) {
2071    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2072  } else {
2073    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2074  }
2075
2076  rl_array = LoadValue(rl_array, kRefReg);
2077  bool constant_index = rl_index.is_const;
2078  int32_t constant_index_value = 0;
2079  if (!constant_index) {
2080    rl_index = LoadValue(rl_index, kCoreReg);
2081  } else {
2082    // If index is constant, just fold it into the data offset
2083    constant_index_value = mir_graph_->ConstantValue(rl_index);
2084    data_offset += constant_index_value << scale;
2085    // Treat as a non-indexed access below.
2086    rl_index.reg = RegStorage::InvalidReg();
2087  }
2088
2089  /* null object? */
2090  GenNullCheck(rl_array.reg, opt_flags);
2091
2092  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2093    if (constant_index) {
2094      GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2095    } else {
2096      GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2097    }
2098  }
2099  if ((size == k64) || (size == kDouble)) {
2100    rl_src = LoadValueWide(rl_src, reg_class);
2101  } else {
2102    rl_src = LoadValue(rl_src, reg_class);
2103  }
2104  // If the src reg can't be byte accessed, move it to a temp first.
2105  if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
2106    RegStorage temp = AllocTemp();
2107    OpRegCopy(temp, rl_src.reg);
2108    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
2109  } else {
2110    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
2111  }
2112  if (card_mark) {
2113    // Free rl_index if it's a temp. Ensures there are 2 free regs for card mark.
2114    if (!constant_index) {
2115      FreeTemp(rl_index.reg);
2116    }
2117    MarkGCCard(rl_src.reg, rl_array.reg);
2118  }
2119}
2120
2121RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2122                                          RegLocation rl_src, int shift_amount) {
2123  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2124  if (cu_->target64) {
2125    OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
2126    switch (opcode) {
2127      case Instruction::SHL_LONG:
2128      case Instruction::SHL_LONG_2ADDR:
2129        op = kOpLsl;
2130        break;
2131      case Instruction::SHR_LONG:
2132      case Instruction::SHR_LONG_2ADDR:
2133        op = kOpAsr;
2134        break;
2135      case Instruction::USHR_LONG:
2136      case Instruction::USHR_LONG_2ADDR:
2137        op = kOpLsr;
2138        break;
2139      default:
2140        LOG(FATAL) << "Unexpected case";
2141    }
2142    OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
2143  } else {
2144    switch (opcode) {
2145      case Instruction::SHL_LONG:
2146      case Instruction::SHL_LONG_2ADDR:
2147        DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
2148        if (shift_amount == 32) {
2149          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2150          LoadConstant(rl_result.reg.GetLow(), 0);
2151        } else if (shift_amount > 31) {
2152          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2153          NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
2154          LoadConstant(rl_result.reg.GetLow(), 0);
2155        } else {
2156          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2157          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2158          NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
2159                  shift_amount);
2160          NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
2161        }
2162        break;
2163      case Instruction::SHR_LONG:
2164      case Instruction::SHR_LONG_2ADDR:
2165        if (shift_amount == 32) {
2166          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2167          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2168          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2169        } else if (shift_amount > 31) {
2170          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2171          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2172          NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2173          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2174        } else {
2175          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2176          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2177          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2178                  shift_amount);
2179          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
2180        }
2181        break;
2182      case Instruction::USHR_LONG:
2183      case Instruction::USHR_LONG_2ADDR:
2184        if (shift_amount == 32) {
2185          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2186          LoadConstant(rl_result.reg.GetHigh(), 0);
2187        } else if (shift_amount > 31) {
2188          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2189          NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2190          LoadConstant(rl_result.reg.GetHigh(), 0);
2191        } else {
2192          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2193          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2194          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2195                  shift_amount);
2196          NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
2197        }
2198        break;
2199      default:
2200        LOG(FATAL) << "Unexpected case";
2201    }
2202  }
2203  return rl_result;
2204}
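
// Sketch of the 32-bit sub-cases above for SHL of x = hi:lo by n:
//   n == 32:  hi' = lo,              lo' = 0
//   n  > 32:  hi' = lo << (n - 32),  lo' = 0
//   n  < 32:  hi' = (hi << n) | (lo >> (32 - n))  via SHLD,  lo' = lo << n
// SHR and USHR mirror this using SHRD and arithmetic/logical right shifts.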
2205
2206void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2207                                   RegLocation rl_src, RegLocation rl_shift) {
2208  // Per the spec, we only care about the low 6 bits of the shift amount.
2209  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
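  // e.g. a constant shift amount of 65 behaves exactly like a shift by 1.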
2210  if (shift_amount == 0) {
2211    rl_src = LoadValueWide(rl_src, kCoreReg);
2212    StoreValueWide(rl_dest, rl_src);
2213    return;
2214  } else if (shift_amount == 1 &&
2215            (opcode ==  Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
2216    // Need to handle this here to avoid calling StoreValueWide twice.
2217    GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
2218    return;
2219  }
2220  if (BadOverlap(rl_src, rl_dest)) {
2221    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
2222    return;
2223  }
2224  rl_src = LoadValueWide(rl_src, kCoreReg);
2225  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
2226  StoreValueWide(rl_dest, rl_result);
2227}
2228
2229void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
2230                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
2231  bool isConstSuccess = false;
2232  switch (opcode) {
2233    case Instruction::ADD_LONG:
2234    case Instruction::AND_LONG:
2235    case Instruction::OR_LONG:
2236    case Instruction::XOR_LONG:
2237      if (rl_src2.is_const) {
2238        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2239      } else {
2240        DCHECK(rl_src1.is_const);
2241        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2242      }
2243      break;
2244    case Instruction::SUB_LONG:
2245    case Instruction::SUB_LONG_2ADDR:
2246      if (rl_src2.is_const) {
2247        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2248      } else {
2249        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
2250        isConstSuccess = true;
2251      }
2252      break;
2253    case Instruction::ADD_LONG_2ADDR:
2254    case Instruction::OR_LONG_2ADDR:
2255    case Instruction::XOR_LONG_2ADDR:
2256    case Instruction::AND_LONG_2ADDR:
2257      if (rl_src2.is_const) {
2258        if (GenerateTwoOperandInstructions()) {
2259          isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
2260        } else {
2261          isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2262        }
2263      } else {
2264        DCHECK(rl_src1.is_const);
2265        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2266      }
2267      break;
2268    default:
2269      isConstSuccess = false;
2270      break;
2271  }
2272
2273  if (!isConstSuccess) {
2274    // Default - bail to non-const handler.
2275    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
2276  }
2277}
2278
2279bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
2280  switch (op) {
2281    case Instruction::AND_LONG_2ADDR:
2282    case Instruction::AND_LONG:
2283      return value == -1;
2284    case Instruction::OR_LONG:
2285    case Instruction::OR_LONG_2ADDR:
2286    case Instruction::XOR_LONG:
2287    case Instruction::XOR_LONG_2ADDR:
2288      return value == 0;
2289    default:
2290      return false;
2291  }
2292}
2293
2294X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
2295                                bool is_high_op) {
2296  bool rhs_in_mem = rhs.location != kLocPhysReg;
2297  bool dest_in_mem = dest.location != kLocPhysReg;
2298  bool is64Bit = cu_->target64;
2299  DCHECK(!rhs_in_mem || !dest_in_mem);
2300  switch (op) {
2301    case Instruction::ADD_LONG:
2302    case Instruction::ADD_LONG_2ADDR:
2303      if (dest_in_mem) {
2304        return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
2305      } else if (rhs_in_mem) {
2306        return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
2307      }
2308      return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
2309    case Instruction::SUB_LONG:
2310    case Instruction::SUB_LONG_2ADDR:
2311      if (dest_in_mem) {
2312        return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
2313      } else if (rhs_in_mem) {
2314        return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
2315      }
2316      return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
2317    case Instruction::AND_LONG_2ADDR:
2318    case Instruction::AND_LONG:
2319      if (dest_in_mem) {
2320        return is64Bit ? kX86And64MR : kX86And32MR;
2321      }
2322      if (is64Bit) {
2323        return rhs_in_mem ? kX86And64RM : kX86And64RR;
2324      }
2325      return rhs_in_mem ? kX86And32RM : kX86And32RR;
2326    case Instruction::OR_LONG:
2327    case Instruction::OR_LONG_2ADDR:
2328      if (dest_in_mem) {
2329        return is64Bit ? kX86Or64MR : kX86Or32MR;
2330      }
2331      if (is64Bit) {
2332        return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
2333      }
2334      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
2335    case Instruction::XOR_LONG:
2336    case Instruction::XOR_LONG_2ADDR:
2337      if (dest_in_mem) {
2338        return is64Bit ? kX86Xor64MR : kX86Xor32MR;
2339      }
2340      if (is64Bit) {
2341        return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
2342      }
2343      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
2344    default:
2345      LOG(FATAL) << "Unexpected opcode: " << op;
2346      return kX86Add32RR;
2347  }
2348}
2349
2350X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
2351                                int32_t value) {
2352  bool in_mem = loc.location != kLocPhysReg;
2353  bool is64Bit = cu_->target64;
2354  bool byte_imm = IS_SIMM8(value);
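  // Immediates in [-128, 127] can use the shorter imm8 encodings chosen below.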
2355  DCHECK(in_mem || !loc.reg.IsFloat());
2356  switch (op) {
2357    case Instruction::ADD_LONG:
2358    case Instruction::ADD_LONG_2ADDR:
2359      if (byte_imm) {
2360        if (in_mem) {
2361          return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
2362        }
2363        return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
2364      }
2365      if (in_mem) {
2366        return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
2367      }
2368      return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
2369    case Instruction::SUB_LONG:
2370    case Instruction::SUB_LONG_2ADDR:
2371      if (byte_imm) {
2372        if (in_mem) {
2373          return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
2374        }
2375        return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
2376      }
2377      if (in_mem) {
2378        return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
2379      }
2380      return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
2381    case Instruction::AND_LONG_2ADDR:
2382    case Instruction::AND_LONG:
2383      if (byte_imm) {
2384        if (is64Bit) {
2385          return in_mem ? kX86And64MI8 : kX86And64RI8;
2386        }
2387        return in_mem ? kX86And32MI8 : kX86And32RI8;
2388      }
2389      if (is64Bit) {
2390        return in_mem ? kX86And64MI : kX86And64RI;
2391      }
2392      return in_mem ? kX86And32MI : kX86And32RI;
2393    case Instruction::OR_LONG:
2394    case Instruction::OR_LONG_2ADDR:
2395      if (byte_imm) {
2396        if (is64Bit) {
2397          return in_mem ? kX86Or64MI8 : kX86Or64RI8;
2398        }
2399        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
2400      }
2401      if (is64Bit) {
2402        return in_mem ? kX86Or64MI : kX86Or64RI;
2403      }
2404      return in_mem ? kX86Or32MI : kX86Or32RI;
2405    case Instruction::XOR_LONG:
2406    case Instruction::XOR_LONG_2ADDR:
2407      if (byte_imm) {
2408        if (is64Bit) {
2409          return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
2410        }
2411        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
2412      }
2413      if (is64Bit) {
2414        return in_mem ? kX86Xor64MI : kX86Xor64RI;
2415      }
2416      return in_mem ? kX86Xor32MI : kX86Xor32RI;
2417    default:
2418      LOG(FATAL) << "Unexpected opcode: " << op;
2419      return kX86Add32MI;
2420  }
2421}
2422
2423bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
2424  DCHECK(rl_src.is_const);
2425  int64_t val = mir_graph_->ConstantValueWide(rl_src);
2426
2427  if (cu_->target64) {
2428    // We can do with imm only if it fits 32 bit
2429    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2430      return false;
2431    }
2432
2433    rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2434
2435    if ((rl_dest.location == kLocDalvikFrame) ||
2436        (rl_dest.location == kLocCompilerTemp)) {
2437      int r_base = rs_rX86_SP.GetReg();
2438      int displacement = SRegOffset(rl_dest.s_reg_low);
2439
2440      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2441      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2442      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
2443      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2444                              true /* is_load */, true /* is64bit */);
2445      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2446                              false /* is_load */, true /* is64bit */);
2447      return true;
2448    }
2449
2450    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2451    DCHECK_EQ(rl_result.location, kLocPhysReg);
2452    DCHECK(!rl_result.reg.IsFloat());
2453
2454    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2455    NewLIR2(x86op, rl_result.reg.GetReg(), val);
2456
2457    StoreValueWide(rl_dest, rl_result);
2458    return true;
2459  }
2460
2461  int32_t val_lo = Low32Bits(val);
2462  int32_t val_hi = High32Bits(val);
2463  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2464
2465  // Can we just do this into memory?
2466  if ((rl_dest.location == kLocDalvikFrame) ||
2467      (rl_dest.location == kLocCompilerTemp)) {
2468    int r_base = rs_rX86_SP.GetReg();
2469    int displacement = SRegOffset(rl_dest.s_reg_low);
2470
2471    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2472    if (!IsNoOp(op, val_lo)) {
2473      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2474      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
2475      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2476                              true /* is_load */, true /* is64bit */);
2477      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2478                              false /* is_load */, true /* is64bit */);
2479    }
2480    if (!IsNoOp(op, val_hi)) {
2481      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2482      LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
2483      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2484                                true /* is_load */, true /* is64bit */);
2485      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2486                                false /* is_load */, true /* is64bit */);
2487    }
2488    return true;
2489  }
2490
2491  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2492  DCHECK_EQ(rl_result.location, kLocPhysReg);
2493  DCHECK(!rl_result.reg.IsFloat());
2494
2495  if (!IsNoOp(op, val_lo)) {
2496    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2497    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2498  }
2499  if (!IsNoOp(op, val_hi)) {
2500    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2501    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2502  }
2503  StoreValueWide(rl_dest, rl_result);
2504  return true;
2505}
2506
2507bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
2508                                RegLocation rl_src2, Instruction::Code op) {
2509  DCHECK(rl_src2.is_const);
2510  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
2511
2512  if (cu_->target64) {
2513    // We can do with imm only if it fits 32 bit
2514    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2515      return false;
2516    }
2517    if (rl_dest.location == kLocPhysReg &&
2518        rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
2519      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2520      OpRegCopy(rl_dest.reg, rl_src1.reg);
2521      NewLIR2(x86op, rl_dest.reg.GetReg(), val);
2522      StoreFinalValueWide(rl_dest, rl_dest);
2523      return true;
2524    }
2525
2526    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2527    // We need the values to be in a temporary
2528    RegLocation rl_result = ForceTempWide(rl_src1);
2529
2530    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2531    NewLIR2(x86op, rl_result.reg.GetReg(), val);
2532
2533    StoreFinalValueWide(rl_dest, rl_result);
2534    return true;
2535  }
2536
2537  int32_t val_lo = Low32Bits(val);
2538  int32_t val_hi = High32Bits(val);
2539  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2540  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
2541
2542  // Can we do this directly into the destination registers?
2543  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
2544      rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
2545      rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
2546    if (!IsNoOp(op, val_lo)) {
2547      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2548      NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
2549    }
2550    if (!IsNoOp(op, val_hi)) {
2551      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2552      NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
2553    }
2554
2555    StoreFinalValueWide(rl_dest, rl_dest);
2556    return true;
2557  }
2558
2559  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2560  DCHECK_EQ(rl_src1.location, kLocPhysReg);
2561
2562  // We need the values to be in a temporary
2563  RegLocation rl_result = ForceTempWide(rl_src1);
2564  if (!IsNoOp(op, val_lo)) {
2565    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2566    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2567  }
2568  if (!IsNoOp(op, val_hi)) {
2569    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2570    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2571  }
2572
2573  StoreFinalValueWide(rl_dest, rl_result);
2574  return true;
2575}
2576
2577// For final classes there are no sub-classes to check and so we can answer the instance-of
2578// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
2579void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
2580                                    RegLocation rl_dest, RegLocation rl_src) {
2581  RegLocation object = LoadValue(rl_src, kRefReg);
2582  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2583  RegStorage result_reg = rl_result.reg;
2584
2585  // For 32-bit, SETcc only works with EAX..EDX.
2586  RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
2587  if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
2588    result_reg = AllocateByteRegister();
2589  }
2590
2591  // Assume that there is no match.
2592  LoadConstant(result_reg, 0);
2593  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
2594
2595  // We will use this register to compare to memory below.
2596  // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
2597  // For this reason, force allocation of a 32 bit register to use, so that the
2598  // compare to memory will be done using a 32 bit comparison.
2599  // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
2600  RegStorage check_class = AllocTemp();
2601
2602  // If Method* is already in a register, we can save a copy.
2603  RegLocation rl_method = mir_graph_->GetMethodLoc();
2604  int32_t offset_of_type = mirror::Array::DataOffset(
2605      sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
2606      (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
2607
2608  if (rl_method.location == kLocPhysReg) {
2609    if (use_declaring_class) {
2610      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2611                  check_class, kNotVolatile);
2612    } else {
2613      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2614                  check_class, kNotVolatile);
2615      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2616    }
2617  } else {
2618    LoadCurrMethodDirect(check_class);
2619    if (use_declaring_class) {
2620      LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2621                  check_class, kNotVolatile);
2622    } else {
2623      LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2624                  check_class, kNotVolatile);
2625      LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2626    }
2627  }
2628
2629  // Compare the computed class to the class in the object.
2630  DCHECK_EQ(object.location, kLocPhysReg);
2631  OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
2632
2633  // Set the low byte of the result to 0 or 1 from the compare condition code.
2634  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
2635
2636  LIR* target = NewLIR0(kPseudoTargetLabel);
2637  null_branchover->target = target;
2638  FreeTemp(check_class);
2639  if (IsTemp(result_reg)) {
2640    OpRegCopy(rl_result.reg, result_reg);
2641    FreeTemp(result_reg);
2642  }
2643  StoreValue(rl_dest, rl_result);
2644}
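
// Conceptually, the fast path above reduces to (illustrative C++ only):
//   result = 0;
//   if (obj != nullptr) result = (obj->klass == check_class) ? 1 : 0;
// where check_class is the class resolved for type_idx.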
2645
2646void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
2647                            RegLocation rl_lhs, RegLocation rl_rhs) {
2648  OpKind op = kOpBkpt;
2649  bool is_div_rem = false;
2650  bool unary = false;
2651  bool shift_op = false;
2652  bool is_two_addr = false;
2653  RegLocation rl_result;
2654  switch (opcode) {
2655    case Instruction::NEG_INT:
2656      op = kOpNeg;
2657      unary = true;
2658      break;
2659    case Instruction::NOT_INT:
2660      op = kOpMvn;
2661      unary = true;
2662      break;
2663    case Instruction::ADD_INT_2ADDR:
2664      is_two_addr = true;
2665      // Fallthrough
2666    case Instruction::ADD_INT:
2667      op = kOpAdd;
2668      break;
2669    case Instruction::SUB_INT_2ADDR:
2670      is_two_addr = true;
2671      // Fallthrough
2672    case Instruction::SUB_INT:
2673      op = kOpSub;
2674      break;
2675    case Instruction::MUL_INT_2ADDR:
2676      is_two_addr = true;
2677      // Fallthrough
2678    case Instruction::MUL_INT:
2679      op = kOpMul;
2680      break;
2681    case Instruction::DIV_INT_2ADDR:
2682      is_two_addr = true;
2683      // Fallthrough
2684    case Instruction::DIV_INT:
2685      op = kOpDiv;
2686      is_div_rem = true;
2687      break;
2688    /* NOTE: returns in kArg1 */
2689    case Instruction::REM_INT_2ADDR:
2690      is_two_addr = true;
2691      // Fallthrough
2692    case Instruction::REM_INT:
2693      op = kOpRem;
2694      is_div_rem = true;
2695      break;
2696    case Instruction::AND_INT_2ADDR:
2697      is_two_addr = true;
2698      // Fallthrough
2699    case Instruction::AND_INT:
2700      op = kOpAnd;
2701      break;
2702    case Instruction::OR_INT_2ADDR:
2703      is_two_addr = true;
2704      // Fallthrough
2705    case Instruction::OR_INT:
2706      op = kOpOr;
2707      break;
2708    case Instruction::XOR_INT_2ADDR:
2709      is_two_addr = true;
2710      // Fallthrough
2711    case Instruction::XOR_INT:
2712      op = kOpXor;
2713      break;
2714    case Instruction::SHL_INT_2ADDR:
2715      is_two_addr = true;
2716      // Fallthrough
2717    case Instruction::SHL_INT:
2718      shift_op = true;
2719      op = kOpLsl;
2720      break;
2721    case Instruction::SHR_INT_2ADDR:
2722      is_two_addr = true;
2723      // Fallthrough
2724    case Instruction::SHR_INT:
2725      shift_op = true;
2726      op = kOpAsr;
2727      break;
2728    case Instruction::USHR_INT_2ADDR:
2729      is_two_addr = true;
2730      // Fallthrough
2731    case Instruction::USHR_INT:
2732      shift_op = true;
2733      op = kOpLsr;
2734      break;
2735    default:
2736      LOG(FATAL) << "Invalid word arith op: " << opcode;
2737  }
2738
2739  // Can we convert to a two address instruction?
2740  if (!is_two_addr &&
2741        (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2742         mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
2743    is_two_addr = true;
2744  }
2745
2746  if (!GenerateTwoOperandInstructions()) {
2747    is_two_addr = false;
2748  }
2749
2750  // Get the div/rem stuff out of the way.
2751  if (is_div_rem) {
2752    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
2753    StoreValue(rl_dest, rl_result);
2754    return;
2755  }
2756
2757  // If we generate any memory access below, it will reference a dalvik reg.
2758  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2759
2760  if (unary) {
2761    rl_lhs = LoadValue(rl_lhs, kCoreReg);
2762    rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2763    rl_result = EvalLoc(rl_dest, kCoreReg, true);
2764    OpRegReg(op, rl_result.reg, rl_lhs.reg);
2765  } else {
2766    if (shift_op) {
2767      // X86 shifts mask the count in hardware, and the count must live in ECX (CL).
2768      RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
2769      LoadValueDirectFixed(rl_rhs, t_reg);
2770      if (is_two_addr) {
2771        // Can we do this directly into memory?
2772        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2773        rl_rhs = LoadValue(rl_rhs, kCoreReg);
2774        if (rl_result.location != kLocPhysReg) {
2775          // Okay, we can do this into memory
2776          OpMemReg(op, rl_result, t_reg.GetReg());
2777          FreeTemp(t_reg);
2778          return;
2779        } else if (!rl_result.reg.IsFloat()) {
2780          // Can do this directly into the result register
2781          OpRegReg(op, rl_result.reg, t_reg);
2782          FreeTemp(t_reg);
2783          StoreFinalValue(rl_dest, rl_result);
2784          return;
2785        }
2786      }
2787      // Three address form, or we can't do directly.
2788      rl_lhs = LoadValue(rl_lhs, kCoreReg);
2789      rl_result = EvalLoc(rl_dest, kCoreReg, true);
2790      OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
2791      FreeTemp(t_reg);
2792    } else {
2793      // Multiply is 3 operand only (sort of).
2794      if (is_two_addr && op != kOpMul) {
2795        // Can we do this directly into memory?
2796        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2797        if (rl_result.location == kLocPhysReg) {
2798          // Ensure res is in a core reg
2799          rl_result = EvalLoc(rl_dest, kCoreReg, true);
2800          // Can we do this from memory directly?
2801          rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2802          if (rl_rhs.location != kLocPhysReg) {
2803            OpRegMem(op, rl_result.reg, rl_rhs);
2804            StoreFinalValue(rl_dest, rl_result);
2805            return;
2806          } else if (!rl_rhs.reg.IsFloat()) {
2807            OpRegReg(op, rl_result.reg, rl_rhs.reg);
2808            StoreFinalValue(rl_dest, rl_result);
2809            return;
2810          }
2811        }
2812        rl_rhs = LoadValue(rl_rhs, kCoreReg);
2813        // rl_rhs and rl_dest might be the same VR; in that case rl_dest is
2814        // in a register after the LoadValue, but rl_result has not been
2815        // updated yet, so refresh it here.
2816        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2817        if (rl_result.location != kLocPhysReg) {
2818          // Okay, we can do this into memory.
2819          OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
2820          return;
2821        } else if (!rl_result.reg.IsFloat()) {
2822          // Can do this directly into the result register.
2823          OpRegReg(op, rl_result.reg, rl_rhs.reg);
2824          StoreFinalValue(rl_dest, rl_result);
2825          return;
2826        } else {
2827          rl_lhs = LoadValue(rl_lhs, kCoreReg);
2828          rl_result = EvalLoc(rl_dest, kCoreReg, true);
2829          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2830        }
2831      } else {
2832        // Try to use reg/memory instructions.
2833        rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
2834        rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2835        // The reg/mem optimizations below require core registers.
2836        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
2837          // An operand is FP-resident, so fall back to loading everything into registers.
2838          rl_lhs = LoadValue(rl_lhs, kCoreReg);
2839          rl_rhs = LoadValue(rl_rhs, kCoreReg);
2840          rl_result = EvalLoc(rl_dest, kCoreReg, true);
2841          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2842        } else {
2843          // We can optimize by moving to result and using memory operands.
2844          if (rl_rhs.location != kLocPhysReg) {
2845            // Force LHS into result.
2846            // Be careful with the evaluation order here:
2847            // if rl_dest and rl_lhs name the same VR, load the value first;
2848            // if they are different, pick a register for the dest first.
2849            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2850                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
2851              rl_lhs = LoadValue(rl_lhs, kCoreReg);
2852              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2853              // No-op if these are the same.
2854              OpRegCopy(rl_result.reg, rl_lhs.reg);
2855            } else {
2856              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2857              LoadValueDirect(rl_lhs, rl_result.reg);
2858            }
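            // lhs is now in the result register; apply rhs directly from memory.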
2859            OpRegMem(op, rl_result.reg, rl_rhs);
2860          } else if (rl_lhs.location != kLocPhysReg) {
2861            // RHS is in a register; LHS is in memory.
2862            if (op != kOpSub) {
2863              // Force RHS into result and operate on memory.
2864              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2865              OpRegCopy(rl_result.reg, rl_rhs.reg);
2866              OpRegMem(op, rl_result.reg, rl_lhs);
2867            } else {
2868              // Subtraction isn't commutative.
2869              rl_lhs = LoadValue(rl_lhs, kCoreReg);
2870              rl_rhs = LoadValue(rl_rhs, kCoreReg);
2871              rl_result = EvalLoc(rl_dest, kCoreReg, true);
2872              OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2873            }
2874          } else {
2875            // Both are in registers.
2876            rl_lhs = LoadValue(rl_lhs, kCoreReg);
2877            rl_rhs = LoadValue(rl_rhs, kCoreReg);
2878            rl_result = EvalLoc(rl_dest, kCoreReg, true);
2879            OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2880          }
2881        }
2882      }
2883    }
2884  }
2885  StoreValue(rl_dest, rl_result);
2886}
2887
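// Reports whether GenArithOpInt can use the integer reg/mem forms without first
// loading the operands: true unless an operand currently lives in an FP register.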
2888bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
2889  // An operand living in a floating-point register rules out the integer reg/mem forms.
2890  if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
2891    return false;
2892  }
2893  if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
2894    return false;
2895  }
2896
2897  // Both operands are in memory or in core registers, so the optimized forms are safe.
2898  return true;
2899}
2900
2901void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
2902  if (!cu_->target64) {
2903    Mir2Lir::GenIntToLong(rl_dest, rl_src);
2904    return;
2905  }
2906  rl_src = UpdateLocTyped(rl_src, kCoreReg);
2907  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2908  if (rl_src.location == kLocPhysReg) {
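    // movsxd sign-extends the 32-bit source register into the 64-bit result register.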
2909    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
2910  } else {
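    // The source is still in its stack home; movsxd loads and sign-extends it directly.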
2911    int displacement = SRegOffset(rl_src.s_reg_low);
2912    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2913    LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
2914                     displacement + LOWORD_OFFSET);
2915    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
2916                            true /* is_load */, true /* is_64bit */);
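    // (The displacement is divided by four above to index the Dalvik VR being annotated.)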
2917  }
2918  StoreValueWide(rl_dest, rl_result);
2919}
2920
2921void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
2922                        RegLocation rl_src1, RegLocation rl_shift) {
2923  if (!cu_->target64) {
2924    Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
2925    return;
2926  }
2927
2928  bool is_two_addr = false;
2929  OpKind op = kOpBkpt;
2930  RegLocation rl_result;
2931
2932  switch (opcode) {
2933    case Instruction::SHL_LONG_2ADDR:
2934      is_two_addr = true;
2935      // Fallthrough
2936    case Instruction::SHL_LONG:
2937      op = kOpLsl;
2938      break;
2939    case Instruction::SHR_LONG_2ADDR:
2940      is_two_addr = true;
2941      // Fallthrough
2942    case Instruction::SHR_LONG:
2943      op = kOpAsr;
2944      break;
2945    case Instruction::USHR_LONG_2ADDR:
2946      is_two_addr = true;
2947      // Fallthrough
2948    case Instruction::USHR_LONG:
2949      op = kOpLsr;
2950      break;
2951    default:
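      // All long-shift opcodes are handled above; kOpBkpt flags an unexpected opcode.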
2952      op = kOpBkpt;
2953  }
2954
2955  // X86 masks shift counts in hardware, but the variable-count forms require ECX.
2956  RegStorage t_reg = TargetReg(kCount, kNotWide);  // rCX
2957  LoadValueDirectFixed(rl_shift, t_reg);
2958  if (is_two_addr) {
2959    // Can we do this directly into memory?
2960    rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
2961    if (rl_result.location != kLocPhysReg) {
2962      // Okay, we can do this into memory
2963      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2964      OpMemReg(op, rl_result, t_reg.GetReg());
2965    } else if (!rl_result.reg.IsFloat()) {
2966      // Can do this directly into the result register.
2967      OpRegReg(op, rl_result.reg, t_reg);
2968      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      // The result is live in an FP register; shift it via the three-address form instead.
      rl_src1 = LoadValueWide(rl_src1, kCoreReg);
      rl_result = EvalLocWide(rl_dest, kCoreReg, true);
      OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
      StoreFinalValueWide(rl_dest, rl_result);
2969    }
2970  } else {
2971    // Three address form, or we can't do directly.
2972    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2973    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2974    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
2975    StoreFinalValueWide(rl_dest, rl_result);
2976  }
2977
2978  FreeTemp(t_reg);
2979}
2980
2981}  // namespace art
2982