fp_x86.cc revision 1222c96fafe98061cfc57d3bd115f46edb64e624
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_x86.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "x86_lir.h"

namespace art {

void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  /*
   * Don't attempt to optimize register usage since these opcodes call out to
   * the handlers.
   */
  switch (opcode) {
    case Instruction::ADD_FLOAT_2ADDR:
    case Instruction::ADD_FLOAT:
      op = kX86AddssRR;
      break;
    case Instruction::SUB_FLOAT_2ADDR:
    case Instruction::SUB_FLOAT:
      op = kX86SubssRR;
      break;
    case Instruction::DIV_FLOAT_2ADDR:
    case Instruction::DIV_FLOAT:
      op = kX86DivssRR;
      break;
    case Instruction::MUL_FLOAT_2ADDR:
    case Instruction::MUL_FLOAT:
      op = kX86MulssRR;
      break;
    case Instruction::REM_FLOAT_2ADDR:
    case Instruction::REM_FLOAT:
      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
      return;
    case Instruction::NEG_FLOAT:
      GenNegFloat(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValue(rl_src1, kFPReg);
  rl_src2 = LoadValue(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  RegStorage r_dest = rl_result.reg;
  RegStorage r_src1 = rl_src1.reg;
  RegStorage r_src2 = rl_src2.reg;
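  // SSE scalar arithmetic is two-operand (dest = dest op src), so if the result register
  // aliases the second source, move that operand aside before the copy below clobbers it.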
  if (r_dest == r_src2) {
    r_src2 = AllocTempSingle();
    OpRegCopy(r_src2, r_dest);
  }
  OpRegCopy(r_dest, r_src1);
  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  DCHECK(rl_dest.wide);
  DCHECK(rl_dest.fp);
  DCHECK(rl_src1.wide);
  DCHECK(rl_src1.fp);
  DCHECK(rl_src2.wide);
  DCHECK(rl_src2.fp);
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::ADD_DOUBLE_2ADDR:
    case Instruction::ADD_DOUBLE:
      op = kX86AddsdRR;
      break;
    case Instruction::SUB_DOUBLE_2ADDR:
    case Instruction::SUB_DOUBLE:
      op = kX86SubsdRR;
      break;
    case Instruction::DIV_DOUBLE_2ADDR:
    case Instruction::DIV_DOUBLE:
      op = kX86DivsdRR;
      break;
    case Instruction::MUL_DOUBLE_2ADDR:
    case Instruction::MUL_DOUBLE:
      op = kX86MulsdRR;
      break;
    case Instruction::REM_DOUBLE_2ADDR:
    case Instruction::REM_DOUBLE:
      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
      return;
    case Instruction::NEG_DOUBLE:
      GenNegDouble(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValueWide(rl_src1, kFPReg);
  rl_src2 = LoadValueWide(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  if (rl_result.reg == rl_src2.reg) {
    rl_src2.reg = AllocTempDouble();
    OpRegCopy(rl_src2.reg, rl_result.reg);
  }
  OpRegCopy(rl_result.reg, rl_src1.reg);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
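  // 32-bit x86 has no SSE instruction that converts a 64-bit integer to floating point
  // (the 64-bit forms of cvtsi2ss/cvtsi2sd need REX.W, i.e. 64-bit mode), so the conversion
  // goes through the x87 unit: fild the source from its stack slot, fstp the result back.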
  // Compute offsets to the source and destination VRs on stack.
  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of source.
  rl_src = UpdateLocWide(rl_src);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in a physical register, write it back to its stack location.
  if (rl_src.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back the VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a physical
      // register. Since we need it to be in memory to convert, we place it there now.
      StoreBaseDisp(rs_rX86_SP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
    }
  }

  // Push the source virtual register onto the x87 stack.
  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP.GetReg(),
                              src_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);

  // Now pop off the x87 stack and store the value in the destination VR's stack location.
  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP.GetReg(), displacement);
  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check whether it is in a physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate the temp (if needed) and load into the promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
      UpdateLocTyped(rl_dest, kFPReg);
  if (rl_result.location == kLocPhysReg) {
    /*
     * We already know that the result is in a physical register but do not know if it is the
     * right class. So we call EvalLoc(Wide) first, which will ensure that it gets moved to the
     * correct register class.
     */
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (is_double) {
      LoadBaseDisp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);

      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg);

      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_src) {
  RegisterClass rcSrc = kFPReg;
  X86OpCode op = kX86Nop;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::INT_TO_FLOAT:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2ssRR;
      break;
    case Instruction::DOUBLE_TO_FLOAT:
      rcSrc = kFPReg;
      op = kX86Cvtsd2ssRR;
      break;
    case Instruction::FLOAT_TO_DOUBLE:
      rcSrc = kFPReg;
      op = kX86Cvtss2sdRR;
      break;
    case Instruction::INT_TO_DOUBLE:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2sdRR;
      break;
    case Instruction::FLOAT_TO_INT: {
      rl_src = LoadValue(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempSingle();

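      // cvttss2si returns the "integer indefinite" value 0x80000000 for NaN and out-of-range
      // inputs, but Java requires NaN to convert to 0 and positive overflow to saturate at
      // Integer.MAX_VALUE. Compare against (float)0x7fffffff and fix up those two cases;
      // negative overflow already yields the required 0x80000000.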
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::DOUBLE_TO_INT: {
      rl_src = LoadValueWide(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempDouble();

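      // Same saturation dance as FLOAT_TO_INT above, using the double-precision compare and
      // truncating convert.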
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::LONG_TO_DOUBLE:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2sdRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, true /* is_double */);
      return;
    case Instruction::LONG_TO_FLOAT:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2ssRR;
        break;
      }
      GenLongToFP(rl_dest, rl_src, false /* is_double */);
      return;
    case Instruction::FLOAT_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValue(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempSingle();

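        // Same pattern as FLOAT_TO_INT, but saturating at Long.MAX_VALUE: the 64-bit
        // cvttss2si returns 0x8000000000000000 for NaN and overflow, which is only the
        // correct answer for negative overflow.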
        // Load Long.MAX_VALUE (0x7fffffffffffffff) into rl_result.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
      }
      return;
    case Instruction::DOUBLE_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValueWide(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempDouble();

        // Load Long.MAX_VALUE (0x7fffffffffffffff) into rl_result.
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
      }
      return;
    default:
      LOG(INFO) << "Unexpected opcode: " << opcode;
  }
  // At this point, target will be either float or double.
  DCHECK(rl_dest.fp);
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, rcSrc);
  } else {
    rl_src = LoadValue(rl_src, rcSrc);
  }
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}

void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                          bool is_double) {
  // Compute offsets to the source and destination VRs on stack.
  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of sources.
  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If a source is in a physical register, write it back to its stack location.
  if (rl_src1.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back the VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src1.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in a physical
      // register. Since we need it to be in memory for the x87 load, we place it there now.
      StoreBaseDisp(rs_rX86_SP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  if (rl_src2.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
    if (reg_info != nullptr && reg_info->IsTemp()) {
      FlushSpecificReg(reg_info);
      ResetDef(rl_src2.reg);
    } else {
      StoreBaseDisp(rs_rX86_SP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;

  // Push the source virtual registers onto the x87 stack.
  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rX86_SP.GetReg(),
                             src2_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rX86_SP.GetReg(),
                             src1_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

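  // The fstsw below overwrites AX with the FPU status word, so claim rAX as a locked temp.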
  FlushReg(rs_rAX);
  Clobber(rs_rAX);
  LockTemp(rs_rAX);

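  // fprem performs only a partial reduction (at most 63 exponent bits per iteration), so
  // loop until the status word's C2 flag (bit 10) reports the reduction as complete.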
  LIR* retry = NewLIR0(kPseudoTargetLabel);

  // Compute the partial remainder of ST(0) / ST(1), leaving the result in ST(0).
  NewLIR0(kX86Fprem);

  // Move FPU status word to AX.
  NewLIR0(kX86Fstsw16R);

  // Check if reduction is complete.
  OpRegImm(kOpAnd, rs_rAX, 0x400);

  // If not, continue computing the remainder.
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
  branch->target = retry;

  FreeTemp(rs_rAX);

  // Now store the result in the destination VR's stack location.
  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
  LIR *fst = NewLIR2NoDest(opcode, rs_rX86_SP.GetReg(), displacement);
  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);

  // Pop ST(1) and ST(0).
  NewLIR0(kX86Fucompp);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check whether it is in a physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate the temp (if needed) and load into the promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
      UpdateLocTyped(rl_dest, kFPReg);
  if (rl_result.location == kLocPhysReg) {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (is_double) {
      LoadBaseDisp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg);
      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
  if (single) {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
  } else {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
  }
  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
  ClobberSReg(rl_dest.s_reg_low);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
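  // ucomiss/ucomisd set ZF on equality and CF when src1 < src2 or the compare is unordered.
  // Seeding the result with 0 (1 for the gt-biased case), testing "above" and then
  // subtracting CF with sbb yields 1, 0 or -1; the parity branch preserves the 1 when a NaN
  // must produce the gt-biased result.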
  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
  if (single) {
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  LIR* branch = NULL;
  if (unordered_gt) {
    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
  }
  // If the result reg can't be byte accessed, use a jump and move instead of a set.
  if (!IsByteRegister(rl_result.reg)) {
    LIR* branch2 = NULL;
    if (unordered_gt) {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
    } else {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
    }
    branch2->target = NewLIR0(kPseudoTargetLabel);
  } else {
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
  }
  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
  if (unordered_gt) {
    branch->target = NewLIR0(kPseudoTargetLabel);
  }
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  LIR* branch = NULL;
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
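  // ucomiss/ucomisd report the result through CF/ZF/PF the way an unsigned integer compare
  // would, so the signed MIR condition codes are remapped to their unsigned equivalents below.
  // PF is set on an unordered (NaN) compare; the extra parity branches steer that case to
  // whichever successor the gt/lt bias of the original compare requires.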
  ConditionCode ccode = mir->meta.ccode;
  switch (ccode) {
    case kCondEq:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      break;
    case kCondNe:
      if (!gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      break;
    case kCondLt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondUlt;
      break;
    case kCondLe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondLs;
      break;
    case kCondGt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondHi;
      break;
    case kCondGe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
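  // Negate by flipping the sign bit as an integer operation: adding 0x80000000 toggles
  // bit 31 and leaves the remaining bits of the value untouched.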
  RegLocation rl_result;
  rl_src = LoadValue(rl_src, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
  StoreValue(rl_dest, rl_result);
}

void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  if (cu_->target64) {
    OpRegCopy(rl_result.reg, rl_src.reg);
    // Flip sign bit.
    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
  } else {
    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
    OpRegCopy(rl_result.reg, rl_src.reg);
  }
  StoreValueWide(rl_dest, rl_result);
}

bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  RegLocation rl_dest = InlineTargetWide(info);  // Double result needs a wide target.
  rl_src = LoadValueWide(rl_src, kFPReg);
  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
  // Get the argument.
  RegLocation rl_src = info->args[0];

  // Get the inlined intrinsic target virtual register.
  RegLocation rl_dest = InlineTarget(info);

  // Get the virtual register number.
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);

  // If the argument is the same VR as the inlined intrinsic target.
  if (v_src_reg == v_dst_reg) {
    rl_src = UpdateLoc(rl_src);

    // If the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValue(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
      StoreValue(rl_dest, rl_src);
      return true;
    }
    // Otherwise the argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly on the memory location.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP.GetReg(), displacement, 0x7fffffff);
    AnnotateDalvikRegAccess(lir, displacement >> 2, false /* is_load */, false /* is_64bit */);
    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValue(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
    StoreValue(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  RegLocation rl_dest = InlineTargetWide(info);
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  if (cu_->target64) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
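    // Clear the sign bit: shift it out the top, then shift a zero back in.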
    OpRegImm(kOpLsl, rl_result.reg, 1);
    OpRegImm(kOpLsr, rl_result.reg, 1);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
  rl_src = UpdateLocWide(rl_src);

  // If the argument is in a physical XMM register.
  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (rl_result.reg != rl_src.reg) {
      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
    } else {
      RegStorage sign_mask = AllocTempDouble();
      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
      FreeTemp(sign_mask);
    }
    StoreValueWide(rl_dest, rl_result);
    return true;
  } else if (v_src_reg == v_dst_reg) {
    // The argument is the same VR as the inlined intrinsic target.
    // If the argument is in a physical register.
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValueWide(rl_src, kCoreReg);
      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
      StoreValueWide(rl_dest, rl_src);
      return true;
    }
    // Otherwise the argument is in memory.
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly on the memory location.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP.GetReg(), displacement + HIWORD_OFFSET, 0x7fffffff);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is_64bit */);
    return true;
  } else {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
}

bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
  if (is_double) {
    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
    RegLocation rl_dest = InlineTargetWide(info);
    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopyWide.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopyWide(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
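    // For min, ORing the sign bits yields -0.0 if either operand is -0.0; for max, ANDing
    // them yields +0.0 unless both are -0.0. Values that compare equal are otherwise
    // bitwise identical, so the bitwise op leaves the magnitude untouched.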
    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopyWide(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValueWide(rl_dest, rl_result);
  } else {
    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
    RegLocation rl_dest = InlineTarget(info);
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopy.
    if (rl_result.reg == rl_src2.reg) {
      std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopy(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopy(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

}  // namespace art
