fp_x86.cc revision 51a80d72dc436a4a89c636987b9cedabe774fdd6
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "codegen_x86.h"
18#include "dex/quick/mir_to_lir-inl.h"
19#include "x86_lir.h"
20
21namespace art {
22
23void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
24                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
25  X86OpCode op = kX86Nop;
26  RegLocation rl_result;
27
28  /*
29   * Don't attempt to optimize register usage since these opcodes call out to
30   * the handlers.
31   */
32  switch (opcode) {
33    case Instruction::ADD_FLOAT_2ADDR:
34    case Instruction::ADD_FLOAT:
35      op = kX86AddssRR;
36      break;
37    case Instruction::SUB_FLOAT_2ADDR:
38    case Instruction::SUB_FLOAT:
39      op = kX86SubssRR;
40      break;
41    case Instruction::DIV_FLOAT_2ADDR:
42    case Instruction::DIV_FLOAT:
43      op = kX86DivssRR;
44      break;
45    case Instruction::MUL_FLOAT_2ADDR:
46    case Instruction::MUL_FLOAT:
47      op = kX86MulssRR;
48      break;
49    case Instruction::REM_FLOAT_2ADDR:
50    case Instruction::REM_FLOAT:
51      FlushAllRegs();   // Send everything to home location
52      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2,
53                                              false);
54      rl_result = GetReturn(true);
55      StoreValue(rl_dest, rl_result);
56      return;
57    case Instruction::NEG_FLOAT:
58      GenNegFloat(rl_dest, rl_src1);
59      return;
60    default:
61      LOG(FATAL) << "Unexpected opcode: " << opcode;
62  }
63  rl_src1 = LoadValue(rl_src1, kFPReg);
64  rl_src2 = LoadValue(rl_src2, kFPReg);
65  rl_result = EvalLoc(rl_dest, kFPReg, true);
66  int r_dest = rl_result.reg.GetReg();
67  int r_src1 = rl_src1.reg.GetReg();
68  int r_src2 = rl_src2.reg.GetReg();
69  if (r_dest == r_src2) {
70    r_src2 = AllocTempFloat();
71    OpRegCopy(r_src2, r_dest);
72  }
73  OpRegCopy(r_dest, r_src1);
74  NewLIR2(op, r_dest, r_src2);
75  StoreValue(rl_dest, rl_result);
76}
77
78void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
79                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
80  X86OpCode op = kX86Nop;
81  RegLocation rl_result;
82
83  switch (opcode) {
84    case Instruction::ADD_DOUBLE_2ADDR:
85    case Instruction::ADD_DOUBLE:
86      op = kX86AddsdRR;
87      break;
88    case Instruction::SUB_DOUBLE_2ADDR:
89    case Instruction::SUB_DOUBLE:
90      op = kX86SubsdRR;
91      break;
92    case Instruction::DIV_DOUBLE_2ADDR:
93    case Instruction::DIV_DOUBLE:
94      op = kX86DivsdRR;
95      break;
96    case Instruction::MUL_DOUBLE_2ADDR:
97    case Instruction::MUL_DOUBLE:
98      op = kX86MulsdRR;
99      break;
100    case Instruction::REM_DOUBLE_2ADDR:
101    case Instruction::REM_DOUBLE:
102      FlushAllRegs();   // Send everything to home location
103      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2,
104                                              false);
105      rl_result = GetReturnWide(true);
106      StoreValueWide(rl_dest, rl_result);
107      return;
108    case Instruction::NEG_DOUBLE:
109      GenNegDouble(rl_dest, rl_src1);
110      return;
111    default:
112      LOG(FATAL) << "Unexpected opcode: " << opcode;
113  }
114  rl_src1 = LoadValueWide(rl_src1, kFPReg);
115  DCHECK(rl_src1.wide);
116  rl_src2 = LoadValueWide(rl_src2, kFPReg);
117  DCHECK(rl_src2.wide);
118  rl_result = EvalLoc(rl_dest, kFPReg, true);
119  DCHECK(rl_dest.wide);
120  DCHECK(rl_result.wide);
121  int r_dest = S2d(rl_result.reg.GetReg(), rl_result.reg.GetHighReg());
122  int r_src1 = S2d(rl_src1.reg.GetReg(), rl_src1.reg.GetHighReg());
123  int r_src2 = S2d(rl_src2.reg.GetReg(), rl_src2.reg.GetHighReg());
124  if (r_dest == r_src2) {
125    r_src2 = AllocTempDouble() | X86_FP_DOUBLE;
126    OpRegCopy(r_src2, r_dest);
127  }
128  OpRegCopy(r_dest, r_src1);
129  NewLIR2(op, r_dest, r_src2);
130  StoreValueWide(rl_dest, rl_result);
131}
132
133void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
134  // Compute offsets to the source and destination VRs on stack
135  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
136  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
137
138  // Update the in-register state of source.
139  rl_src = UpdateLocWide(rl_src);
140
141  // If the source is in physical register, then put it in its location on stack.
142  if (rl_src.location == kLocPhysReg) {
143    RegisterInfo* lo_info = GetRegInfo(rl_src.reg.GetReg());
144
145    if (lo_info != nullptr && lo_info->is_temp) {
146      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
147      FlushSpecificReg(lo_info);
148    } else {
149      // It must have been register promoted if it is not a temp but is still in physical
150      // register. Since we need it to be in memory to convert, we place it there now.
151      StoreBaseDispWide(TargetReg(kSp), src_v_reg_offset, rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
152    }
153  }
154
155  // Push the source virtual register onto the x87 stack.
156  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp), src_v_reg_offset + LOWORD_OFFSET);
157  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
158      true /* is_load */, true /* is64bit */);
159
160  // Now pop off x87 stack and store it in the destination VR's stack location.
161  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
162  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
163  LIR *fstp = NewLIR2NoDest(opcode, TargetReg(kSp), displacement);
164  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);
165
166  /*
167   * The result is in a physical register if it was in a temp or was register
168   * promoted. For that reason it is enough to check if it is in physical
169   * register. If it is, then we must do all of the bookkeeping necessary to
170   * invalidate temp (if needed) and load in promoted register (if needed).
171   * If the result's location is in memory, then we do not need to do anything
172   * more since the fstp has already placed the correct value in memory.
173   */
174  RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest);
175  if (rl_result.location == kLocPhysReg) {
176    /*
177     * We already know that the result is in a physical register but do not know if it is the
178     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
179     * correct register class.
180     */
181    if (is_double) {
182      rl_result = EvalLocWide(rl_dest, kFPReg, true);
183
184      LoadBaseDispWide(TargetReg(kSp), dest_v_reg_offset, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(), INVALID_SREG);
185
186      StoreFinalValueWide(rl_dest, rl_result);
187    } else {
188      rl_result = EvalLoc(rl_dest, kFPReg, true);
189
190      LoadWordDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg.GetReg());
191
192      StoreFinalValue(rl_dest, rl_result);
193    }
194  }
195}
196
197void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
198                               RegLocation rl_src) {
199  RegisterClass rcSrc = kFPReg;
200  X86OpCode op = kX86Nop;
201  int src_reg;
202  RegLocation rl_result;
203  switch (opcode) {
204    case Instruction::INT_TO_FLOAT:
205      rcSrc = kCoreReg;
206      op = kX86Cvtsi2ssRR;
207      break;
208    case Instruction::DOUBLE_TO_FLOAT:
209      rcSrc = kFPReg;
210      op = kX86Cvtsd2ssRR;
211      break;
212    case Instruction::FLOAT_TO_DOUBLE:
213      rcSrc = kFPReg;
214      op = kX86Cvtss2sdRR;
215      break;
216    case Instruction::INT_TO_DOUBLE:
217      rcSrc = kCoreReg;
218      op = kX86Cvtsi2sdRR;
219      break;
220    case Instruction::FLOAT_TO_INT: {
221      rl_src = LoadValue(rl_src, kFPReg);
222      src_reg = rl_src.reg.GetReg();
223      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
224      ClobberSReg(rl_dest.s_reg_low);
225      rl_result = EvalLoc(rl_dest, kCoreReg, true);
226      int temp_reg = AllocTempFloat();
227
228      LoadConstant(rl_result.reg.GetReg(), 0x7fffffff);
229      NewLIR2(kX86Cvtsi2ssRR, temp_reg, rl_result.reg.GetReg());
230      NewLIR2(kX86ComissRR, src_reg, temp_reg);
231      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
232      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
233      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), src_reg);
234      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
235      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
236      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
237      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
238      branch_normal->target = NewLIR0(kPseudoTargetLabel);
239      StoreValue(rl_dest, rl_result);
240      return;
241    }
242    case Instruction::DOUBLE_TO_INT: {
243      rl_src = LoadValueWide(rl_src, kFPReg);
244      src_reg = rl_src.reg.GetReg();
245      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
246      ClobberSReg(rl_dest.s_reg_low);
247      rl_result = EvalLoc(rl_dest, kCoreReg, true);
248      int temp_reg = AllocTempDouble() | X86_FP_DOUBLE;
249
250      LoadConstant(rl_result.reg.GetReg(), 0x7fffffff);
251      NewLIR2(kX86Cvtsi2sdRR, temp_reg, rl_result.reg.GetReg());
252      NewLIR2(kX86ComisdRR, src_reg, temp_reg);
253      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
254      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
255      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), src_reg);
256      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
257      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
258      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
259      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
260      branch_normal->target = NewLIR0(kPseudoTargetLabel);
261      StoreValue(rl_dest, rl_result);
262      return;
263    }
264    case Instruction::LONG_TO_DOUBLE:
265      GenLongToFP(rl_dest, rl_src, true /* is_double */);
266      return;
267    case Instruction::LONG_TO_FLOAT:
268      GenLongToFP(rl_dest, rl_src, false /* is_double */);
269      return;
270    case Instruction::FLOAT_TO_LONG:
271      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
272      return;
273    case Instruction::DOUBLE_TO_LONG:
274      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src);
275      return;
276    default:
277      LOG(INFO) << "Unexpected opcode: " << opcode;
278  }
279  if (rl_src.wide) {
280    rl_src = LoadValueWide(rl_src, rcSrc);
281    src_reg = S2d(rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
282  } else {
283    rl_src = LoadValue(rl_src, rcSrc);
284    src_reg = rl_src.reg.GetReg();
285  }
286  if (rl_dest.wide) {
287    rl_result = EvalLoc(rl_dest, kFPReg, true);
288    NewLIR2(op, S2d(rl_result.reg.GetReg(), rl_result.reg.GetHighReg()), src_reg);
289    StoreValueWide(rl_dest, rl_result);
290  } else {
291    rl_result = EvalLoc(rl_dest, kFPReg, true);
292    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
293    StoreValue(rl_dest, rl_result);
294  }
295}
296
297void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
298                          RegLocation rl_src1, RegLocation rl_src2) {
299  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
300  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
301  int src_reg1;
302  int src_reg2;
303  if (single) {
304    rl_src1 = LoadValue(rl_src1, kFPReg);
305    src_reg1 = rl_src1.reg.GetReg();
306    rl_src2 = LoadValue(rl_src2, kFPReg);
307    src_reg2 = rl_src2.reg.GetReg();
308  } else {
309    rl_src1 = LoadValueWide(rl_src1, kFPReg);
310    src_reg1 = S2d(rl_src1.reg.GetReg(), rl_src1.reg.GetHighReg());
311    rl_src2 = LoadValueWide(rl_src2, kFPReg);
312    src_reg2 = S2d(rl_src2.reg.GetReg(), rl_src2.reg.GetHighReg());
313  }
314  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
315  ClobberSReg(rl_dest.s_reg_low);
316  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
317  LoadConstantNoClobber(rl_result.reg.GetReg(), unordered_gt ? 1 : 0);
318  if (single) {
319    NewLIR2(kX86UcomissRR, src_reg1, src_reg2);
320  } else {
321    NewLIR2(kX86UcomisdRR, src_reg1, src_reg2);
322  }
323  LIR* branch = NULL;
324  if (unordered_gt) {
325    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
326  }
327  // If the result reg can't be byte accessed, use a jump and move instead of a set.
328  if (rl_result.reg.GetReg() >= 4) {
329    LIR* branch2 = NULL;
330    if (unordered_gt) {
331      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
332      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
333    } else {
334      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
335      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
336    }
337    branch2->target = NewLIR0(kPseudoTargetLabel);
338  } else {
339    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
340  }
341  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
342  if (unordered_gt) {
343    branch->target = NewLIR0(kPseudoTargetLabel);
344  }
345  StoreValue(rl_dest, rl_result);
346}
347
348void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
349                                     bool is_double) {
350  LIR* taken = &block_label_list_[bb->taken];
351  LIR* not_taken = &block_label_list_[bb->fall_through];
352  LIR* branch = NULL;
353  RegLocation rl_src1;
354  RegLocation rl_src2;
355  if (is_double) {
356    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
357    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
358    rl_src1 = LoadValueWide(rl_src1, kFPReg);
359    rl_src2 = LoadValueWide(rl_src2, kFPReg);
360    NewLIR2(kX86UcomisdRR, S2d(rl_src1.reg.GetReg(), rl_src1.reg.GetHighReg()),
361            S2d(rl_src2.reg.GetReg(), rl_src2.reg.GetHighReg()));
362  } else {
363    rl_src1 = mir_graph_->GetSrc(mir, 0);
364    rl_src2 = mir_graph_->GetSrc(mir, 1);
365    rl_src1 = LoadValue(rl_src1, kFPReg);
366    rl_src2 = LoadValue(rl_src2, kFPReg);
367    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
368  }
369  ConditionCode ccode = mir->meta.ccode;
370  switch (ccode) {
371    case kCondEq:
372      if (!gt_bias) {
373        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
374        branch->target = not_taken;
375      }
376      break;
377    case kCondNe:
378      if (!gt_bias) {
379        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
380        branch->target = taken;
381      }
382      break;
383    case kCondLt:
384      if (gt_bias) {
385        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
386        branch->target = not_taken;
387      }
388      ccode = kCondUlt;
389      break;
390    case kCondLe:
391      if (gt_bias) {
392        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
393        branch->target = not_taken;
394      }
395      ccode = kCondLs;
396      break;
397    case kCondGt:
398      if (gt_bias) {
399        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
400        branch->target = taken;
401      }
402      ccode = kCondHi;
403      break;
404    case kCondGe:
405      if (gt_bias) {
406        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
407        branch->target = taken;
408      }
409      ccode = kCondUge;
410      break;
411    default:
412      LOG(FATAL) << "Unexpected ccode: " << ccode;
413  }
414  OpCondBranch(ccode, taken);
415}
416
417void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
418  RegLocation rl_result;
419  rl_src = LoadValue(rl_src, kCoreReg);
420  rl_result = EvalLoc(rl_dest, kCoreReg, true);
421  OpRegRegImm(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0x80000000);
422  StoreValue(rl_dest, rl_result);
423}
424
425void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
426  RegLocation rl_result;
427  rl_src = LoadValueWide(rl_src, kCoreReg);
428  rl_result = EvalLoc(rl_dest, kCoreReg, true);
429  OpRegRegImm(kOpAdd, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), 0x80000000);
430  OpRegCopy(rl_result.reg.GetReg(), rl_src.reg.GetReg());
431  StoreValueWide(rl_dest, rl_result);
432}
433
434bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
435  RegLocation rl_src = info->args[0];
436  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
437  rl_src = LoadValueWide(rl_src, kFPReg);
438  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
439  NewLIR2(kX86SqrtsdRR, S2d(rl_result.reg.GetReg(), rl_result.reg.GetHighReg()),
440          S2d(rl_src.reg.GetReg(), rl_src.reg.GetHighReg()));
441  StoreValueWide(rl_dest, rl_result);
442  return true;
443}
444
445
446
447}  // namespace art
448