// fp_x86.cc revision 6170f5576f46f64eec6cd6c125c3de001afa5bcf
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "codegen_x86.h"
18#include "dex/quick/mir_to_lir-inl.h"
19#include "x86_lir.h"
20
21namespace art {
22
23void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
24                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
25  X86OpCode op = kX86Nop;
26  RegLocation rl_result;
27
28  /*
29   * Don't attempt to optimize register usage since these opcodes call out to
30   * the handlers.
31   */
32  switch (opcode) {
33    case Instruction::ADD_FLOAT_2ADDR:
34    case Instruction::ADD_FLOAT:
35      op = kX86AddssRR;
36      break;
37    case Instruction::SUB_FLOAT_2ADDR:
38    case Instruction::SUB_FLOAT:
39      op = kX86SubssRR;
40      break;
41    case Instruction::DIV_FLOAT_2ADDR:
42    case Instruction::DIV_FLOAT:
43      op = kX86DivssRR;
44      break;
45    case Instruction::MUL_FLOAT_2ADDR:
46    case Instruction::MUL_FLOAT:
47      op = kX86MulssRR;
48      break;
49    case Instruction::REM_FLOAT_2ADDR:
50    case Instruction::REM_FLOAT:
51      FlushAllRegs();   // Send everything to home location
52      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2,
53                                              false);
54      rl_result = GetReturn(true);
55      StoreValue(rl_dest, rl_result);
56      return;
57    case Instruction::NEG_FLOAT:
58      GenNegFloat(rl_dest, rl_src1);
59      return;
60    default:
61      LOG(FATAL) << "Unexpected opcode: " << opcode;
62  }
63  rl_src1 = LoadValue(rl_src1, kFPReg);
64  rl_src2 = LoadValue(rl_src2, kFPReg);
65  rl_result = EvalLoc(rl_dest, kFPReg, true);
66  RegStorage r_dest = rl_result.reg;
67  RegStorage r_src1 = rl_src1.reg;
68  RegStorage r_src2 = rl_src2.reg;
69  if (r_dest == r_src2) {
70    r_src2 = AllocTempFloat();
71    OpRegCopy(r_src2, r_dest);
72  }
73  OpRegCopy(r_dest, r_src1);
74  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
75  StoreValue(rl_dest, rl_result);
76}
77
void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  // Double-precision arithmetic; mirrors GenArithOpFloat, but uses the
  // scalar-double (sd) SSE opcodes and wide (register-pair) operands.
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::ADD_DOUBLE_2ADDR:
    case Instruction::ADD_DOUBLE:
      op = kX86AddsdRR;
      break;
    case Instruction::SUB_DOUBLE_2ADDR:
    case Instruction::SUB_DOUBLE:
      op = kX86SubsdRR;
      break;
    case Instruction::DIV_DOUBLE_2ADDR:
    case Instruction::DIV_DOUBLE:
      op = kX86DivsdRR;
      break;
    case Instruction::MUL_DOUBLE_2ADDR:
    case Instruction::MUL_DOUBLE:
      op = kX86MulsdRR;
      break;
    case Instruction::REM_DOUBLE_2ADDR:
    case Instruction::REM_DOUBLE:
      // No SSE remainder instruction; call out to the fmod entrypoint.
      FlushAllRegs();   // Send everything to home location
      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2,
                                              false);
      rl_result = GetReturnWide(true);
      StoreValueWide(rl_dest, rl_result);
      return;
    case Instruction::NEG_DOUBLE:
      GenNegDouble(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValueWide(rl_src1, kFPReg);
  DCHECK(rl_src1.wide);
  rl_src2 = LoadValueWide(rl_src2, kFPReg);
  DCHECK(rl_src2.wide);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  DCHECK(rl_dest.wide);
  DCHECK(rl_result.wide);
  // TODO: update with direct 64-bit reg.
  // S2d folds the low/high single-precision register pair into the int
  // register number of the corresponding double-precision register.
  int r_dest = S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg());
  int r_src1 = S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
  int r_src2 = S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg());
  // The two-operand SSE form clobbers its first operand; if the destination
  // aliases src2, preserve src2 in a fresh double temp first.
  if (r_dest == r_src2) {
    r_src2 = AllocTempDouble().GetLowReg() | X86_FP_DOUBLE;
    OpRegCopy(RegStorage::Solo64(r_src2), RegStorage::Solo64(r_dest));
  }
  OpRegCopy(RegStorage::Solo64(r_dest), RegStorage::Solo64(r_src1));
  NewLIR2(op, r_dest, r_src2);
  StoreValueWide(rl_dest, rl_result);
}
133
void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
  // Convert a 64-bit integer VR to float/double via the x87 unit: the source
  // is staged in its memory home location, pushed onto the x87 stack with
  // fild (64-bit integer load), then popped into the destination VR's stack
  // slot with fstp (32- or 64-bit depending on is_double).
  // Compute offsets to the source and destination VRs on stack
  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of source.
  rl_src = UpdateLocWide(rl_src);

  // If the source is in physical register, then put it in its location on stack.
  if (rl_src.location == kLocPhysReg) {
    RegisterInfo* lo_info = GetRegInfo(rl_src.reg.GetLowReg());

    if (lo_info != nullptr && lo_info->is_temp) {
      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
      FlushSpecificReg(lo_info);
      // ResetDef for low/high to prevent NullifyRange from removing stores.
      ResetDef(rl_src.reg.GetLowReg());
      if (rl_src.reg.GetLowReg() != rl_src.reg.GetHighReg() && GetRegInfo(rl_src.reg.GetHighReg()) != nullptr) {
        ResetDef(rl_src.reg.GetHighReg());
      }
    } else {
      // It must have been register promoted if it is not a temp but is still in physical
      // register. Since we need it to be in memory to convert, we place it there now.
      StoreBaseDispWide(TargetReg(kSp), src_v_reg_offset, rl_src.reg);
    }
  }

  // Push the source virtual register onto the x87 stack.
  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp).GetReg(), src_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
      true /* is_load */, true /* is64bit */);

  // Now pop off x87 stack and store it in the destination VR's stack location.
  // A float result occupies only the low word of the destination slot.
  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
  LIR *fstp = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement);
  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate temp (if needed) and load in promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    /*
     * We already know that the result is in a physical register but do not know if it is the
     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
     * correct register class.
     */
    if (is_double) {
      rl_result = EvalLocWide(rl_dest, kFPReg, true);

      LoadBaseDispWide(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, INVALID_SREG);

      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      rl_result = EvalLoc(rl_dest, kFPReg, true);

      LoadWordDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);

      StoreFinalValue(rl_dest, rl_result);
    }
  }
}
202
203void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
204                               RegLocation rl_src) {
205  RegisterClass rcSrc = kFPReg;
206  X86OpCode op = kX86Nop;
207  int src_reg;
208  RegLocation rl_result;
209  switch (opcode) {
210    case Instruction::INT_TO_FLOAT:
211      rcSrc = kCoreReg;
212      op = kX86Cvtsi2ssRR;
213      break;
214    case Instruction::DOUBLE_TO_FLOAT:
215      rcSrc = kFPReg;
216      op = kX86Cvtsd2ssRR;
217      break;
218    case Instruction::FLOAT_TO_DOUBLE:
219      rcSrc = kFPReg;
220      op = kX86Cvtss2sdRR;
221      break;
222    case Instruction::INT_TO_DOUBLE:
223      rcSrc = kCoreReg;
224      op = kX86Cvtsi2sdRR;
225      break;
226    case Instruction::FLOAT_TO_INT: {
227      rl_src = LoadValue(rl_src, kFPReg);
228      src_reg = rl_src.reg.GetReg();
229      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
230      ClobberSReg(rl_dest.s_reg_low);
231      rl_result = EvalLoc(rl_dest, kCoreReg, true);
232      int temp_reg = AllocTempFloat().GetReg();
233
234      LoadConstant(rl_result.reg, 0x7fffffff);
235      NewLIR2(kX86Cvtsi2ssRR, temp_reg, rl_result.reg.GetReg());
236      NewLIR2(kX86ComissRR, src_reg, temp_reg);
237      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
238      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
239      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), src_reg);
240      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
241      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
242      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
243      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
244      branch_normal->target = NewLIR0(kPseudoTargetLabel);
245      StoreValue(rl_dest, rl_result);
246      return;
247    }
248    case Instruction::DOUBLE_TO_INT: {
249      rl_src = LoadValueWide(rl_src, kFPReg);
250      src_reg = rl_src.reg.GetLowReg();
251      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
252      ClobberSReg(rl_dest.s_reg_low);
253      rl_result = EvalLoc(rl_dest, kCoreReg, true);
254      int temp_reg = AllocTempDouble().GetLowReg() | X86_FP_DOUBLE;
255
256      LoadConstant(rl_result.reg, 0x7fffffff);
257      NewLIR2(kX86Cvtsi2sdRR, temp_reg, rl_result.reg.GetReg());
258      NewLIR2(kX86ComisdRR, src_reg, temp_reg);
259      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
260      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
261      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), src_reg);
262      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
263      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
264      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
265      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
266      branch_normal->target = NewLIR0(kPseudoTargetLabel);
267      StoreValue(rl_dest, rl_result);
268      return;
269    }
270    case Instruction::LONG_TO_DOUBLE:
271      GenLongToFP(rl_dest, rl_src, true /* is_double */);
272      return;
273    case Instruction::LONG_TO_FLOAT:
274      GenLongToFP(rl_dest, rl_src, false /* is_double */);
275      return;
276    case Instruction::FLOAT_TO_LONG:
277      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
278      return;
279    case Instruction::DOUBLE_TO_LONG:
280      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src);
281      return;
282    default:
283      LOG(INFO) << "Unexpected opcode: " << opcode;
284  }
285  if (rl_src.wide) {
286    rl_src = LoadValueWide(rl_src, rcSrc);
287    src_reg = S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg());
288  } else {
289    rl_src = LoadValue(rl_src, rcSrc);
290    src_reg = rl_src.reg.GetReg();
291  }
292  if (rl_dest.wide) {
293    rl_result = EvalLoc(rl_dest, kFPReg, true);
294    NewLIR2(op, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), src_reg);
295    StoreValueWide(rl_dest, rl_result);
296  } else {
297    rl_result = EvalLoc(rl_dest, kFPReg, true);
298    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
299    StoreValue(rl_dest, rl_result);
300  }
301}
302
void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  // cmpl/cmpg-float/double: produce -1, 0 or 1 in a core register, with the
  // unordered (NaN) outcome biased to +1 for cmpg and -1 for cmpl.
  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
  int src_reg1;
  int src_reg2;
  if (single) {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    src_reg1 = rl_src1.reg.GetReg();
    rl_src2 = LoadValue(rl_src2, kFPReg);
    src_reg2 = rl_src2.reg.GetReg();
  } else {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    src_reg1 = S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    src_reg2 = S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg());
  }
  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
  ClobberSReg(rl_dest.s_reg_low);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Seed the result: 1 for the gt-biased compares (kept on NaN), else 0.
  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
  if (single) {
    NewLIR2(kX86UcomissRR, src_reg1, src_reg2);
  } else {
    NewLIR2(kX86UcomisdRR, src_reg1, src_reg2);
  }
  LIR* branch = NULL;
  if (unordered_gt) {
    // Parity even => unordered (NaN operand): skip ahead, keeping the 1.
    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
  }
  // If the result reg can't be byte accessed, use a jump and move instead of a set.
  if (rl_result.reg.GetReg() >= 4) {
    LIR* branch2 = NULL;
    if (unordered_gt) {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
    } else {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
    }
    branch2->target = NewLIR0(kPseudoTargetLabel);
  } else {
    // result = 1 when src1 > src2 (ucomis* sets flags like an unsigned cmp).
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
  }
  // Subtract the carry left by ucomis* (mov/set do not touch CF): when
  // src1 < src2 this turns the 0 into -1; on equal/above CF is clear and the
  // result is left as 0 or 1.
  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
  if (unordered_gt) {
    branch->target = NewLIR0(kPseudoTargetLabel);
  }
  StoreValue(rl_dest, rl_result);
}
353
void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  // Fused FP compare-and-branch: emit a ucomiss/ucomisd on the two sources,
  // then branch on the flags. Where the condition's NaN outcome differs from
  // what the main conditional branch alone would give, an extra parity branch
  // (PE = unordered, i.e. a NaN operand) routes the NaN case explicitly.
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  LIR* branch = NULL;
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kX86UcomisdRR, S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()),
            S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()));
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  ConditionCode ccode = mir->meta.ccode;
  // ucomis* sets ZF/CF like an unsigned integer compare, so the signed
  // lt/le/gt/ge codes below are rewritten to their unsigned counterparts
  // before the final OpCondBranch.
  switch (ccode) {
    case kCondEq:
      if (!gt_bias) {
        // NaN must not compare equal: send unordered to not_taken.
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      break;
    case kCondNe:
      if (!gt_bias) {
        // NaN compares not-equal: send unordered to taken.
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      break;
    case kCondLt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondUlt;
      break;
    case kCondLe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondLs;
      break;
    case kCondGt:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondHi;
      break;
    case kCondGe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}
422
423void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
424  RegLocation rl_result;
425  rl_src = LoadValue(rl_src, kCoreReg);
426  rl_result = EvalLoc(rl_dest, kCoreReg, true);
427  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
428  StoreValue(rl_dest, rl_result);
429}
430
void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
  // Negate a double held as a core-register pair by flipping its sign bit
  // (bit 31 of the high word). Adding 0x80000000 toggles that bit; the carry
  // out of bit 31 is discarded, so this matches an xor of the sign bit.
  RegLocation rl_result;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
  // NOTE(review): assumes OpRegCopy on a register pair moves only the low
  // word here — otherwise it would clobber the negated high word written
  // above. Verify against OpRegCopy's pair handling.
  OpRegCopy(rl_result.reg, rl_src.reg);
  StoreValueWide(rl_dest, rl_result);
}
439
440bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
441  RegLocation rl_src = info->args[0];
442  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
443  rl_src = LoadValueWide(rl_src, kFPReg);
444  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
445  NewLIR2(kX86SqrtsdRR, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()),
446          S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg()));
447  StoreValueWide(rl_dest, rl_result);
448  return true;
449}
450
451
452
453}  // namespace art
454