// fp_x86.cc revision 30adc7383a74eb3cb6db3bf42cea3a5595055ce1
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "codegen_x86.h"
18#include "dex/quick/mir_to_lir-inl.h"
19#include "x86_lir.h"
20
21namespace art {
22
23void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
24                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
25  X86OpCode op = kX86Nop;
26  RegLocation rl_result;
27
28  /*
29   * Don't attempt to optimize register usage since these opcodes call out to
30   * the handlers.
31   */
32  switch (opcode) {
33    case Instruction::ADD_FLOAT_2ADDR:
34    case Instruction::ADD_FLOAT:
35      op = kX86AddssRR;
36      break;
37    case Instruction::SUB_FLOAT_2ADDR:
38    case Instruction::SUB_FLOAT:
39      op = kX86SubssRR;
40      break;
41    case Instruction::DIV_FLOAT_2ADDR:
42    case Instruction::DIV_FLOAT:
43      op = kX86DivssRR;
44      break;
45    case Instruction::MUL_FLOAT_2ADDR:
46    case Instruction::MUL_FLOAT:
47      op = kX86MulssRR;
48      break;
49    case Instruction::REM_FLOAT_2ADDR:
50    case Instruction::REM_FLOAT:
51      FlushAllRegs();   // Send everything to home location
52      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
53                                              false);
54      rl_result = GetReturn(true);
55      StoreValue(rl_dest, rl_result);
56      return;
57    case Instruction::NEG_FLOAT:
58      GenNegFloat(rl_dest, rl_src1);
59      return;
60    default:
61      LOG(FATAL) << "Unexpected opcode: " << opcode;
62  }
63  rl_src1 = LoadValue(rl_src1, kFPReg);
64  rl_src2 = LoadValue(rl_src2, kFPReg);
65  rl_result = EvalLoc(rl_dest, kFPReg, true);
66  RegStorage r_dest = rl_result.reg;
67  RegStorage r_src1 = rl_src1.reg;
68  RegStorage r_src2 = rl_src2.reg;
69  if (r_dest == r_src2) {
70    r_src2 = AllocTempSingle();
71    OpRegCopy(r_src2, r_dest);
72  }
73  OpRegCopy(r_dest, r_src1);
74  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
75  StoreValue(rl_dest, rl_result);
76}
77
78void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
79                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
80  DCHECK(rl_dest.wide);
81  DCHECK(rl_dest.fp);
82  DCHECK(rl_src1.wide);
83  DCHECK(rl_src1.fp);
84  DCHECK(rl_src2.wide);
85  DCHECK(rl_src2.fp);
86  X86OpCode op = kX86Nop;
87  RegLocation rl_result;
88
89  switch (opcode) {
90    case Instruction::ADD_DOUBLE_2ADDR:
91    case Instruction::ADD_DOUBLE:
92      op = kX86AddsdRR;
93      break;
94    case Instruction::SUB_DOUBLE_2ADDR:
95    case Instruction::SUB_DOUBLE:
96      op = kX86SubsdRR;
97      break;
98    case Instruction::DIV_DOUBLE_2ADDR:
99    case Instruction::DIV_DOUBLE:
100      op = kX86DivsdRR;
101      break;
102    case Instruction::MUL_DOUBLE_2ADDR:
103    case Instruction::MUL_DOUBLE:
104      op = kX86MulsdRR;
105      break;
106    case Instruction::REM_DOUBLE_2ADDR:
107    case Instruction::REM_DOUBLE:
108      FlushAllRegs();   // Send everything to home location
109      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
110                                              false);
111      rl_result = GetReturnWide(true);
112      StoreValueWide(rl_dest, rl_result);
113      return;
114    case Instruction::NEG_DOUBLE:
115      GenNegDouble(rl_dest, rl_src1);
116      return;
117    default:
118      LOG(FATAL) << "Unexpected opcode: " << opcode;
119  }
120  rl_src1 = LoadValueWide(rl_src1, kFPReg);
121  rl_src2 = LoadValueWide(rl_src2, kFPReg);
122  rl_result = EvalLoc(rl_dest, kFPReg, true);
123  if (rl_result.reg == rl_src2.reg) {
124    rl_src2.reg = AllocTempDouble();
125    OpRegCopy(rl_src2.reg, rl_result.reg);
126  }
127  OpRegCopy(rl_result.reg, rl_src1.reg);
128  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
129  StoreValueWide(rl_dest, rl_result);
130}
131
132void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
133  // Compute offsets to the source and destination VRs on stack
134  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
135  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
136
137  // Update the in-register state of source.
138  rl_src = UpdateLocWide(rl_src);
139
140  // If the source is in physical register, then put it in its location on stack.
141  if (rl_src.location == kLocPhysReg) {
142    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);
143
144    if (reg_info != nullptr && reg_info->IsTemp()) {
145      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
146      FlushSpecificReg(reg_info);
147      // ResetDef to prevent NullifyRange from removing stores.
148      ResetDef(rl_src.reg);
149    } else {
150      // It must have been register promoted if it is not a temp but is still in physical
151      // register. Since we need it to be in memory to convert, we place it there now.
152      StoreBaseDisp(TargetReg(kSp), src_v_reg_offset, rl_src.reg, k64);
153    }
154  }
155
156  // Push the source virtual register onto the x87 stack.
157  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp).GetReg(),
158                              src_v_reg_offset + LOWORD_OFFSET);
159  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
160                          true /* is_load */, true /* is64bit */);
161
162  // Now pop off x87 stack and store it in the destination VR's stack location.
163  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
164  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
165  LIR *fstp = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement);
166  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);
167
168  /*
169   * The result is in a physical register if it was in a temp or was register
170   * promoted. For that reason it is enough to check if it is in physical
171   * register. If it is, then we must do all of the bookkeeping necessary to
172   * invalidate temp (if needed) and load in promoted register (if needed).
173   * If the result's location is in memory, then we do not need to do anything
174   * more since the fstp has already placed the correct value in memory.
175   */
176  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
177      UpdateLocTyped(rl_dest, kFPReg);
178  if (rl_result.location == kLocPhysReg) {
179    /*
180     * We already know that the result is in a physical register but do not know if it is the
181     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
182     * correct register class.
183     */
184    if (is_double) {
185      rl_result = EvalLocWide(rl_dest, kFPReg, true);
186
187      LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64);
188
189      StoreFinalValueWide(rl_dest, rl_result);
190    } else {
191      rl_result = EvalLoc(rl_dest, kFPReg, true);
192
193      Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
194
195      StoreFinalValue(rl_dest, rl_result);
196    }
197  }
198}
199
200void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
201                               RegLocation rl_src) {
202  RegisterClass rcSrc = kFPReg;
203  X86OpCode op = kX86Nop;
204  RegLocation rl_result;
205  switch (opcode) {
206    case Instruction::INT_TO_FLOAT:
207      rcSrc = kCoreReg;
208      op = kX86Cvtsi2ssRR;
209      break;
210    case Instruction::DOUBLE_TO_FLOAT:
211      rcSrc = kFPReg;
212      op = kX86Cvtsd2ssRR;
213      break;
214    case Instruction::FLOAT_TO_DOUBLE:
215      rcSrc = kFPReg;
216      op = kX86Cvtss2sdRR;
217      break;
218    case Instruction::INT_TO_DOUBLE:
219      rcSrc = kCoreReg;
220      op = kX86Cvtsi2sdRR;
221      break;
222    case Instruction::FLOAT_TO_INT: {
223      rl_src = LoadValue(rl_src, kFPReg);
224      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
225      ClobberSReg(rl_dest.s_reg_low);
226      rl_result = EvalLoc(rl_dest, kCoreReg, true);
227      RegStorage temp_reg = AllocTempSingle();
228
229      LoadConstant(rl_result.reg, 0x7fffffff);
230      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
231      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
232      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
233      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
234      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
235      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
236      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
237      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
238      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
239      branch_normal->target = NewLIR0(kPseudoTargetLabel);
240      StoreValue(rl_dest, rl_result);
241      return;
242    }
243    case Instruction::DOUBLE_TO_INT: {
244      rl_src = LoadValueWide(rl_src, kFPReg);
245      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
246      ClobberSReg(rl_dest.s_reg_low);
247      rl_result = EvalLoc(rl_dest, kCoreReg, true);
248      RegStorage temp_reg = AllocTempDouble();
249
250      LoadConstant(rl_result.reg, 0x7fffffff);
251      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
252      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
253      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
254      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
255      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
256      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
257      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
258      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
259      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
260      branch_normal->target = NewLIR0(kPseudoTargetLabel);
261      StoreValue(rl_dest, rl_result);
262      return;
263    }
264    case Instruction::LONG_TO_DOUBLE:
265      GenLongToFP(rl_dest, rl_src, true /* is_double */);
266      return;
267    case Instruction::LONG_TO_FLOAT:
268      GenLongToFP(rl_dest, rl_src, false /* is_double */);
269      return;
270    case Instruction::FLOAT_TO_LONG:
271      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
272      return;
273    case Instruction::DOUBLE_TO_LONG:
274      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
275      return;
276    default:
277      LOG(INFO) << "Unexpected opcode: " << opcode;
278  }
279  // At this point, target will be either float or double.
280  DCHECK(rl_dest.fp);
281  if (rl_src.wide) {
282    rl_src = LoadValueWide(rl_src, rcSrc);
283  } else {
284    rl_src = LoadValue(rl_src, rcSrc);
285  }
286  rl_result = EvalLoc(rl_dest, kFPReg, true);
287  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
288  if (rl_dest.wide) {
289    StoreValueWide(rl_dest, rl_result);
290  } else {
291    StoreValue(rl_dest, rl_result);
292  }
293}
294
295void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
296                          RegLocation rl_src1, RegLocation rl_src2) {
297  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
298  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
299  if (single) {
300    rl_src1 = LoadValue(rl_src1, kFPReg);
301    rl_src2 = LoadValue(rl_src2, kFPReg);
302  } else {
303    rl_src1 = LoadValueWide(rl_src1, kFPReg);
304    rl_src2 = LoadValueWide(rl_src2, kFPReg);
305  }
306  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
307  ClobberSReg(rl_dest.s_reg_low);
308  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
309  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
310  if (single) {
311    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
312  } else {
313    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
314  }
315  LIR* branch = NULL;
316  if (unordered_gt) {
317    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
318  }
319  // If the result reg can't be byte accessed, use a jump and move instead of a set.
320  if (rl_result.reg.GetReg() >= rs_rX86_SP.GetReg()) {
321    LIR* branch2 = NULL;
322    if (unordered_gt) {
323      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
324      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
325    } else {
326      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
327      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
328    }
329    branch2->target = NewLIR0(kPseudoTargetLabel);
330  } else {
331    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
332  }
333  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
334  if (unordered_gt) {
335    branch->target = NewLIR0(kPseudoTargetLabel);
336  }
337  StoreValue(rl_dest, rl_result);
338}
339
340void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
341                                     bool is_double) {
342  LIR* taken = &block_label_list_[bb->taken];
343  LIR* not_taken = &block_label_list_[bb->fall_through];
344  LIR* branch = NULL;
345  RegLocation rl_src1;
346  RegLocation rl_src2;
347  if (is_double) {
348    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
349    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
350    rl_src1 = LoadValueWide(rl_src1, kFPReg);
351    rl_src2 = LoadValueWide(rl_src2, kFPReg);
352    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
353  } else {
354    rl_src1 = mir_graph_->GetSrc(mir, 0);
355    rl_src2 = mir_graph_->GetSrc(mir, 1);
356    rl_src1 = LoadValue(rl_src1, kFPReg);
357    rl_src2 = LoadValue(rl_src2, kFPReg);
358    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
359  }
360  ConditionCode ccode = mir->meta.ccode;
361  switch (ccode) {
362    case kCondEq:
363      if (!gt_bias) {
364        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
365        branch->target = not_taken;
366      }
367      break;
368    case kCondNe:
369      if (!gt_bias) {
370        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
371        branch->target = taken;
372      }
373      break;
374    case kCondLt:
375      if (gt_bias) {
376        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
377        branch->target = not_taken;
378      }
379      ccode = kCondUlt;
380      break;
381    case kCondLe:
382      if (gt_bias) {
383        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
384        branch->target = not_taken;
385      }
386      ccode = kCondLs;
387      break;
388    case kCondGt:
389      if (gt_bias) {
390        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
391        branch->target = taken;
392      }
393      ccode = kCondHi;
394      break;
395    case kCondGe:
396      if (gt_bias) {
397        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
398        branch->target = taken;
399      }
400      ccode = kCondUge;
401      break;
402    default:
403      LOG(FATAL) << "Unexpected ccode: " << ccode;
404  }
405  OpCondBranch(ccode, taken);
406}
407
408void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
409  RegLocation rl_result;
410  rl_src = LoadValue(rl_src, kCoreReg);
411  rl_result = EvalLoc(rl_dest, kCoreReg, true);
412  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
413  StoreValue(rl_dest, rl_result);
414}
415
416void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
417  RegLocation rl_result;
418  rl_src = LoadValueWide(rl_src, kCoreReg);
419  rl_result = EvalLoc(rl_dest, kCoreReg, true);
420  OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
421  OpRegCopy(rl_result.reg, rl_src.reg);
422  StoreValueWide(rl_dest, rl_result);
423}
424
425bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
426  RegLocation rl_src = info->args[0];
427  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
428  rl_src = LoadValueWide(rl_src, kFPReg);
429  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
430  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
431  StoreValueWide(rl_dest, rl_result);
432  return true;
433}
434
435
436
437}  // namespace art
438