fp_x86.cc revision 7940e44f4517de5e2634a7e07d58d0fb26160513
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "codegen_x86.h"
18#include "dex/quick/mir_to_lir-inl.h"
19#include "x86_lir.h"
20
21namespace art {
22
23void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
24                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
25  X86OpCode op = kX86Nop;
26  RegLocation rl_result;
27
28  /*
29   * Don't attempt to optimize register usage since these opcodes call out to
30   * the handlers.
31   */
32  switch (opcode) {
33    case Instruction::ADD_FLOAT_2ADDR:
34    case Instruction::ADD_FLOAT:
35      op = kX86AddssRR;
36      break;
37    case Instruction::SUB_FLOAT_2ADDR:
38    case Instruction::SUB_FLOAT:
39      op = kX86SubssRR;
40      break;
41    case Instruction::DIV_FLOAT_2ADDR:
42    case Instruction::DIV_FLOAT:
43      op = kX86DivssRR;
44      break;
45    case Instruction::MUL_FLOAT_2ADDR:
46    case Instruction::MUL_FLOAT:
47      op = kX86MulssRR;
48      break;
49    case Instruction::REM_FLOAT_2ADDR:
50    case Instruction::REM_FLOAT:
51      FlushAllRegs();   // Send everything to home location
52      CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2, false);
53      rl_result = GetReturn(true);
54      StoreValue(rl_dest, rl_result);
55      return;
56    case Instruction::NEG_FLOAT:
57      GenNegFloat(rl_dest, rl_src1);
58      return;
59    default:
60      LOG(FATAL) << "Unexpected opcode: " << opcode;
61  }
62  rl_src1 = LoadValue(rl_src1, kFPReg);
63  rl_src2 = LoadValue(rl_src2, kFPReg);
64  rl_result = EvalLoc(rl_dest, kFPReg, true);
65  int r_dest = rl_result.low_reg;
66  int r_src1 = rl_src1.low_reg;
67  int r_src2 = rl_src2.low_reg;
68  if (r_dest == r_src2) {
69    r_src2 = AllocTempFloat();
70    OpRegCopy(r_src2, r_dest);
71  }
72  OpRegCopy(r_dest, r_src1);
73  NewLIR2(op, r_dest, r_src2);
74  StoreValue(rl_dest, rl_result);
75}
76
77void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
78                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
79  X86OpCode op = kX86Nop;
80  RegLocation rl_result;
81
82  switch (opcode) {
83    case Instruction::ADD_DOUBLE_2ADDR:
84    case Instruction::ADD_DOUBLE:
85      op = kX86AddsdRR;
86      break;
87    case Instruction::SUB_DOUBLE_2ADDR:
88    case Instruction::SUB_DOUBLE:
89      op = kX86SubsdRR;
90      break;
91    case Instruction::DIV_DOUBLE_2ADDR:
92    case Instruction::DIV_DOUBLE:
93      op = kX86DivsdRR;
94      break;
95    case Instruction::MUL_DOUBLE_2ADDR:
96    case Instruction::MUL_DOUBLE:
97      op = kX86MulsdRR;
98      break;
99    case Instruction::REM_DOUBLE_2ADDR:
100    case Instruction::REM_DOUBLE:
101      FlushAllRegs();   // Send everything to home location
102      CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2, false);
103      rl_result = GetReturnWide(true);
104      StoreValueWide(rl_dest, rl_result);
105      return;
106    case Instruction::NEG_DOUBLE:
107      GenNegDouble(rl_dest, rl_src1);
108      return;
109    default:
110      LOG(FATAL) << "Unexpected opcode: " << opcode;
111  }
112  rl_src1 = LoadValueWide(rl_src1, kFPReg);
113  DCHECK(rl_src1.wide);
114  rl_src2 = LoadValueWide(rl_src2, kFPReg);
115  DCHECK(rl_src2.wide);
116  rl_result = EvalLoc(rl_dest, kFPReg, true);
117  DCHECK(rl_dest.wide);
118  DCHECK(rl_result.wide);
119  int r_dest = S2d(rl_result.low_reg, rl_result.high_reg);
120  int r_src1 = S2d(rl_src1.low_reg, rl_src1.high_reg);
121  int r_src2 = S2d(rl_src2.low_reg, rl_src2.high_reg);
122  if (r_dest == r_src2) {
123    r_src2 = AllocTempDouble() | X86_FP_DOUBLE;
124    OpRegCopy(r_src2, r_dest);
125  }
126  OpRegCopy(r_dest, r_src1);
127  NewLIR2(op, r_dest, r_src2);
128  StoreValueWide(rl_dest, rl_result);
129}
130
131void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
132                               RegLocation rl_src) {
133  RegisterClass rcSrc = kFPReg;
134  X86OpCode op = kX86Nop;
135  int src_reg;
136  RegLocation rl_result;
137  switch (opcode) {
138    case Instruction::INT_TO_FLOAT:
139      rcSrc = kCoreReg;
140      op = kX86Cvtsi2ssRR;
141      break;
142    case Instruction::DOUBLE_TO_FLOAT:
143      rcSrc = kFPReg;
144      op = kX86Cvtsd2ssRR;
145      break;
146    case Instruction::FLOAT_TO_DOUBLE:
147      rcSrc = kFPReg;
148      op = kX86Cvtss2sdRR;
149      break;
150    case Instruction::INT_TO_DOUBLE:
151      rcSrc = kCoreReg;
152      op = kX86Cvtsi2sdRR;
153      break;
154    case Instruction::FLOAT_TO_INT: {
155      rl_src = LoadValue(rl_src, kFPReg);
156      src_reg = rl_src.low_reg;
157      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
158      ClobberSReg(rl_dest.s_reg_low);
159      rl_result = EvalLoc(rl_dest, kCoreReg, true);
160      int temp_reg = AllocTempFloat();
161
162      LoadConstant(rl_result.low_reg, 0x7fffffff);
163      NewLIR2(kX86Cvtsi2ssRR, temp_reg, rl_result.low_reg);
164      NewLIR2(kX86ComissRR, src_reg, temp_reg);
165      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
166      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
167      NewLIR2(kX86Cvttss2siRR, rl_result.low_reg, src_reg);
168      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
169      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
170      NewLIR2(kX86Xor32RR, rl_result.low_reg, rl_result.low_reg);
171      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
172      branch_normal->target = NewLIR0(kPseudoTargetLabel);
173      StoreValue(rl_dest, rl_result);
174      return;
175    }
176    case Instruction::DOUBLE_TO_INT: {
177      rl_src = LoadValueWide(rl_src, kFPReg);
178      src_reg = rl_src.low_reg;
179      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
180      ClobberSReg(rl_dest.s_reg_low);
181      rl_result = EvalLoc(rl_dest, kCoreReg, true);
182      int temp_reg = AllocTempDouble() | X86_FP_DOUBLE;
183
184      LoadConstant(rl_result.low_reg, 0x7fffffff);
185      NewLIR2(kX86Cvtsi2sdRR, temp_reg, rl_result.low_reg);
186      NewLIR2(kX86ComisdRR, src_reg, temp_reg);
187      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
188      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
189      NewLIR2(kX86Cvttsd2siRR, rl_result.low_reg, src_reg);
190      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
191      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
192      NewLIR2(kX86Xor32RR, rl_result.low_reg, rl_result.low_reg);
193      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
194      branch_normal->target = NewLIR0(kPseudoTargetLabel);
195      StoreValue(rl_dest, rl_result);
196      return;
197    }
198    case Instruction::LONG_TO_DOUBLE:
199      GenConversionCall(ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src);
200      return;
201    case Instruction::LONG_TO_FLOAT:
202      // TODO: inline by using memory as a 64-bit source. Be careful about promoted registers.
203      GenConversionCall(ENTRYPOINT_OFFSET(pL2f), rl_dest, rl_src);
204      return;
205    case Instruction::FLOAT_TO_LONG:
206      GenConversionCall(ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
207      return;
208    case Instruction::DOUBLE_TO_LONG:
209      GenConversionCall(ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src);
210      return;
211    default:
212      LOG(INFO) << "Unexpected opcode: " << opcode;
213  }
214  if (rl_src.wide) {
215    rl_src = LoadValueWide(rl_src, rcSrc);
216    src_reg = S2d(rl_src.low_reg, rl_src.high_reg);
217  } else {
218    rl_src = LoadValue(rl_src, rcSrc);
219    src_reg = rl_src.low_reg;
220  }
221  if (rl_dest.wide) {
222    rl_result = EvalLoc(rl_dest, kFPReg, true);
223    NewLIR2(op, S2d(rl_result.low_reg, rl_result.high_reg), src_reg);
224    StoreValueWide(rl_dest, rl_result);
225  } else {
226    rl_result = EvalLoc(rl_dest, kFPReg, true);
227    NewLIR2(op, rl_result.low_reg, src_reg);
228    StoreValue(rl_dest, rl_result);
229  }
230}
231
232void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
233                          RegLocation rl_src1, RegLocation rl_src2) {
234  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
235  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
236  int src_reg1;
237  int src_reg2;
238  if (single) {
239    rl_src1 = LoadValue(rl_src1, kFPReg);
240    src_reg1 = rl_src1.low_reg;
241    rl_src2 = LoadValue(rl_src2, kFPReg);
242    src_reg2 = rl_src2.low_reg;
243  } else {
244    rl_src1 = LoadValueWide(rl_src1, kFPReg);
245    src_reg1 = S2d(rl_src1.low_reg, rl_src1.high_reg);
246    rl_src2 = LoadValueWide(rl_src2, kFPReg);
247    src_reg2 = S2d(rl_src2.low_reg, rl_src2.high_reg);
248  }
249  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
250  ClobberSReg(rl_dest.s_reg_low);
251  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
252  LoadConstantNoClobber(rl_result.low_reg, unordered_gt ? 1 : 0);
253  if (single) {
254    NewLIR2(kX86UcomissRR, src_reg1, src_reg2);
255  } else {
256    NewLIR2(kX86UcomisdRR, src_reg1, src_reg2);
257  }
258  LIR* branch = NULL;
259  if (unordered_gt) {
260    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
261  }
262  // If the result reg can't be byte accessed, use a jump and move instead of a set.
263  if (rl_result.low_reg >= 4) {
264    LIR* branch2 = NULL;
265    if (unordered_gt) {
266      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
267      NewLIR2(kX86Mov32RI, rl_result.low_reg, 0x0);
268    } else {
269      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
270      NewLIR2(kX86Mov32RI, rl_result.low_reg, 0x1);
271    }
272    branch2->target = NewLIR0(kPseudoTargetLabel);
273  } else {
274    NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondA /* above - unsigned > */);
275  }
276  NewLIR2(kX86Sbb32RI, rl_result.low_reg, 0);
277  if (unordered_gt) {
278    branch->target = NewLIR0(kPseudoTargetLabel);
279  }
280  StoreValue(rl_dest, rl_result);
281}
282
283void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
284                                     bool is_double) {
285  LIR* taken = &block_label_list_[bb->taken->id];
286  LIR* not_taken = &block_label_list_[bb->fall_through->id];
287  LIR* branch = NULL;
288  RegLocation rl_src1;
289  RegLocation rl_src2;
290  if (is_double) {
291    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
292    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
293    rl_src1 = LoadValueWide(rl_src1, kFPReg);
294    rl_src2 = LoadValueWide(rl_src2, kFPReg);
295    NewLIR2(kX86UcomisdRR, S2d(rl_src1.low_reg, rl_src1.high_reg),
296            S2d(rl_src2.low_reg, rl_src2.high_reg));
297  } else {
298    rl_src1 = mir_graph_->GetSrc(mir, 0);
299    rl_src2 = mir_graph_->GetSrc(mir, 1);
300    rl_src1 = LoadValue(rl_src1, kFPReg);
301    rl_src2 = LoadValue(rl_src2, kFPReg);
302    NewLIR2(kX86UcomissRR, rl_src1.low_reg, rl_src2.low_reg);
303  }
304  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
305  switch (ccode) {
306    case kCondEq:
307      if (!gt_bias) {
308        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
309        branch->target = not_taken;
310      }
311      break;
312    case kCondNe:
313      if (!gt_bias) {
314        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
315        branch->target = taken;
316      }
317      break;
318    case kCondLt:
319      if (gt_bias) {
320        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
321        branch->target = not_taken;
322      }
323      ccode = kCondCs;
324      break;
325    case kCondLe:
326      if (gt_bias) {
327        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
328        branch->target = not_taken;
329      }
330      ccode = kCondLs;
331      break;
332    case kCondGt:
333      if (gt_bias) {
334        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
335        branch->target = taken;
336      }
337      ccode = kCondHi;
338      break;
339    case kCondGe:
340      if (gt_bias) {
341        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
342        branch->target = taken;
343      }
344      ccode = kCondCc;
345      break;
346    default:
347      LOG(FATAL) << "Unexpected ccode: " << ccode;
348  }
349  OpCondBranch(ccode, taken);
350}
351
352void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src)
353{
354  RegLocation rl_result;
355  rl_src = LoadValue(rl_src, kCoreReg);
356  rl_result = EvalLoc(rl_dest, kCoreReg, true);
357  OpRegRegImm(kOpAdd, rl_result.low_reg, rl_src.low_reg, 0x80000000);
358  StoreValue(rl_dest, rl_result);
359}
360
361void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src)
362{
363  RegLocation rl_result;
364  rl_src = LoadValueWide(rl_src, kCoreReg);
365  rl_result = EvalLoc(rl_dest, kCoreReg, true);
366  OpRegRegImm(kOpAdd, rl_result.high_reg, rl_src.high_reg, 0x80000000);
367  OpRegCopy(rl_result.low_reg, rl_src.low_reg);
368  StoreValueWide(rl_dest, rl_result);
369}
370
371bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
372  DCHECK_NE(cu_->instruction_set, kThumb2);
373  return false;
374}
375
376
377
378} //  namespace art
379