fp_arm.cc revision 397297f92ff3004cfcd623690d98bc49edf8c032
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "arm_lir.h"
18#include "codegen_arm.h"
19#include "dex/quick/mir_to_lir-inl.h"
20
21namespace art {
22
23void ArmMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
24                                 RegLocation rl_src1, RegLocation rl_src2) {
25  int op = kThumbBkpt;
26  RegLocation rl_result;
27
28  /*
29   * Don't attempt to optimize register usage since these opcodes call out to
30   * the handlers.
31   */
32  switch (opcode) {
33    case Instruction::ADD_FLOAT_2ADDR:
34    case Instruction::ADD_FLOAT:
35      op = kThumb2Vadds;
36      break;
37    case Instruction::SUB_FLOAT_2ADDR:
38    case Instruction::SUB_FLOAT:
39      op = kThumb2Vsubs;
40      break;
41    case Instruction::DIV_FLOAT_2ADDR:
42    case Instruction::DIV_FLOAT:
43      op = kThumb2Vdivs;
44      break;
45    case Instruction::MUL_FLOAT_2ADDR:
46    case Instruction::MUL_FLOAT:
47      op = kThumb2Vmuls;
48      break;
49    case Instruction::REM_FLOAT_2ADDR:
50    case Instruction::REM_FLOAT:
51      FlushAllRegs();   // Send everything to home location
52      CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false);
53      rl_result = GetReturn(kFPReg);
54      StoreValue(rl_dest, rl_result);
55      return;
56    case Instruction::NEG_FLOAT:
57      GenNegFloat(rl_dest, rl_src1);
58      return;
59    default:
60      LOG(FATAL) << "Unexpected opcode: " << opcode;
61  }
62  rl_src1 = LoadValue(rl_src1, kFPReg);
63  rl_src2 = LoadValue(rl_src2, kFPReg);
64  rl_result = EvalLoc(rl_dest, kFPReg, true);
65  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
66  StoreValue(rl_dest, rl_result);
67}
68
69void ArmMir2Lir::GenArithOpDouble(Instruction::Code opcode,
70                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
71  int op = kThumbBkpt;
72  RegLocation rl_result;
73
74  switch (opcode) {
75    case Instruction::ADD_DOUBLE_2ADDR:
76    case Instruction::ADD_DOUBLE:
77      op = kThumb2Vaddd;
78      break;
79    case Instruction::SUB_DOUBLE_2ADDR:
80    case Instruction::SUB_DOUBLE:
81      op = kThumb2Vsubd;
82      break;
83    case Instruction::DIV_DOUBLE_2ADDR:
84    case Instruction::DIV_DOUBLE:
85      op = kThumb2Vdivd;
86      break;
87    case Instruction::MUL_DOUBLE_2ADDR:
88    case Instruction::MUL_DOUBLE:
89      op = kThumb2Vmuld;
90      break;
91    case Instruction::REM_DOUBLE_2ADDR:
92    case Instruction::REM_DOUBLE:
93      FlushAllRegs();   // Send everything to home location
94      CallRuntimeHelperRegLocationRegLocation(kQuickFmod, rl_src1, rl_src2, false);
95      rl_result = GetReturnWide(kFPReg);
96      StoreValueWide(rl_dest, rl_result);
97      return;
98    case Instruction::NEG_DOUBLE:
99      GenNegDouble(rl_dest, rl_src1);
100      return;
101    default:
102      LOG(FATAL) << "Unexpected opcode: " << opcode;
103  }
104
105  rl_src1 = LoadValueWide(rl_src1, kFPReg);
106  DCHECK(rl_src1.wide);
107  rl_src2 = LoadValueWide(rl_src2, kFPReg);
108  DCHECK(rl_src2.wide);
109  rl_result = EvalLoc(rl_dest, kFPReg, true);
110  DCHECK(rl_dest.wide);
111  DCHECK(rl_result.wide);
112  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
113  StoreValueWide(rl_dest, rl_result);
114}
115
116void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
117  int op = kThumbBkpt;
118  int src_reg;
119  RegLocation rl_result;
120
121  switch (opcode) {
122    case Instruction::INT_TO_FLOAT:
123      op = kThumb2VcvtIF;
124      break;
125    case Instruction::FLOAT_TO_INT:
126      op = kThumb2VcvtFI;
127      break;
128    case Instruction::DOUBLE_TO_FLOAT:
129      op = kThumb2VcvtDF;
130      break;
131    case Instruction::FLOAT_TO_DOUBLE:
132      op = kThumb2VcvtFd;
133      break;
134    case Instruction::INT_TO_DOUBLE:
135      op = kThumb2VcvtF64S32;
136      break;
137    case Instruction::DOUBLE_TO_INT:
138      op = kThumb2VcvtDI;
139      break;
140    case Instruction::LONG_TO_DOUBLE: {
141      rl_src = LoadValueWide(rl_src, kFPReg);
142      RegisterInfo* info = GetRegInfo(rl_src.reg);
143      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
144      DCHECK(src_low.Valid());
145      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
146      DCHECK(src_high.Valid());
147      rl_result = EvalLoc(rl_dest, kFPReg, true);
148      RegStorage tmp1 = AllocTempDouble();
149      RegStorage tmp2 = AllocTempDouble();
150
151      NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
152      NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
153      LoadConstantWide(tmp2, 0x41f0000000000000LL);
154      NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
155      FreeTemp(tmp1);
156      FreeTemp(tmp2);
157      StoreValueWide(rl_dest, rl_result);
158      return;
159    }
160    case Instruction::FLOAT_TO_LONG:
161      GenConversionCall(kQuickF2l, rl_dest, rl_src);
162      return;
163    case Instruction::LONG_TO_FLOAT: {
164      rl_src = LoadValueWide(rl_src, kFPReg);
165      RegisterInfo* info = GetRegInfo(rl_src.reg);
166      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
167      DCHECK(src_low.Valid());
168      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
169      DCHECK(src_high.Valid());
170      rl_result = EvalLoc(rl_dest, kFPReg, true);
171      // Allocate temp registers.
172      RegStorage high_val = AllocTempDouble();
173      RegStorage low_val = AllocTempDouble();
174      RegStorage const_val = AllocTempDouble();
175      // Long to double.
176      NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
177      NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
178      LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
179      NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
180      // Double to float.
181      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
182      // Free temp registers.
183      FreeTemp(high_val);
184      FreeTemp(low_val);
185      FreeTemp(const_val);
186      // Store result.
187      StoreValue(rl_dest, rl_result);
188      return;
189    }
190    case Instruction::DOUBLE_TO_LONG:
191      GenConversionCall(kQuickD2l, rl_dest, rl_src);
192      return;
193    default:
194      LOG(FATAL) << "Unexpected opcode: " << opcode;
195  }
196  if (rl_src.wide) {
197    rl_src = LoadValueWide(rl_src, kFPReg);
198    src_reg = rl_src.reg.GetReg();
199  } else {
200    rl_src = LoadValue(rl_src, kFPReg);
201    src_reg = rl_src.reg.GetReg();
202  }
203  if (rl_dest.wide) {
204    rl_result = EvalLoc(rl_dest, kFPReg, true);
205    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
206    StoreValueWide(rl_dest, rl_result);
207  } else {
208    rl_result = EvalLoc(rl_dest, kFPReg, true);
209    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
210    StoreValue(rl_dest, rl_result);
211  }
212}
213
214void ArmMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
215                                     bool is_double) {
216  LIR* target = &block_label_list_[bb->taken];
217  RegLocation rl_src1;
218  RegLocation rl_src2;
219  if (is_double) {
220    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
221    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
222    rl_src1 = LoadValueWide(rl_src1, kFPReg);
223    rl_src2 = LoadValueWide(rl_src2, kFPReg);
224    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
225  } else {
226    rl_src1 = mir_graph_->GetSrc(mir, 0);
227    rl_src2 = mir_graph_->GetSrc(mir, 1);
228    rl_src1 = LoadValue(rl_src1, kFPReg);
229    rl_src2 = LoadValue(rl_src2, kFPReg);
230    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
231  }
232  NewLIR0(kThumb2Fmstat);
233  ConditionCode ccode = mir->meta.ccode;
234  switch (ccode) {
235    case kCondEq:
236    case kCondNe:
237      break;
238    case kCondLt:
239      if (gt_bias) {
240        ccode = kCondMi;
241      }
242      break;
243    case kCondLe:
244      if (gt_bias) {
245        ccode = kCondLs;
246      }
247      break;
248    case kCondGt:
249      if (gt_bias) {
250        ccode = kCondHi;
251      }
252      break;
253    case kCondGe:
254      if (gt_bias) {
255        ccode = kCondUge;
256      }
257      break;
258    default:
259      LOG(FATAL) << "Unexpected ccode: " << ccode;
260  }
261  OpCondBranch(ccode, target);
262}
263
264
265void ArmMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
266                          RegLocation rl_src1, RegLocation rl_src2) {
267  bool is_double = false;
268  int default_result = -1;
269  RegLocation rl_result;
270
271  switch (opcode) {
272    case Instruction::CMPL_FLOAT:
273      is_double = false;
274      default_result = -1;
275      break;
276    case Instruction::CMPG_FLOAT:
277      is_double = false;
278      default_result = 1;
279      break;
280    case Instruction::CMPL_DOUBLE:
281      is_double = true;
282      default_result = -1;
283      break;
284    case Instruction::CMPG_DOUBLE:
285      is_double = true;
286      default_result = 1;
287      break;
288    default:
289      LOG(FATAL) << "Unexpected opcode: " << opcode;
290  }
291  if (is_double) {
292    rl_src1 = LoadValueWide(rl_src1, kFPReg);
293    rl_src2 = LoadValueWide(rl_src2, kFPReg);
294    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
295    ClobberSReg(rl_dest.s_reg_low);
296    rl_result = EvalLoc(rl_dest, kCoreReg, true);
297    LoadConstant(rl_result.reg, default_result);
298    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
299  } else {
300    rl_src1 = LoadValue(rl_src1, kFPReg);
301    rl_src2 = LoadValue(rl_src2, kFPReg);
302    // In case result vreg is also a srcvreg, break association to avoid useless copy by EvalLoc()
303    ClobberSReg(rl_dest.s_reg_low);
304    rl_result = EvalLoc(rl_dest, kCoreReg, true);
305    LoadConstant(rl_result.reg, default_result);
306    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
307  }
308  DCHECK(!rl_result.reg.IsFloat());
309  NewLIR0(kThumb2Fmstat);
310
311  LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
312  NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(),
313          ModifiedImmediate(-default_result));  // Must not alter ccodes
314  OpEndIT(it);
315
316  it = OpIT(kCondEq, "");
317  LoadConstant(rl_result.reg, 0);
318  OpEndIT(it);
319
320  StoreValue(rl_dest, rl_result);
321}
322
323void ArmMir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
324  RegLocation rl_result;
325  rl_src = LoadValue(rl_src, kFPReg);
326  rl_result = EvalLoc(rl_dest, kFPReg, true);
327  NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
328  StoreValue(rl_dest, rl_result);
329}
330
331void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
332  RegLocation rl_result;
333  rl_src = LoadValueWide(rl_src, kFPReg);
334  rl_result = EvalLoc(rl_dest, kFPReg, true);
335  NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
336  StoreValueWide(rl_dest, rl_result);
337}
338
339static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
340  // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
341  if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
342      (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
343    return kCoreReg;
344  }
345  // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
346  if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
347    return kFPReg;
348  }
349  // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
350  // is faster using a core reg or fp reg depends on the particular CPU. Without further
351  // investigation and testing we prefer core register. (If the result is subsequently used in
352  // another fp operation, the dalvik reg will probably get promoted and that should be handled
353  // by the cases above.)
354  return kCoreReg;
355}
356
357bool ArmMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
358  if (info->result.location == kLocInvalid) {
359    return true;  // Result is unused: inlining successful, no code generated.
360  }
361  RegLocation rl_dest = info->result;
362  RegLocation rl_src = UpdateLoc(info->args[0]);
363  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
364  rl_src = LoadValue(rl_src, reg_class);
365  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
366  if (reg_class == kFPReg) {
367    NewLIR2(kThumb2Vabss, rl_result.reg.GetReg(), rl_src.reg.GetReg());
368  } else {
369    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
370  }
371  StoreValue(rl_dest, rl_result);
372  return true;
373}
374
375bool ArmMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
376  if (info->result.location == kLocInvalid) {
377    return true;  // Result is unused: inlining successful, no code generated.
378  }
379  RegLocation rl_dest = info->result;
380  RegLocation rl_src = UpdateLocWide(info->args[0]);
381  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
382  rl_src = LoadValueWide(rl_src, reg_class);
383  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
384  if (reg_class == kFPReg) {
385    NewLIR2(kThumb2Vabsd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
386  } else if (rl_result.reg.GetLow().GetReg() != rl_src.reg.GetHigh().GetReg()) {
387    // No inconvenient overlap.
388    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
389    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x7fffffff);
390  } else {
391    // Inconvenient overlap, use a temp register to preserve the high word of the source.
392    RegStorage rs_tmp = AllocTemp();
393    OpRegCopy(rs_tmp, rl_src.reg.GetHigh());
394    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
395    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rs_tmp, 0x7fffffff);
396    FreeTemp(rs_tmp);
397  }
398  StoreValueWide(rl_dest, rl_result);
399  return true;
400}
401
402bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) {
403  DCHECK_EQ(cu_->instruction_set, kThumb2);
404  RegLocation rl_src = info->args[0];
405  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
406  rl_src = LoadValueWide(rl_src, kFPReg);
407  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
408  NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
409  StoreValueWide(rl_dest, rl_result);
410  return true;
411}
412
413
414}  // namespace art
415