fp_arm.cc revision 675e09b2753c2fcd521bd8f0230a0abf06e9b0e9
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "arm_lir.h"
18#include "codegen_arm.h"
19#include "dex/quick/mir_to_lir-inl.h"
20
21namespace art {
22
23void ArmMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
24                                 RegLocation rl_src1, RegLocation rl_src2) {
25  int op = kThumbBkpt;
26  RegLocation rl_result;
27
28  /*
29   * Don't attempt to optimize register usage since these opcodes call out to
30   * the handlers.
31   */
32  switch (opcode) {
33    case Instruction::ADD_FLOAT_2ADDR:
34    case Instruction::ADD_FLOAT:
35      op = kThumb2Vadds;
36      break;
37    case Instruction::SUB_FLOAT_2ADDR:
38    case Instruction::SUB_FLOAT:
39      op = kThumb2Vsubs;
40      break;
41    case Instruction::DIV_FLOAT_2ADDR:
42    case Instruction::DIV_FLOAT:
43      op = kThumb2Vdivs;
44      break;
45    case Instruction::MUL_FLOAT_2ADDR:
46    case Instruction::MUL_FLOAT:
47      op = kThumb2Vmuls;
48      break;
49    case Instruction::REM_FLOAT_2ADDR:
50    case Instruction::REM_FLOAT:
51      FlushAllRegs();   // Send everything to home location
52      CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false);
53      rl_result = GetReturn(kFPReg);
54      StoreValue(rl_dest, rl_result);
55      return;
56    case Instruction::NEG_FLOAT:
57      GenNegFloat(rl_dest, rl_src1);
58      return;
59    default:
60      LOG(FATAL) << "Unexpected opcode: " << opcode;
61  }
62  rl_src1 = LoadValue(rl_src1, kFPReg);
63  rl_src2 = LoadValue(rl_src2, kFPReg);
64  rl_result = EvalLoc(rl_dest, kFPReg, true);
65  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
66  StoreValue(rl_dest, rl_result);
67}
68
69void ArmMir2Lir::GenArithOpDouble(Instruction::Code opcode,
70                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
71  int op = kThumbBkpt;
72  RegLocation rl_result;
73
74  switch (opcode) {
75    case Instruction::ADD_DOUBLE_2ADDR:
76    case Instruction::ADD_DOUBLE:
77      op = kThumb2Vaddd;
78      break;
79    case Instruction::SUB_DOUBLE_2ADDR:
80    case Instruction::SUB_DOUBLE:
81      op = kThumb2Vsubd;
82      break;
83    case Instruction::DIV_DOUBLE_2ADDR:
84    case Instruction::DIV_DOUBLE:
85      op = kThumb2Vdivd;
86      break;
87    case Instruction::MUL_DOUBLE_2ADDR:
88    case Instruction::MUL_DOUBLE:
89      op = kThumb2Vmuld;
90      break;
91    case Instruction::REM_DOUBLE_2ADDR:
92    case Instruction::REM_DOUBLE:
93      FlushAllRegs();   // Send everything to home location
94      CallRuntimeHelperRegLocationRegLocation(kQuickFmod, rl_src1, rl_src2, false);
95      rl_result = GetReturnWide(kFPReg);
96      StoreValueWide(rl_dest, rl_result);
97      return;
98    case Instruction::NEG_DOUBLE:
99      GenNegDouble(rl_dest, rl_src1);
100      return;
101    default:
102      LOG(FATAL) << "Unexpected opcode: " << opcode;
103  }
104
105  rl_src1 = LoadValueWide(rl_src1, kFPReg);
106  DCHECK(rl_src1.wide);
107  rl_src2 = LoadValueWide(rl_src2, kFPReg);
108  DCHECK(rl_src2.wide);
109  rl_result = EvalLoc(rl_dest, kFPReg, true);
110  DCHECK(rl_dest.wide);
111  DCHECK(rl_result.wide);
112  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
113  StoreValueWide(rl_dest, rl_result);
114}
115
116void ArmMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
117                                            int32_t constant) {
118  RegLocation rl_result;
119  RegStorage r_tmp = AllocTempSingle();
120  LoadConstantNoClobber(r_tmp, constant);
121  rl_src1 = LoadValue(rl_src1, kFPReg);
122  rl_result = EvalLoc(rl_dest, kFPReg, true);
123  NewLIR3(kThumb2Vmuls, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
124  StoreValue(rl_dest, rl_result);
125}
126
127void ArmMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
128                                             int64_t constant) {
129  RegLocation rl_result;
130  RegStorage r_tmp = AllocTempDouble();
131  DCHECK(r_tmp.IsDouble());
132  LoadConstantWide(r_tmp, constant);
133  rl_src1 = LoadValueWide(rl_src1, kFPReg);
134  DCHECK(rl_src1.wide);
135  rl_result = EvalLocWide(rl_dest, kFPReg, true);
136  DCHECK(rl_dest.wide);
137  DCHECK(rl_result.wide);
138  NewLIR3(kThumb2Vmuld, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
139  StoreValueWide(rl_dest, rl_result);
140}
141
142void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
143  int op = kThumbBkpt;
144  int src_reg;
145  RegLocation rl_result;
146
147  switch (opcode) {
148    case Instruction::INT_TO_FLOAT:
149      op = kThumb2VcvtIF;
150      break;
151    case Instruction::FLOAT_TO_INT:
152      op = kThumb2VcvtFI;
153      break;
154    case Instruction::DOUBLE_TO_FLOAT:
155      op = kThumb2VcvtDF;
156      break;
157    case Instruction::FLOAT_TO_DOUBLE:
158      op = kThumb2VcvtFd;
159      break;
160    case Instruction::INT_TO_DOUBLE:
161      op = kThumb2VcvtF64S32;
162      break;
163    case Instruction::DOUBLE_TO_INT:
164      op = kThumb2VcvtDI;
165      break;
166    case Instruction::LONG_TO_DOUBLE: {
167      rl_src = LoadValueWide(rl_src, kFPReg);
168      RegisterInfo* info = GetRegInfo(rl_src.reg);
169      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
170      DCHECK(src_low.Valid());
171      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
172      DCHECK(src_high.Valid());
173      rl_result = EvalLoc(rl_dest, kFPReg, true);
174      RegStorage tmp1 = AllocTempDouble();
175      RegStorage tmp2 = AllocTempDouble();
176
177      NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
178      NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
179      LoadConstantWide(tmp2, 0x41f0000000000000LL);
180      NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
181      FreeTemp(tmp1);
182      FreeTemp(tmp2);
183      StoreValueWide(rl_dest, rl_result);
184      return;
185    }
186    case Instruction::FLOAT_TO_LONG:
187      GenConversionCall(kQuickF2l, rl_dest, rl_src);
188      return;
189    case Instruction::LONG_TO_FLOAT: {
190      rl_src = LoadValueWide(rl_src, kFPReg);
191      RegisterInfo* info = GetRegInfo(rl_src.reg);
192      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
193      DCHECK(src_low.Valid());
194      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
195      DCHECK(src_high.Valid());
196      rl_result = EvalLoc(rl_dest, kFPReg, true);
197      // Allocate temp registers.
198      RegStorage high_val = AllocTempDouble();
199      RegStorage low_val = AllocTempDouble();
200      RegStorage const_val = AllocTempDouble();
201      // Long to double.
202      NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
203      NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
204      LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
205      NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
206      // Double to float.
207      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
208      // Free temp registers.
209      FreeTemp(high_val);
210      FreeTemp(low_val);
211      FreeTemp(const_val);
212      // Store result.
213      StoreValue(rl_dest, rl_result);
214      return;
215    }
216    case Instruction::DOUBLE_TO_LONG:
217      GenConversionCall(kQuickD2l, rl_dest, rl_src);
218      return;
219    default:
220      LOG(FATAL) << "Unexpected opcode: " << opcode;
221  }
222  if (rl_src.wide) {
223    rl_src = LoadValueWide(rl_src, kFPReg);
224    src_reg = rl_src.reg.GetReg();
225  } else {
226    rl_src = LoadValue(rl_src, kFPReg);
227    src_reg = rl_src.reg.GetReg();
228  }
229  if (rl_dest.wide) {
230    rl_result = EvalLoc(rl_dest, kFPReg, true);
231    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
232    StoreValueWide(rl_dest, rl_result);
233  } else {
234    rl_result = EvalLoc(rl_dest, kFPReg, true);
235    NewLIR2(op, rl_result.reg.GetReg(), src_reg);
236    StoreValue(rl_dest, rl_result);
237  }
238}
239
240void ArmMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
241                                     bool is_double) {
242  LIR* target = &block_label_list_[bb->taken];
243  RegLocation rl_src1;
244  RegLocation rl_src2;
245  if (is_double) {
246    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
247    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
248    rl_src1 = LoadValueWide(rl_src1, kFPReg);
249    rl_src2 = LoadValueWide(rl_src2, kFPReg);
250    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
251  } else {
252    rl_src1 = mir_graph_->GetSrc(mir, 0);
253    rl_src2 = mir_graph_->GetSrc(mir, 1);
254    rl_src1 = LoadValue(rl_src1, kFPReg);
255    rl_src2 = LoadValue(rl_src2, kFPReg);
256    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
257  }
258  NewLIR0(kThumb2Fmstat);
259  ConditionCode ccode = mir->meta.ccode;
260  switch (ccode) {
261    case kCondEq:
262    case kCondNe:
263      break;
264    case kCondLt:
265      if (gt_bias) {
266        ccode = kCondMi;
267      }
268      break;
269    case kCondLe:
270      if (gt_bias) {
271        ccode = kCondLs;
272      }
273      break;
274    case kCondGt:
275      if (gt_bias) {
276        ccode = kCondHi;
277      }
278      break;
279    case kCondGe:
280      if (gt_bias) {
281        ccode = kCondUge;
282      }
283      break;
284    default:
285      LOG(FATAL) << "Unexpected ccode: " << ccode;
286  }
287  OpCondBranch(ccode, target);
288}
289
290
291void ArmMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
292                          RegLocation rl_src1, RegLocation rl_src2) {
293  bool is_double = false;
294  int default_result = -1;
295  RegLocation rl_result;
296
297  switch (opcode) {
298    case Instruction::CMPL_FLOAT:
299      is_double = false;
300      default_result = -1;
301      break;
302    case Instruction::CMPG_FLOAT:
303      is_double = false;
304      default_result = 1;
305      break;
306    case Instruction::CMPL_DOUBLE:
307      is_double = true;
308      default_result = -1;
309      break;
310    case Instruction::CMPG_DOUBLE:
311      is_double = true;
312      default_result = 1;
313      break;
314    default:
315      LOG(FATAL) << "Unexpected opcode: " << opcode;
316  }
317  if (is_double) {
318    rl_src1 = LoadValueWide(rl_src1, kFPReg);
319    rl_src2 = LoadValueWide(rl_src2, kFPReg);
320    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
321    ClobberSReg(rl_dest.s_reg_low);
322    rl_result = EvalLoc(rl_dest, kCoreReg, true);
323    LoadConstant(rl_result.reg, default_result);
324    NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
325  } else {
326    rl_src1 = LoadValue(rl_src1, kFPReg);
327    rl_src2 = LoadValue(rl_src2, kFPReg);
328    // In case result vreg is also a srcvreg, break association to avoid useless copy by EvalLoc()
329    ClobberSReg(rl_dest.s_reg_low);
330    rl_result = EvalLoc(rl_dest, kCoreReg, true);
331    LoadConstant(rl_result.reg, default_result);
332    NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
333  }
334  DCHECK(!rl_result.reg.IsFloat());
335  NewLIR0(kThumb2Fmstat);
336
337  LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
338  NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(),
339          ModifiedImmediate(-default_result));  // Must not alter ccodes
340  OpEndIT(it);
341
342  it = OpIT(kCondEq, "");
343  LoadConstant(rl_result.reg, 0);
344  OpEndIT(it);
345
346  StoreValue(rl_dest, rl_result);
347}
348
349void ArmMir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
350  RegLocation rl_result;
351  rl_src = LoadValue(rl_src, kFPReg);
352  rl_result = EvalLoc(rl_dest, kFPReg, true);
353  NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
354  StoreValue(rl_dest, rl_result);
355}
356
357void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
358  RegLocation rl_result;
359  rl_src = LoadValueWide(rl_src, kFPReg);
360  rl_result = EvalLoc(rl_dest, kFPReg, true);
361  NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
362  StoreValueWide(rl_dest, rl_result);
363}
364
365static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
366  // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
367  if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
368      (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
369    return kCoreReg;
370  }
371  // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
372  if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
373    return kFPReg;
374  }
375  // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
376  // is faster using a core reg or fp reg depends on the particular CPU. Without further
377  // investigation and testing we prefer core register. (If the result is subsequently used in
378  // another fp operation, the dalvik reg will probably get promoted and that should be handled
379  // by the cases above.)
380  return kCoreReg;
381}
382
383bool ArmMir2Lir::GenInlinedAbsFloat(CallInfo* info) {
384  if (info->result.location == kLocInvalid) {
385    return true;  // Result is unused: inlining successful, no code generated.
386  }
387  RegLocation rl_dest = info->result;
388  RegLocation rl_src = UpdateLoc(info->args[0]);
389  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
390  rl_src = LoadValue(rl_src, reg_class);
391  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
392  if (reg_class == kFPReg) {
393    NewLIR2(kThumb2Vabss, rl_result.reg.GetReg(), rl_src.reg.GetReg());
394  } else {
395    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
396  }
397  StoreValue(rl_dest, rl_result);
398  return true;
399}
400
401bool ArmMir2Lir::GenInlinedAbsDouble(CallInfo* info) {
402  if (info->result.location == kLocInvalid) {
403    return true;  // Result is unused: inlining successful, no code generated.
404  }
405  RegLocation rl_dest = info->result;
406  RegLocation rl_src = UpdateLocWide(info->args[0]);
407  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
408  rl_src = LoadValueWide(rl_src, reg_class);
409  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
410  if (reg_class == kFPReg) {
411    NewLIR2(kThumb2Vabsd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
412  } else if (rl_result.reg.GetLow().GetReg() != rl_src.reg.GetHigh().GetReg()) {
413    // No inconvenient overlap.
414    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
415    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x7fffffff);
416  } else {
417    // Inconvenient overlap, use a temp register to preserve the high word of the source.
418    RegStorage rs_tmp = AllocTemp();
419    OpRegCopy(rs_tmp, rl_src.reg.GetHigh());
420    OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
421    OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rs_tmp, 0x7fffffff);
422    FreeTemp(rs_tmp);
423  }
424  StoreValueWide(rl_dest, rl_result);
425  return true;
426}
427
428bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) {
429  DCHECK_EQ(cu_->instruction_set, kThumb2);
430  RegLocation rl_src = info->args[0];
431  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
432  rl_src = LoadValueWide(rl_src, kFPReg);
433  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
434  NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
435  StoreValueWide(rl_dest, rl_result);
436  return true;
437}
438
439
440}  // namespace art
441