fp_arm64.cc revision 984305917bf57b3f8d92965e4715a0370cc5bcfb
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "arm64_lir.h"
18#include "codegen_arm64.h"
19#include "dex/quick/mir_to_lir-inl.h"
20
21namespace art {
22
23void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
24                                   RegLocation rl_src1, RegLocation rl_src2) {
25  int op = kA64Brk1d;
26  RegLocation rl_result;
27
28  switch (opcode) {
29    case Instruction::ADD_FLOAT_2ADDR:
30    case Instruction::ADD_FLOAT:
31      op = kA64Fadd3fff;
32      break;
33    case Instruction::SUB_FLOAT_2ADDR:
34    case Instruction::SUB_FLOAT:
35      op = kA64Fsub3fff;
36      break;
37    case Instruction::DIV_FLOAT_2ADDR:
38    case Instruction::DIV_FLOAT:
39      op = kA64Fdiv3fff;
40      break;
41    case Instruction::MUL_FLOAT_2ADDR:
42    case Instruction::MUL_FLOAT:
43      op = kA64Fmul3fff;
44      break;
45    case Instruction::REM_FLOAT_2ADDR:
46    case Instruction::REM_FLOAT:
47      FlushAllRegs();   // Send everything to home location
48      CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false);
49      rl_result = GetReturn(kFPReg);
50      StoreValue(rl_dest, rl_result);
51      return;
52    case Instruction::NEG_FLOAT:
53      GenNegFloat(rl_dest, rl_src1);
54      return;
55    default:
56      LOG(FATAL) << "Unexpected opcode: " << opcode;
57  }
58  rl_src1 = LoadValue(rl_src1, kFPReg);
59  rl_src2 = LoadValue(rl_src2, kFPReg);
60  rl_result = EvalLoc(rl_dest, kFPReg, true);
61  NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
62  StoreValue(rl_dest, rl_result);
63}
64
65void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
66                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
67  int op = kA64Brk1d;
68  RegLocation rl_result;
69
70  switch (opcode) {
71    case Instruction::ADD_DOUBLE_2ADDR:
72    case Instruction::ADD_DOUBLE:
73      op = kA64Fadd3fff;
74      break;
75    case Instruction::SUB_DOUBLE_2ADDR:
76    case Instruction::SUB_DOUBLE:
77      op = kA64Fsub3fff;
78      break;
79    case Instruction::DIV_DOUBLE_2ADDR:
80    case Instruction::DIV_DOUBLE:
81      op = kA64Fdiv3fff;
82      break;
83    case Instruction::MUL_DOUBLE_2ADDR:
84    case Instruction::MUL_DOUBLE:
85      op = kA64Fmul3fff;
86      break;
87    case Instruction::REM_DOUBLE_2ADDR:
88    case Instruction::REM_DOUBLE:
89      FlushAllRegs();   // Send everything to home location
90      {
91        RegStorage r_tgt = CallHelperSetup(kQuickFmod);
92        LoadValueDirectWideFixed(rl_src1, rs_d0);
93        LoadValueDirectWideFixed(rl_src2, rs_d1);
94        ClobberCallerSave();
95        CallHelper(r_tgt, kQuickFmod, false);
96      }
97      rl_result = GetReturnWide(kFPReg);
98      StoreValueWide(rl_dest, rl_result);
99      return;
100    case Instruction::NEG_DOUBLE:
101      GenNegDouble(rl_dest, rl_src1);
102      return;
103    default:
104      LOG(FATAL) << "Unexpected opcode: " << opcode;
105  }
106
107  rl_src1 = LoadValueWide(rl_src1, kFPReg);
108  DCHECK(rl_src1.wide);
109  rl_src2 = LoadValueWide(rl_src2, kFPReg);
110  DCHECK(rl_src2.wide);
111  rl_result = EvalLoc(rl_dest, kFPReg, true);
112  DCHECK(rl_dest.wide);
113  DCHECK(rl_result.wide);
114  NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
115  StoreValueWide(rl_dest, rl_result);
116}
117
118void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
119                                 RegLocation rl_dest, RegLocation rl_src) {
120  int op = kA64Brk1d;
121  RegLocation rl_result;
122  RegisterClass src_reg_class = kInvalidRegClass;
123  RegisterClass dst_reg_class = kInvalidRegClass;
124
125  switch (opcode) {
126    case Instruction::INT_TO_FLOAT:
127      op = kA64Scvtf2fw;
128      src_reg_class = kCoreReg;
129      dst_reg_class = kFPReg;
130      break;
131    case Instruction::FLOAT_TO_INT:
132      op = kA64Fcvtzs2wf;
133      src_reg_class = kFPReg;
134      dst_reg_class = kCoreReg;
135      break;
136    case Instruction::DOUBLE_TO_FLOAT:
137      op = kA64Fcvt2sS;
138      src_reg_class = kFPReg;
139      dst_reg_class = kFPReg;
140      break;
141    case Instruction::FLOAT_TO_DOUBLE:
142      op = kA64Fcvt2Ss;
143      src_reg_class = kFPReg;
144      dst_reg_class = kFPReg;
145      break;
146    case Instruction::INT_TO_DOUBLE:
147      op = FWIDE(kA64Scvtf2fw);
148      src_reg_class = kCoreReg;
149      dst_reg_class = kFPReg;
150      break;
151    case Instruction::DOUBLE_TO_INT:
152      op = FWIDE(kA64Fcvtzs2wf);
153      src_reg_class = kFPReg;
154      dst_reg_class = kCoreReg;
155      break;
156    case Instruction::LONG_TO_DOUBLE:
157      op = FWIDE(kA64Scvtf2fx);
158      src_reg_class = kCoreReg;
159      dst_reg_class = kFPReg;
160      break;
161    case Instruction::FLOAT_TO_LONG:
162      op = kA64Fcvtzs2xf;
163      src_reg_class = kFPReg;
164      dst_reg_class = kCoreReg;
165      break;
166    case Instruction::LONG_TO_FLOAT:
167      op = kA64Scvtf2fx;
168      src_reg_class = kCoreReg;
169      dst_reg_class = kFPReg;
170      break;
171    case Instruction::DOUBLE_TO_LONG:
172      op = FWIDE(kA64Fcvtzs2xf);
173      src_reg_class = kFPReg;
174      dst_reg_class = kCoreReg;
175      break;
176    default:
177      LOG(FATAL) << "Unexpected opcode: " << opcode;
178  }
179
180  DCHECK_NE(src_reg_class, kInvalidRegClass);
181  DCHECK_NE(dst_reg_class, kInvalidRegClass);
182  DCHECK_NE(op, kA64Brk1d);
183
184  if (rl_src.wide) {
185    rl_src = LoadValueWide(rl_src, src_reg_class);
186  } else {
187    rl_src = LoadValue(rl_src, src_reg_class);
188  }
189
190  rl_result = EvalLoc(rl_dest, dst_reg_class, true);
191  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
192
193  if (rl_dest.wide) {
194    StoreValueWide(rl_dest, rl_result);
195  } else {
196    StoreValue(rl_dest, rl_result);
197  }
198}
199
200void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
201                                     bool is_double) {
202  LIR* target = &block_label_list_[bb->taken];
203  RegLocation rl_src1;
204  RegLocation rl_src2;
205  if (is_double) {
206    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
207    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
208    rl_src1 = LoadValueWide(rl_src1, kFPReg);
209    rl_src2 = LoadValueWide(rl_src2, kFPReg);
210    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
211  } else {
212    rl_src1 = mir_graph_->GetSrc(mir, 0);
213    rl_src2 = mir_graph_->GetSrc(mir, 1);
214    rl_src1 = LoadValue(rl_src1, kFPReg);
215    rl_src2 = LoadValue(rl_src2, kFPReg);
216    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
217  }
218  ConditionCode ccode = mir->meta.ccode;
219  switch (ccode) {
220    case kCondEq:
221    case kCondNe:
222      break;
223    case kCondLt:
224      if (gt_bias) {
225        ccode = kCondMi;
226      }
227      break;
228    case kCondLe:
229      if (gt_bias) {
230        ccode = kCondLs;
231      }
232      break;
233    case kCondGt:
234      if (gt_bias) {
235        ccode = kCondHi;
236      }
237      break;
238    case kCondGe:
239      if (gt_bias) {
240        ccode = kCondUge;
241      }
242      break;
243    default:
244      LOG(FATAL) << "Unexpected ccode: " << ccode;
245  }
246  OpCondBranch(ccode, target);
247}
248
249
250void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
251                            RegLocation rl_src1, RegLocation rl_src2) {
252  bool is_double = false;
253  int default_result = -1;
254  RegLocation rl_result;
255
256  switch (opcode) {
257    case Instruction::CMPL_FLOAT:
258      is_double = false;
259      default_result = -1;
260      break;
261    case Instruction::CMPG_FLOAT:
262      is_double = false;
263      default_result = 1;
264      break;
265    case Instruction::CMPL_DOUBLE:
266      is_double = true;
267      default_result = -1;
268      break;
269    case Instruction::CMPG_DOUBLE:
270      is_double = true;
271      default_result = 1;
272      break;
273    default:
274      LOG(FATAL) << "Unexpected opcode: " << opcode;
275  }
276  if (is_double) {
277    rl_src1 = LoadValueWide(rl_src1, kFPReg);
278    rl_src2 = LoadValueWide(rl_src2, kFPReg);
279    // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
280    ClobberSReg(rl_dest.s_reg_low);
281    rl_result = EvalLoc(rl_dest, kCoreReg, true);
282    LoadConstant(rl_result.reg, default_result);
283    NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
284  } else {
285    rl_src1 = LoadValue(rl_src1, kFPReg);
286    rl_src2 = LoadValue(rl_src2, kFPReg);
287    // In case result vreg is also a srcvreg, break association to avoid useless copy by EvalLoc()
288    ClobberSReg(rl_dest.s_reg_low);
289    rl_result = EvalLoc(rl_dest, kCoreReg, true);
290    LoadConstant(rl_result.reg, default_result);
291    NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
292  }
293  DCHECK(!rl_result.reg.IsFloat());
294
295  // TODO(Arm64): should we rather do this?
296  // csinc wD, wzr, wzr, eq
297  // csneg wD, wD, wD, le
298  // (which requires 2 instructions rather than 3)
299
300  // Rd = if cond then Rd else -Rd.
301  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
302          rl_result.reg.GetReg(), (default_result == 1) ? kArmCondPl : kArmCondLe);
303  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(),
304          kArmCondEq);
305  StoreValue(rl_dest, rl_result);
306}
307
308void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
309  RegLocation rl_result;
310  rl_src = LoadValue(rl_src, kFPReg);
311  rl_result = EvalLoc(rl_dest, kFPReg, true);
312  NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
313  StoreValue(rl_dest, rl_result);
314}
315
316void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
317  RegLocation rl_result;
318  rl_src = LoadValueWide(rl_src, kFPReg);
319  rl_result = EvalLoc(rl_dest, kFPReg, true);
320  NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
321  StoreValueWide(rl_dest, rl_result);
322}
323
324static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) {
325  // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg.
326  if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) ||
327      (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) {
328    return kCoreReg;
329  }
330  // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg.
331  if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) {
332    return kFPReg;
333  }
334  // With both src and dest in the stack frame we have to perform load+abs+store. Whether this
335  // is faster using a core reg or fp reg depends on the particular CPU. For example, on A53
336  // it's faster using core reg while on A57 it's faster with fp reg, the difference being
337  // bigger on the A53. Without further investigation and testing we prefer core register.
338  // (If the result is subsequently used in another fp operation, the dalvik reg will probably
339  // get promoted and that should be handled by the cases above.)
340  return kCoreReg;
341}
342
343bool Arm64Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
344  if (info->result.location == kLocInvalid) {
345    return true;  // Result is unused: inlining successful, no code generated.
346  }
347  RegLocation rl_dest = info->result;
348  RegLocation rl_src = UpdateLoc(info->args[0]);
349  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
350  rl_src = LoadValue(rl_src, reg_class);
351  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
352  if (reg_class == kFPReg) {
353    NewLIR2(kA64Fabs2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
354  } else {
355    NewLIR4(kA64Ubfm4rrdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 30);
356  }
357  StoreValue(rl_dest, rl_result);
358  return true;
359}
360
361bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
362  if (info->result.location == kLocInvalid) {
363    return true;  // Result is unused: inlining successful, no code generated.
364  }
365  RegLocation rl_dest = info->result;
366  RegLocation rl_src = UpdateLocWide(info->args[0]);
367  RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest);
368  rl_src = LoadValueWide(rl_src, reg_class);
369  RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
370  if (reg_class == kFPReg) {
371    NewLIR2(FWIDE(kA64Fabs2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
372  } else {
373    NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
374  }
375  StoreValueWide(rl_dest, rl_result);
376  return true;
377}
378
379bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
380  RegLocation rl_src = info->args[0];
381  RegLocation rl_dest = InlineTargetWide(info);  // double place for result
382  rl_src = LoadValueWide(rl_src, kFPReg);
383  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
384  NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
385  StoreValueWide(rl_dest, rl_result);
386  return true;
387}
388
389bool Arm64Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
390  DCHECK_EQ(cu_->instruction_set, kArm64);
391  int op = (is_min) ? kA64Fmin3fff : kA64Fmax3fff;
392  ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0);
393  RegLocation rl_src1 = info->args[0];
394  RegLocation rl_src2 = (is_double) ? info->args[2] : info->args[1];
395  rl_src1 = (is_double) ? LoadValueWide(rl_src1, kFPReg) : LoadValue(rl_src1, kFPReg);
396  rl_src2 = (is_double) ? LoadValueWide(rl_src2, kFPReg) : LoadValue(rl_src2, kFPReg);
397  RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
398  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
399  NewLIR3(op | wide, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
400  (is_double) ?  StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
401  return true;
402}
403
404}  // namespace art
405