//===-- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "ARMTargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

#define DEBUG_TYPE "armtti"

unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Bits > 32)
    return 4;

  int32_t SImmVal = Imm.getSExtValue();
  uint32_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // Thumb1.
  if (SImmVal >= 0 && SImmVal < 256)
    return 1;
  if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
    return 2;
  // Load from constantpool.
  return 3;
}
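
// For intuition, a few example immediates and the tiers they hit (assuming an
// ARM-mode subtarget): 42 falls in the 0..65535 range and is costed at 1;
// 0x00FF0000 is an 8-bit value rotated by an even amount, so getSOImmVal
// accepts it and it is also costed at 1; an arbitrary pattern such as
// 0x12345678 costs 2 when movw/movt are available (v6t2 and later) and
// otherwise 3, reflecting a multi-instruction sequence or a constant-pool
// load.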

unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Single to/from double precision conversions.
  static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
    // Vector fptrunc/fpext conversions.
    { ISD::FP_ROUND,   MVT::v2f64, 2 },
    { ISD::FP_EXTEND,  MVT::v2f32, 2 },
    { ISD::FP_EXTEND,  MVT::v4f32, 4 }
  };

  if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
                                             ISD == ISD::FP_EXTEND)) {
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
    int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
    if (Idx != -1)
      return LT.first * NEONFltDblTbl[Idx].Cost;
  }

  EVT SrcTy = TLI->getValueType(Src);
  EVT DstTy = TLI->getValueType(Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  // Some arithmetic, load and store operations have specific instructions
  // to cast up/down their types automatically at no extra cost.
  // TODO: Get these tables to know at least what the related operations are.
  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
  NEONVectorConversionTbl[] = {
    { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
    { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
    { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
    { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
    { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 0 },
    { ISD::TRUNCATE,    MVT::v4i16, MVT::v4i32, 1 },

    // The number of vmovl instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // Operations that we legalize using splitting.
    { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 6 },
    { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 3 },

    // Vector float <-> i32 conversions.
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },

    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },

    { ISD::FP_TO_SINT,  MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32, 3 },
    { ISD::FP_TO_UINT,  MVT::v4i8, MVT::v4f32, 3 },
    { ISD::FP_TO_SINT,  MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT,  MVT::v4i16, MVT::v4f32, 2 },

    // Vector double <-> i32 conversions.
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },

    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },

    { ISD::FP_TO_SINT,  MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 4 },
    { ISD::FP_TO_UINT,  MVT::v8i16, MVT::v8f32, 4 },
    { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f32, 8 },
    { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 8 }
  };

  if (SrcTy.isVector() && ST->hasNEON()) {
    int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
                                     DstTy.getSimpleVT(), SrcTy.getSimpleVT());
    if (Idx != -1)
      return NEONVectorConversionTbl[Idx].Cost;
  }

  // Scalar float to integer conversions.
  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
  NEONFloatConversionTbl[] = {
    { ISD::FP_TO_SINT,  MVT::i1, MVT::f32, 2 },
    { ISD::FP_TO_UINT,  MVT::i1, MVT::f32, 2 },
    { ISD::FP_TO_SINT,  MVT::i1, MVT::f64, 2 },
    { ISD::FP_TO_UINT,  MVT::i1, MVT::f64, 2 },
    { ISD::FP_TO_SINT,  MVT::i8, MVT::f32, 2 },
    { ISD::FP_TO_UINT,  MVT::i8, MVT::f32, 2 },
    { ISD::FP_TO_SINT,  MVT::i8, MVT::f64, 2 },
    { ISD::FP_TO_UINT,  MVT::i8, MVT::f64, 2 },
    { ISD::FP_TO_SINT,  MVT::i16, MVT::f32, 2 },
    { ISD::FP_TO_UINT,  MVT::i16, MVT::f32, 2 },
    { ISD::FP_TO_SINT,  MVT::i16, MVT::f64, 2 },
    { ISD::FP_TO_UINT,  MVT::i16, MVT::f64, 2 },
    { ISD::FP_TO_SINT,  MVT::i32, MVT::f32, 2 },
    { ISD::FP_TO_UINT,  MVT::i32, MVT::f32, 2 },
    { ISD::FP_TO_SINT,  MVT::i32, MVT::f64, 2 },
    { ISD::FP_TO_UINT,  MVT::i32, MVT::f64, 2 },
    { ISD::FP_TO_SINT,  MVT::i64, MVT::f32, 10 },
    { ISD::FP_TO_UINT,  MVT::i64, MVT::f32, 10 },
    { ISD::FP_TO_SINT,  MVT::i64, MVT::f64, 10 },
    { ISD::FP_TO_UINT,  MVT::i64, MVT::f64, 10 }
  };
  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
                                     DstTy.getSimpleVT(), SrcTy.getSimpleVT());
    if (Idx != -1)
      return NEONFloatConversionTbl[Idx].Cost;
  }

  // Scalar integer to float conversions.
  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
  NEONIntegerConversionTbl[] = {
    { ISD::SINT_TO_FP,  MVT::f32, MVT::i1, 2 },
    { ISD::UINT_TO_FP,  MVT::f32, MVT::i1, 2 },
    { ISD::SINT_TO_FP,  MVT::f64, MVT::i1, 2 },
    { ISD::UINT_TO_FP,  MVT::f64, MVT::i1, 2 },
    { ISD::SINT_TO_FP,  MVT::f32, MVT::i8, 2 },
    { ISD::UINT_TO_FP,  MVT::f32, MVT::i8, 2 },
    { ISD::SINT_TO_FP,  MVT::f64, MVT::i8, 2 },
    { ISD::UINT_TO_FP,  MVT::f64, MVT::i8, 2 },
    { ISD::SINT_TO_FP,  MVT::f32, MVT::i16, 2 },
    { ISD::UINT_TO_FP,  MVT::f32, MVT::i16, 2 },
    { ISD::SINT_TO_FP,  MVT::f64, MVT::i16, 2 },
    { ISD::UINT_TO_FP,  MVT::f64, MVT::i16, 2 },
    { ISD::SINT_TO_FP,  MVT::f32, MVT::i32, 2 },
    { ISD::UINT_TO_FP,  MVT::f32, MVT::i32, 2 },
    { ISD::SINT_TO_FP,  MVT::f64, MVT::i32, 2 },
    { ISD::UINT_TO_FP,  MVT::f64, MVT::i32, 2 },
    { ISD::SINT_TO_FP,  MVT::f32, MVT::i64, 10 },
    { ISD::UINT_TO_FP,  MVT::f32, MVT::i64, 10 },
    { ISD::SINT_TO_FP,  MVT::f64, MVT::i64, 10 },
    { ISD::UINT_TO_FP,  MVT::f64, MVT::i64, 10 }
  };

  if (SrcTy.isInteger() && ST->hasNEON()) {
    int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
                                     DstTy.getSimpleVT(), SrcTy.getSimpleVT());
    if (Idx != -1)
      return NEONIntegerConversionTbl[Idx].Cost;
  }

  // Scalar integer conversion costs.
  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
  ARMIntegerConversionTbl[] = {
    // i16 -> i64 requires two dependent operations.
    { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },

    // Truncates on i64 are assumed to be free.
    { ISD::TRUNCATE,    MVT::i32, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i16, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i8,  MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i1,  MVT::i64, 0 }
  };

  if (SrcTy.isInteger()) {
    int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
                                     DstTy.getSimpleVT(), SrcTy.getSimpleVT());
    if (Idx != -1)
      return ARMIntegerConversionTbl[Idx].Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
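
// As a concrete reading of the tables above: a sign_extend from v4i16 to
// v4i32 maps onto a single widening move and is modeled as free (cost 0),
// while v8i8 -> v8i64 needs a chain of vmovl-style steps and is costed at 7;
// conversions not covered by any table fall through to the BaseT estimate.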

unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                        unsigned Index) {
  // Penalize inserting into a D-subregister. We end up with a three times
  // lower estimated throughput on Swift.
  if (ST->isSwift() &&
      Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() &&
      ValTy->getScalarSizeInBits() <= 32)
    return 3;

  // Cross-class copies are expensive on many microarchitectures,
  // so assume they are expensive by default.
  if ((Opcode == Instruction::InsertElement ||
       Opcode == Instruction::ExtractElement) &&
      ValTy->getVectorElementType()->isIntegerTy())
    return 3;

  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
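
// Example: extracting an i32 lane from a <4 x i32> vector needs a NEON to
// core-register transfer, so integer insert/extract is given the pessimistic
// cross-class cost of 3 above; floating-point element types (outside the
// Swift insert case) fall through to the base implementation.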

unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                        Type *CondTy) {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // On NEON a vector select gets lowered to vbsl.
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // Lowering of some vector selects is currently far from perfect.
    static const TypeConversionCostTblEntry<MVT::SimpleValueType>
    NEONVectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
    };

    EVT SelCondTy = TLI->getValueType(CondTy);
    EVT SelValTy = TLI->getValueType(ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
                                       SelCondTy.getSimpleVT(),
                                       SelValTy.getSimpleVT());
      if (Idx != -1)
        return NEONVectorSelectTbl[Idx].Cost;
    }

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
    return LT.first;
  }

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
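
// Example: a select of <4 x i32> with a <4 x i1> condition maps onto a single
// vbsl and is charged LT.first (1 for this legal type), while wide cases such
// as v8i64/v16i64 selects that legalize poorly are given large blanket costs
// (50 and 100) in the table above.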

unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
  // Address computations in vectorized code with non-consecutive addresses will
  // likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;

  if (Ty->isVectorTy() && IsComplex)
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}
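
// Example: an address computation for a vector access with non-consecutive
// (complex) addresses is charged NumVectorInstToHideOverhead (10), so the
// vectorizer only treats it as profitable when there is enough surrounding
// work to hide that overhead; every other address computation is charged 1.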

unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
  // Use logic similar to that in ARMISelLowering:
  // any ARM CPU with VFP2 has floating point, but Thumb1 does not have access
  // to VFP.

  if (ST->hasVFP2() && !ST->isThumb1Only()) {
    if (Ty->isFloatTy()) {
      return TargetTransformInfo::TCC_Basic;
    }

    if (Ty->isDoubleTy()) {
      return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
        TargetTransformInfo::TCC_Basic;
    }
  }

  return TargetTransformInfo::TCC_Expensive;
}
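
// Example: on a subtarget with VFP2 that is not Thumb1-only, a float
// operation is TCC_Basic; a double is also TCC_Basic unless the FPU is
// single-precision only (isFPOnlySP), in which case doubles are not handled
// natively and cost TCC_Expensive, just like all FP on cores without VFP2.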

unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                    Type *SubTp) {
  // We only handle costs of reverse and alternate shuffles for now.
  if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);

  if (Kind == TTI::SK_Reverse) {
    static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
        // A reverse shuffle costs one instruction if we are shuffling within a
        // double word (vrev) or two if we shuffle a quad word (vrev, vext).
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);

    int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
    if (Idx == -1)
      return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);

    return LT.first * NEONShuffleTbl[Idx].Cost;
  }
  if (Kind == TTI::SK_Alternate) {
    static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
        // Alternate shuffle cost table for ARM. Cost is the number of
        // instructions required to create the shuffled vector.

        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},

        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},

        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
    int Idx =
        CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
    if (Idx == -1)
      return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
    return LT.first * NEONAltShuffleTbl[Idx].Cost;
  }
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
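
// Example: reversing a <2 x f32> stays within a D register and costs a single
// vrev (LT.first * 1), while reversing a <4 x i32> spans a Q register and
// costs two instructions (vrev plus vext); alternate shuffles of v8i16/v16i8
// get the large 16 and 32 entries because no cheap NEON lowering is modeled
// for them.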

unsigned ARMTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo) {

  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

  const unsigned FunctionCallDivCost = 20;
  const unsigned ReciprocalDivCost = 10;
  static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
    // Division.
    // These costs are somewhat arbitrary. Choose a cost of 20 to indicate that
    // vectorizing division (which adds a function call) is going to be very
    // expensive.
    // Double register types.
    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
    { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
    { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
    { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
    // Quad register types.
    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
    // Multiplication.
  };

  int Idx = -1;

  if (ST->hasNEON())
    Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);

  if (Idx != -1)
    return LT.first * CostTbl[Idx].Cost;

  unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
                                                Opd1PropInfo, Opd2PropInfo);

  // This is somewhat of a hack. The problem we are facing is that SROA
  // creates sequences of shift/and/or instructions to construct values.
  // These sequences are recognized by ISel and have zero cost. Not so for
  // vectorized code. Because we have support for v2i64 but not i64, those
  // sequences look particularly beneficial to vectorize.
  // To work around this we increase the cost of v2i64 operations to make them
  // seem less beneficial.
  if (LT.second == MVT::v2i64 &&
      Op2Info == TargetTransformInfo::OK_UniformConstantValue)
    Cost += 4;

  return Cost;
}
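
// Example: sdiv of <4 x i32> has no NEON instruction and is modeled as four
// scalar library calls (4 * FunctionCallDivCost = 80), whereas sdiv/udiv of
// v8i8 and v4i16 get the cheaper ReciprocalDivCost (10), on the assumption
// that a reciprocal-estimate style expansion is possible for those narrow
// element types.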

unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     unsigned Alignment,
                                     unsigned AddressSpace) {
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);

  if (Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst1/vld1 vs. 1 uop for vldr/vstr.
    return LT.first * 4;
  }
  return LT.first;
}
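
// Example: a load or store of <2 x double> that is not 16-byte aligned has to
// use vld1/vst1 (about four uops) instead of a single vldr/vstr, so it is
// charged LT.first * 4 above; other vector accesses are charged LT.first.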