1//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that Hexagon uses to lower LLVM code
11// into a selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "HexagonISelLowering.h"
16#include "HexagonMachineFunctionInfo.h"
17#include "HexagonSubtarget.h"
18#include "HexagonTargetMachine.h"
19#include "HexagonTargetObjectFile.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineJumpTableInfo.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAGISel.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/CallingConv.h"
29#include "llvm/IR/DerivedTypes.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/GlobalAlias.h"
32#include "llvm/IR/GlobalVariable.h"
33#include "llvm/IR/InlineAsm.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Support/raw_ostream.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "hexagon-lowering"
43
44static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
45  cl::init(true), cl::Hidden,
46  cl::desc("Control jump table emission on Hexagon target"));
47
48static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
49  cl::Hidden, cl::ZeroOrMore, cl::init(false),
50  cl::desc("Enable Hexagon SDNode scheduling"));
51
52static cl::opt<bool> EnableFastMath("ffast-math",
53  cl::Hidden, cl::ZeroOrMore, cl::init(false),
54  cl::desc("Enable Fast Math processing"));
55
56static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
57  cl::Hidden, cl::ZeroOrMore, cl::init(5),
58  cl::desc("Set minimum jump tables"));
59
60static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
61  cl::Hidden, cl::ZeroOrMore, cl::init(6),
62  cl::desc("Max #stores to inline memcpy"));
63
64static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
65  cl::Hidden, cl::ZeroOrMore, cl::init(4),
66  cl::desc("Max #stores to inline memcpy"));
67
68static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
69  cl::Hidden, cl::ZeroOrMore, cl::init(6),
70  cl::desc("Max #stores to inline memmove"));
71
72static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
73  cl::Hidden, cl::ZeroOrMore, cl::init(4),
74  cl::desc("Max #stores to inline memmove"));
75
76static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
77  cl::Hidden, cl::ZeroOrMore, cl::init(8),
78  cl::desc("Max #stores to inline memset"));
79
80static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
81  cl::Hidden, cl::ZeroOrMore, cl::init(4),
82  cl::desc("Max #stores to inline memset"));
83
84
85namespace {
86class HexagonCCState : public CCState {
87  unsigned NumNamedVarArgParams;
88
89public:
90  HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
91                 SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
92                 int NumNamedVarArgParams)
93      : CCState(CC, isVarArg, MF, locs, C),
94        NumNamedVarArgParams(NumNamedVarArgParams) {}
95
96  unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
97};
98}
99
100// Implement calling convention for Hexagon.
101
102static bool IsHvxVectorType(MVT ty);
103
104static bool
105CC_Hexagon(unsigned ValNo, MVT ValVT,
106           MVT LocVT, CCValAssign::LocInfo LocInfo,
107           ISD::ArgFlagsTy ArgFlags, CCState &State);
108
109static bool
110CC_Hexagon32(unsigned ValNo, MVT ValVT,
111             MVT LocVT, CCValAssign::LocInfo LocInfo,
112             ISD::ArgFlagsTy ArgFlags, CCState &State);
113
114static bool
115CC_Hexagon64(unsigned ValNo, MVT ValVT,
116             MVT LocVT, CCValAssign::LocInfo LocInfo,
117             ISD::ArgFlagsTy ArgFlags, CCState &State);
118
119static bool
120CC_HexagonVector(unsigned ValNo, MVT ValVT,
121                 MVT LocVT, CCValAssign::LocInfo LocInfo,
122                 ISD::ArgFlagsTy ArgFlags, CCState &State);
123
124static bool
125RetCC_Hexagon(unsigned ValNo, MVT ValVT,
126              MVT LocVT, CCValAssign::LocInfo LocInfo,
127              ISD::ArgFlagsTy ArgFlags, CCState &State);
128
129static bool
130RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
131                MVT LocVT, CCValAssign::LocInfo LocInfo,
132                ISD::ArgFlagsTy ArgFlags, CCState &State);
133
134static bool
135RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
136                MVT LocVT, CCValAssign::LocInfo LocInfo,
137                ISD::ArgFlagsTy ArgFlags, CCState &State);
138
139static bool
140RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
141                    MVT LocVT, CCValAssign::LocInfo LocInfo,
142                    ISD::ArgFlagsTy ArgFlags, CCState &State);
143
144static bool
145CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
146            MVT LocVT, CCValAssign::LocInfo LocInfo,
147            ISD::ArgFlagsTy ArgFlags, CCState &State) {
148  HexagonCCState &HState = static_cast<HexagonCCState &>(State);
149
150  if (ValNo < HState.getNumNamedVarArgParams()) {
151    // Deal with named arguments.
152    return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
153  }
154
155  // Deal with un-named arguments.
156  unsigned ofst;
157  if (ArgFlags.isByVal()) {
158    // If pass-by-value, the size allocated on stack is decided
159    // by ArgFlags.getByValSize(), not by the size of LocVT.
160    ofst = State.AllocateStack(ArgFlags.getByValSize(),
161                               ArgFlags.getByValAlign());
162    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
163    return false;
164  }
165  if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
166    LocVT = MVT::i32;
167    ValVT = MVT::i32;
168    if (ArgFlags.isSExt())
169      LocInfo = CCValAssign::SExt;
170    else if (ArgFlags.isZExt())
171      LocInfo = CCValAssign::ZExt;
172    else
173      LocInfo = CCValAssign::AExt;
174  }
175  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
176    ofst = State.AllocateStack(4, 4);
177    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
178    return false;
179  }
180  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
181    ofst = State.AllocateStack(8, 8);
182    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
183    return false;
184  }
185  if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
186      LocVT == MVT::v16i8) {
187    ofst = State.AllocateStack(16, 16);
188    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
189    return false;
190  }
191  if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
192      LocVT == MVT::v32i8) {
193    ofst = State.AllocateStack(32, 32);
194    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
195    return false;
196  }
197  if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
198      LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
199    ofst = State.AllocateStack(64, 64);
200    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
201    return false;
202  }
203  if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
204      LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
205    ofst = State.AllocateStack(128, 128);
206    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
207    return false;
208  }
209  if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
210      LocVT == MVT::v256i8) {
211    ofst = State.AllocateStack(256, 256);
212    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
213    return false;
214  }
215
216  llvm_unreachable(nullptr);
217}
218
219
220static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
221      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
222  if (ArgFlags.isByVal()) {
223    // Passed on stack.
224    unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
225                                          ArgFlags.getByValAlign());
226    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
227    return false;
228  }
229
230  if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
231    LocVT = MVT::i32;
232    ValVT = MVT::i32;
233    if (ArgFlags.isSExt())
234      LocInfo = CCValAssign::SExt;
235    else if (ArgFlags.isZExt())
236      LocInfo = CCValAssign::ZExt;
237    else
238      LocInfo = CCValAssign::AExt;
239  } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
240    LocVT = MVT::i32;
241    LocInfo = CCValAssign::BCvt;
242  } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
243    LocVT = MVT::i64;
244    LocInfo = CCValAssign::BCvt;
245  }
246
247  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
248    if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
249      return false;
250  }
251
252  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
253    if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
254      return false;
255  }
256
257  if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
258    unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
259    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
260    return false;
261  }
262
263  if (IsHvxVectorType(LocVT)) {
264    if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
265      return false;
266  }
267
268  return true;  // CC didn't match.
269}
270
271
272static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
273                         MVT LocVT, CCValAssign::LocInfo LocInfo,
274                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
275
276  static const MCPhysReg RegList[] = {
277    Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
278    Hexagon::R5
279  };
280  if (unsigned Reg = State.AllocateReg(RegList)) {
281    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
282    return false;
283  }
284
285  unsigned Offset = State.AllocateStack(4, 4);
286  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
287  return false;
288}
289
290static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
291                         MVT LocVT, CCValAssign::LocInfo LocInfo,
292                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
293
294  if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
295    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
296    return false;
297  }
298
299  static const MCPhysReg RegList1[] = {
300    Hexagon::D1, Hexagon::D2
301  };
302  static const MCPhysReg RegList2[] = {
303    Hexagon::R1, Hexagon::R3
304  };
305  if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) {
306    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
307    return false;
308  }
309
310  unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
311  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
312  return false;
313}
314
315static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
316                             MVT LocVT, CCValAssign::LocInfo LocInfo,
317                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
318
319    static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1,
320                                         Hexagon::V2, Hexagon::V3,
321                                         Hexagon::V4, Hexagon::V5,
322                                         Hexagon::V6, Hexagon::V7,
323                                         Hexagon::V8, Hexagon::V9,
324                                         Hexagon::V10, Hexagon::V11,
325                                         Hexagon::V12, Hexagon::V13,
326                                         Hexagon::V14, Hexagon::V15};
327    static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1,
328                                         Hexagon::W2, Hexagon::W3,
329                                         Hexagon::W4, Hexagon::W5,
330                                         Hexagon::W6, Hexagon::W7};
331  auto &MF = State.getMachineFunction();
332  auto &HST = MF.getSubtarget<HexagonSubtarget>();
333  bool UseHVX = HST.useHVXOps();
334  bool UseHVXDbl = HST.useHVXDblOps();
335
336  if ((UseHVX && !UseHVXDbl) &&
337      (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
338       LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
339    if (unsigned Reg = State.AllocateReg(VecLstS)) {
340      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
341      return false;
342    }
343    unsigned Offset = State.AllocateStack(64, 64);
344    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
345    return false;
346  }
347  if ((UseHVX && !UseHVXDbl) &&
348      (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
349       LocVT == MVT::v128i8)) {
350    if (unsigned Reg = State.AllocateReg(VecLstD)) {
351      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
352      return false;
353    }
354    unsigned Offset = State.AllocateStack(128, 128);
355    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
356    return false;
357  }
358  // 128B Mode
359  if ((UseHVX && UseHVXDbl) &&
360      (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
361       LocVT == MVT::v256i8)) {
362    if (unsigned Reg = State.AllocateReg(VecLstD)) {
363      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
364      return false;
365    }
366    unsigned Offset = State.AllocateStack(256, 256);
367    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
368    return false;
369  }
370  if ((UseHVX && UseHVXDbl) &&
371      (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
372       LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
373    if (unsigned Reg = State.AllocateReg(VecLstS)) {
374      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
375      return false;
376    }
377    unsigned Offset = State.AllocateStack(128, 128);
378    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
379    return false;
380  }
381  return true;
382}
383
384static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
385                          MVT LocVT, CCValAssign::LocInfo LocInfo,
386                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
387  auto &MF = State.getMachineFunction();
388  auto &HST = MF.getSubtarget<HexagonSubtarget>();
389  bool UseHVX = HST.useHVXOps();
390  bool UseHVXDbl = HST.useHVXDblOps();
391
392  if (LocVT == MVT::i1) {
393    // Return values of type MVT::i1 still need to be assigned to R0, but
394    // the value type needs to remain i1. LowerCallResult will deal with it,
395    // but it needs to recognize i1 as the value type.
396    LocVT = MVT::i32;
397  } else if (LocVT == MVT::i8 || LocVT == MVT::i16) {
398    LocVT = MVT::i32;
399    ValVT = MVT::i32;
400    if (ArgFlags.isSExt())
401      LocInfo = CCValAssign::SExt;
402    else if (ArgFlags.isZExt())
403      LocInfo = CCValAssign::ZExt;
404    else
405      LocInfo = CCValAssign::AExt;
406  } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
407    LocVT = MVT::i32;
408    LocInfo = CCValAssign::BCvt;
409  } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
410    LocVT = MVT::i64;
411    LocInfo = CCValAssign::BCvt;
412  } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
413             LocVT == MVT::v16i32 || LocVT == MVT::v8i64 ||
414             LocVT == MVT::v512i1) {
415    LocVT = MVT::v16i32;
416    ValVT = MVT::v16i32;
417    LocInfo = CCValAssign::Full;
418  } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
419             LocVT == MVT::v32i32 || LocVT == MVT::v16i64 ||
420             (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) {
421    LocVT = MVT::v32i32;
422    ValVT = MVT::v32i32;
423    LocInfo = CCValAssign::Full;
424  } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
425             LocVT == MVT::v64i32 || LocVT == MVT::v32i64) {
426    LocVT = MVT::v64i32;
427    ValVT = MVT::v64i32;
428    LocInfo = CCValAssign::Full;
429  }
430  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
431    if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
432    return false;
433  }
434
435  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
436    if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
437    return false;
438  }
439  if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
440    if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
441    return false;
442  }
443  return true;  // CC didn't match.
444}
445
446static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
447                            MVT LocVT, CCValAssign::LocInfo LocInfo,
448                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
449  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
450    if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
451      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
452      return false;
453    }
454  }
455
456  unsigned Offset = State.AllocateStack(4, 4);
457  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
458  return false;
459}
460
461static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
462                            MVT LocVT, CCValAssign::LocInfo LocInfo,
463                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
464  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
465    if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
466      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
467      return false;
468    }
469  }
470
471  unsigned Offset = State.AllocateStack(8, 8);
472  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
473  return false;
474}
475
476static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
477                                MVT LocVT, CCValAssign::LocInfo LocInfo,
478                                ISD::ArgFlagsTy ArgFlags, CCState &State) {
479  auto &MF = State.getMachineFunction();
480  auto &HST = MF.getSubtarget<HexagonSubtarget>();
481  bool UseHVX = HST.useHVXOps();
482  bool UseHVXDbl = HST.useHVXDblOps();
483
484  unsigned OffSiz = 64;
485  if (LocVT == MVT::v16i32) {
486    if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
487      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
488      return false;
489    }
490  } else if (LocVT == MVT::v32i32) {
491    unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0;
492    if (unsigned Reg = State.AllocateReg(Req)) {
493      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
494      return false;
495    }
496    OffSiz = 128;
497  } else if (LocVT == MVT::v64i32) {
498    if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
499      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
500      return false;
501    }
502    OffSiz = 256;
503  }
504
505  unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
506  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
507  return false;
508}
509
510void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
511  if (VT != PromotedLdStVT) {
512    setOperationAction(ISD::LOAD, VT, Promote);
513    AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT);
514
515    setOperationAction(ISD::STORE, VT, Promote);
516    AddPromotedToType(ISD::STORE, VT, PromotedLdStVT);
517  }
518}
519
520SDValue
521HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
522const {
523  return SDValue();
524}
525
526/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
527/// by "Src" to address "Dst" of size "Size".  Alignment information is
528/// specified by the specific parameter attribute. The copy will be passed as
529/// a byval function parameter.  Sometimes what we are copying is the end of a
530/// larger object, the part that does not fit in registers.
531static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
532                                         SDValue Chain, ISD::ArgFlagsTy Flags,
533                                         SelectionDAG &DAG, const SDLoc &dl) {
534
535  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
536  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
537                       /*isVolatile=*/false, /*AlwaysInline=*/false,
538                       /*isTailCall=*/false,
539                       MachinePointerInfo(), MachinePointerInfo());
540}
541
542static bool IsHvxVectorType(MVT ty) {
543  return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 ||
544          ty == MVT::v64i8 ||
545          ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 ||
546          ty == MVT::v128i8 ||
547          ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 ||
548          ty == MVT::v256i8 ||
549          ty == MVT::v512i1 || ty == MVT::v1024i1);
550}
551
552// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
553// passed by value, the function prototype is modified to return void and
554// the value is stored in memory pointed by a pointer passed by caller.
555SDValue
556HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
557                                   bool isVarArg,
558                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
559                                   const SmallVectorImpl<SDValue> &OutVals,
560                                   const SDLoc &dl, SelectionDAG &DAG) const {
561
562  // CCValAssign - represent the assignment of the return value to locations.
563  SmallVector<CCValAssign, 16> RVLocs;
564
565  // CCState - Info about the registers and stack slot.
566  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
567                 *DAG.getContext());
568
569  // Analyze return values of ISD::RET
570  CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
571
572  SDValue Flag;
573  SmallVector<SDValue, 4> RetOps(1, Chain);
574
575  // Copy the result values into the output registers.
576  for (unsigned i = 0; i != RVLocs.size(); ++i) {
577    CCValAssign &VA = RVLocs[i];
578
579    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
580
581    // Guarantee that all emitted copies are stuck together with flags.
582    Flag = Chain.getValue(1);
583    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
584  }
585
586  RetOps[0] = Chain;  // Update chain.
587
588  // Add the flag if we have it.
589  if (Flag.getNode())
590    RetOps.push_back(Flag);
591
592  return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
593}
594
595bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
596  // If either no tail call or told not to tail call at all, don't.
597  auto Attr =
598      CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
599  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
600    return false;
601
602  return true;
603}
604
605/// LowerCallResult - Lower the result values of an ISD::CALL into the
606/// appropriate copies out of appropriate physical registers.  This assumes that
607/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
608/// being lowered. Returns a SDNode with the same number of values as the
609/// ISD::CALL.
610SDValue HexagonTargetLowering::LowerCallResult(
611    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
612    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
613    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
614    const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
615  // Assign locations to each value returned by this call.
616  SmallVector<CCValAssign, 16> RVLocs;
617
618  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
619                 *DAG.getContext());
620
621  CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
622
623  // Copy all of the result registers out of their specified physreg.
624  for (unsigned i = 0; i != RVLocs.size(); ++i) {
625    SDValue RetVal;
626    if (RVLocs[i].getValVT() == MVT::i1) {
627      // Return values of type MVT::i1 require special handling. The reason
628      // is that MVT::i1 is associated with the PredRegs register class, but
629      // values of that type are still returned in R0. Generate an explicit
630      // copy into a predicate register from R0, and treat the value of the
631      // predicate register as the call result.
632      auto &MRI = DAG.getMachineFunction().getRegInfo();
633      SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
634                                       MVT::i32, InFlag);
635      // FR0 = (Value, Chain, Glue)
636      unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
637      SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
638                                     FR0.getValue(0), FR0.getValue(2));
639      // TPR = (Chain, Glue)
640      RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1,
641                                  TPR.getValue(1));
642    } else {
643      RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
644                                  RVLocs[i].getValVT(), InFlag);
645    }
646    InVals.push_back(RetVal.getValue(0));
647    Chain = RetVal.getValue(1);
648    InFlag = RetVal.getValue(2);
649  }
650
651  return Chain;
652}
653
654/// LowerCall - Functions arguments are copied from virtual regs to
655/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
656SDValue
657HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
658                                 SmallVectorImpl<SDValue> &InVals) const {
659  SelectionDAG &DAG                     = CLI.DAG;
660  SDLoc &dl                             = CLI.DL;
661  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
662  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
663  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
664  SDValue Chain                         = CLI.Chain;
665  SDValue Callee                        = CLI.Callee;
666  bool &isTailCall                      = CLI.IsTailCall;
667  CallingConv::ID CallConv              = CLI.CallConv;
668  bool isVarArg                         = CLI.IsVarArg;
669  bool doesNotReturn                    = CLI.DoesNotReturn;
670
671  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
672  MachineFunction &MF = DAG.getMachineFunction();
673  auto PtrVT = getPointerTy(MF.getDataLayout());
674
675  // Check for varargs.
676  int NumNamedVarArgParams = -1;
677  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
678    const GlobalValue *GV = GAN->getGlobal();
679    Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
680    if (const Function* F = dyn_cast<Function>(GV)) {
681      // If a function has zero args and is a vararg function, that's
682      // disallowed so it must be an undeclared function.  Do not assume
683      // varargs if the callee is undefined.
684      if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
685        NumNamedVarArgParams = F->getFunctionType()->getNumParams();
686    }
687  }
688
689  // Analyze operands of the call, assigning locations to each operand.
690  SmallVector<CCValAssign, 16> ArgLocs;
691  HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
692                        *DAG.getContext(), NumNamedVarArgParams);
693
694  if (isVarArg)
695    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
696  else
697    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
698
699  auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
700  if (Attr.getValueAsString() == "true")
701    isTailCall = false;
702
703  if (isTailCall) {
704    bool StructAttrFlag = MF.getFunction()->hasStructRetAttr();
705    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
706                                                   isVarArg, IsStructRet,
707                                                   StructAttrFlag,
708                                                   Outs, OutVals, Ins, DAG);
709    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
710      CCValAssign &VA = ArgLocs[i];
711      if (VA.isMemLoc()) {
712        isTailCall = false;
713        break;
714      }
715    }
716    DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n"
717                                : "Argument must be passed on stack. "
718                                  "Not eligible for Tail Call\n"));
719  }
720  // Get a count of how many bytes are to be pushed on the stack.
721  unsigned NumBytes = CCInfo.getNextStackOffset();
722  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
723  SmallVector<SDValue, 8> MemOpChains;
724
725  auto &HRI = *Subtarget.getRegisterInfo();
726  SDValue StackPtr =
727      DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
728
729  bool NeedsArgAlign = false;
730  unsigned LargestAlignSeen = 0;
731  // Walk the register/memloc assignments, inserting copies/loads.
732  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
733    CCValAssign &VA = ArgLocs[i];
734    SDValue Arg = OutVals[i];
735    ISD::ArgFlagsTy Flags = Outs[i].Flags;
736    // Record if we need > 8 byte alignment on an argument.
737    bool ArgAlign = IsHvxVectorType(VA.getValVT());
738    NeedsArgAlign |= ArgAlign;
739
740    // Promote the value if needed.
741    switch (VA.getLocInfo()) {
742      default:
743        // Loc info must be one of Full, SExt, ZExt, or AExt.
744        llvm_unreachable("Unknown loc info!");
745      case CCValAssign::BCvt:
746      case CCValAssign::Full:
747        break;
748      case CCValAssign::SExt:
749        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
750        break;
751      case CCValAssign::ZExt:
752        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
753        break;
754      case CCValAssign::AExt:
755        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
756        break;
757    }
758
759    if (VA.isMemLoc()) {
760      unsigned LocMemOffset = VA.getLocMemOffset();
761      SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
762                                        StackPtr.getValueType());
763      MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
764      if (ArgAlign)
765        LargestAlignSeen = std::max(LargestAlignSeen,
766                                    VA.getLocVT().getStoreSizeInBits() >> 3);
767      if (Flags.isByVal()) {
768        // The argument is a struct passed by value. According to LLVM, "Arg"
769        // is is pointer.
770        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
771                                                        Flags, DAG, dl));
772      } else {
773        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
774            DAG.getMachineFunction(), LocMemOffset);
775        SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
776                                 false, 0);
777        MemOpChains.push_back(S);
778      }
779      continue;
780    }
781
782    // Arguments that can be passed on register must be kept at RegsToPass
783    // vector.
784    if (VA.isRegLoc())
785      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
786  }
787
788  if (NeedsArgAlign && Subtarget.hasV60TOps()) {
789    DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
790    MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
791    // V6 vectors passed by value have 64 or 128 byte alignment depending
792    // on whether we are 64 byte vector mode or 128 byte.
793    bool UseHVXDbl = Subtarget.useHVXDblOps();
794    assert(Subtarget.useHVXOps());
795    const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
796    LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
797    MFI->ensureMaxAlignment(LargestAlignSeen);
798  }
799  // Transform all store nodes into one single node because all store
800  // nodes are independent of each other.
801  if (!MemOpChains.empty())
802    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
803
804  if (!isTailCall) {
805    SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
806    Chain = DAG.getCALLSEQ_START(Chain, C, dl);
807  }
808
809  // Build a sequence of copy-to-reg nodes chained together with token
810  // chain and flag operands which copy the outgoing args into registers.
811  // The InFlag in necessary since all emitted instructions must be
812  // stuck together.
813  SDValue InFlag;
814  if (!isTailCall) {
815    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
816      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
817                               RegsToPass[i].second, InFlag);
818      InFlag = Chain.getValue(1);
819    }
820  } else {
821    // For tail calls lower the arguments to the 'real' stack slot.
822    //
823    // Force all the incoming stack arguments to be loaded from the stack
824    // before any new outgoing arguments are stored to the stack, because the
825    // outgoing stack slots may alias the incoming argument stack slots, and
826    // the alias isn't otherwise explicit. This is slightly more conservative
827    // than necessary, because it means that each store effectively depends
828    // on every argument instead of just those arguments it would clobber.
829    //
830    // Do not flag preceding copytoreg stuff together with the following stuff.
831    InFlag = SDValue();
832    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
833      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
834                               RegsToPass[i].second, InFlag);
835      InFlag = Chain.getValue(1);
836    }
837    InFlag = SDValue();
838  }
839
840  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
841  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
842  // node so that legalize doesn't hack it.
843  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
844    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
845  } else if (ExternalSymbolSDNode *S =
846             dyn_cast<ExternalSymbolSDNode>(Callee)) {
847    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
848  }
849
850  // Returns a chain & a flag for retval copy to use.
851  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
852  SmallVector<SDValue, 8> Ops;
853  Ops.push_back(Chain);
854  Ops.push_back(Callee);
855
856  // Add argument registers to the end of the list so that they are
857  // known live into the call.
858  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
859    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
860                                  RegsToPass[i].second.getValueType()));
861  }
862
863  if (InFlag.getNode())
864    Ops.push_back(InFlag);
865
866  if (isTailCall) {
867    MF.getFrameInfo()->setHasTailCall();
868    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
869  }
870
871  int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3;
872  Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
873  InFlag = Chain.getValue(1);
874
875  // Create the CALLSEQ_END node.
876  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
877                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
878  InFlag = Chain.getValue(1);
879
880  // Handle result values, copying them out of physregs into vregs that we
881  // return.
882  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
883                         InVals, OutVals, Callee);
884}
885
886static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
887                                   bool isSEXTLoad, SDValue &Base,
888                                   SDValue &Offset, bool &isInc,
889                                   SelectionDAG &DAG) {
890  if (Ptr->getOpcode() != ISD::ADD)
891    return false;
892
893  auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
894  bool UseHVX = HST.useHVXOps();
895  bool UseHVXDbl = HST.useHVXDblOps();
896
897  bool ValidHVXDblType =
898    (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 ||
899                              VT == MVT::v64i16 || VT == MVT::v128i8);
900  bool ValidHVXType =
901    UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 ||
902                             VT == MVT::v32i16 || VT == MVT::v64i8);
903
904  if (ValidHVXDblType || ValidHVXType ||
905      VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
906    isInc = (Ptr->getOpcode() == ISD::ADD);
907    Base = Ptr->getOperand(0);
908    Offset = Ptr->getOperand(1);
909    // Ensure that Offset is a constant.
910    return (isa<ConstantSDNode>(Offset));
911  }
912
913  return false;
914}
915
916/// getPostIndexedAddressParts - returns true by value, base pointer and
917/// offset pointer and addressing mode by reference if this node can be
918/// combined with a load / store to form a post-indexed load / store.
919bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
920                                                       SDValue &Base,
921                                                       SDValue &Offset,
922                                                       ISD::MemIndexedMode &AM,
923                                                       SelectionDAG &DAG) const
924{
925  EVT VT;
926  SDValue Ptr;
927  bool isSEXTLoad = false;
928
929  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
930    VT  = LD->getMemoryVT();
931    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
932  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
933    VT  = ST->getMemoryVT();
934    if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
935      return false;
936    }
937  } else {
938    return false;
939  }
940
941  bool isInc = false;
942  bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
943                                        isInc, DAG);
944  if (isLegal) {
945    auto &HII = *Subtarget.getInstrInfo();
946    int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
947    if (HII.isValidAutoIncImm(VT, OffsetVal)) {
948      AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
949      return true;
950    }
951  }
952
953  return false;
954}
955
956SDValue
957HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
958  SDNode *Node = Op.getNode();
959  MachineFunction &MF = DAG.getMachineFunction();
960  auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
961  switch (Node->getOpcode()) {
962    case ISD::INLINEASM: {
963      unsigned NumOps = Node->getNumOperands();
964      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
965        --NumOps;  // Ignore the flag operand.
966
967      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
968        if (FuncInfo.hasClobberLR())
969          break;
970        unsigned Flags =
971          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
972        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
973        ++i;  // Skip the ID value.
974
975        switch (InlineAsm::getKind(Flags)) {
976        default: llvm_unreachable("Bad flags!");
977          case InlineAsm::Kind_RegDef:
978          case InlineAsm::Kind_RegUse:
979          case InlineAsm::Kind_Imm:
980          case InlineAsm::Kind_Clobber:
981          case InlineAsm::Kind_Mem: {
982            for (; NumVals; --NumVals, ++i) {}
983            break;
984          }
985          case InlineAsm::Kind_RegDefEarlyClobber: {
986            for (; NumVals; --NumVals, ++i) {
987              unsigned Reg =
988                cast<RegisterSDNode>(Node->getOperand(i))->getReg();
989
990              // Check it to be lr
991              const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
992              if (Reg == QRI->getRARegister()) {
993                FuncInfo.setHasClobberLR(true);
994                break;
995              }
996            }
997            break;
998          }
999        }
1000      }
1001    }
1002  } // Node->getOpcode
1003  return Op;
1004}
1005
1006// Need to transform ISD::PREFETCH into something that doesn't inherit
1007// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
1008// SDNPMayStore.
1009SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
1010                                             SelectionDAG &DAG) const {
1011  SDValue Chain = Op.getOperand(0);
1012  SDValue Addr = Op.getOperand(1);
1013  // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
1014  // if the "reg" is fed by an "add".
1015  SDLoc DL(Op);
1016  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1017  return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
1018}
1019
1020SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1021      SelectionDAG &DAG) const {
1022  SDValue Chain = Op.getOperand(0);
1023  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1024  // Lower the hexagon_prefetch builtin to DCFETCH, as above.
1025  if (IntNo == Intrinsic::hexagon_prefetch) {
1026    SDValue Addr = Op.getOperand(2);
1027    SDLoc DL(Op);
1028    SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1029    return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
1030  }
1031  return SDValue();
1032}
1033
1034SDValue
1035HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1036                                               SelectionDAG &DAG) const {
1037  SDValue Chain = Op.getOperand(0);
1038  SDValue Size = Op.getOperand(1);
1039  SDValue Align = Op.getOperand(2);
1040  SDLoc dl(Op);
1041
1042  ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
1043  assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
1044
1045  unsigned A = AlignConst->getSExtValue();
1046  auto &HFI = *Subtarget.getFrameLowering();
1047  // "Zero" means natural stack alignment.
1048  if (A == 0)
1049    A = HFI.getStackAlignment();
1050
1051  DEBUG({
1052    dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: ";
1053    Size.getNode()->dump(&DAG);
1054    dbgs() << "\n";
1055  });
1056
1057  SDValue AC = DAG.getConstant(A, dl, MVT::i32);
1058  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
1059  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
1060
1061  DAG.ReplaceAllUsesOfValueWith(Op, AA);
1062  return AA;
1063}
1064
1065SDValue HexagonTargetLowering::LowerFormalArguments(
1066    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1067    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1068    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1069
1070  MachineFunction &MF = DAG.getMachineFunction();
1071  MachineFrameInfo *MFI = MF.getFrameInfo();
1072  MachineRegisterInfo &RegInfo = MF.getRegInfo();
1073  auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
1074
1075  // Assign locations to all of the incoming arguments.
1076  SmallVector<CCValAssign, 16> ArgLocs;
1077  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1078                 *DAG.getContext());
1079
1080  CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
1081
1082  // For LLVM, in the case when returning a struct by value (>8byte),
1083  // the first argument is a pointer that points to the location on caller's
1084  // stack where the return value will be stored. For Hexagon, the location on
1085  // caller's stack is passed only when the struct size is smaller than (and
1086  // equal to) 8 bytes. If not, no address will be passed into callee and
1087  // callee return the result direclty through R0/R1.
1088
1089  SmallVector<SDValue, 8> MemOps;
1090  bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
1091
1092  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1093    CCValAssign &VA = ArgLocs[i];
1094    ISD::ArgFlagsTy Flags = Ins[i].Flags;
1095    unsigned ObjSize;
1096    unsigned StackLocation;
1097    int FI;
1098
1099    if (   (VA.isRegLoc() && !Flags.isByVal())
1100        || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
1101      // Arguments passed in registers
1102      // 1. int, long long, ptr args that get allocated in register.
1103      // 2. Large struct that gets an register to put its address in.
1104      EVT RegVT = VA.getLocVT();
1105      if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
1106          RegVT == MVT::i32 || RegVT == MVT::f32) {
1107        unsigned VReg =
1108          RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
1109        RegInfo.addLiveIn(VA.getLocReg(), VReg);
1110        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
1111      } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
1112        unsigned VReg =
1113          RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
1114        RegInfo.addLiveIn(VA.getLocReg(), VReg);
1115        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
1116
1117      // Single Vector
1118      } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
1119                  RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
1120        unsigned VReg =
1121          RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
1122        RegInfo.addLiveIn(VA.getLocReg(), VReg);
1123        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
1124    } else if (UseHVX && UseHVXDbl &&
1125               ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
1126                 RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
1127        unsigned VReg =
1128          RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
1129        RegInfo.addLiveIn(VA.getLocReg(), VReg);
1130        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
1131
1132      // Double Vector
1133      } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
1134                  RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
1135        unsigned VReg =
1136          RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
1137        RegInfo.addLiveIn(VA.getLocReg(), VReg);
1138        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
1139      } else if (UseHVX && UseHVXDbl &&
1140                ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
1141                  RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
1142        unsigned VReg =
1143          RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
1144        RegInfo.addLiveIn(VA.getLocReg(), VReg);
1145        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
1146      } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
1147        assert(0 && "need to support VecPred regs");
1148        unsigned VReg =
1149          RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
1150        RegInfo.addLiveIn(VA.getLocReg(), VReg);
1151        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
1152      } else {
1153        assert (0);
1154      }
1155    } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
1156      assert (0 && "ByValSize must be bigger than 8 bytes");
1157    } else {
1158      // Sanity check.
1159      assert(VA.isMemLoc());
1160
1161      if (Flags.isByVal()) {
1162        // If it's a byval parameter, then we need to compute the
1163        // "real" size, not the size of the pointer.
1164        ObjSize = Flags.getByValSize();
1165      } else {
1166        ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
1167      }
1168
1169      StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
1170      // Create the frame index object for this incoming parameter...
1171      FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
1172
1173      // Create the SelectionDAG nodes cordl, responding to a load
1174      // from this parameter.
1175      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1176
1177      if (Flags.isByVal()) {
1178        // If it's a pass-by-value aggregate, then do not dereference the stack
1179        // location. Instead, we should generate a reference to the stack
1180        // location.
1181        InVals.push_back(FIN);
1182      } else {
1183        InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
1184                                     MachinePointerInfo(), false, false,
1185                                     false, 0));
1186      }
1187    }
1188  }
1189
1190  if (!MemOps.empty())
1191    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
1192
1193  if (isVarArg) {
1194    // This will point to the next argument passed via stack.
1195    int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
1196                                            HEXAGON_LRFP_SIZE +
1197                                            CCInfo.getNextStackOffset(),
1198                                            true);
1199    FuncInfo.setVarArgsFrameIndex(FrameIndex);
1200  }
1201
1202  return Chain;
1203}
1204
1205SDValue
1206HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1207  // VASTART stores the address of the VarArgsFrameIndex slot into the
1208  // memory location argument.
1209  MachineFunction &MF = DAG.getMachineFunction();
1210  HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
1211  SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
1212  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1213  return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr,
1214                      Op.getOperand(1), MachinePointerInfo(SV), false,
1215                      false, 0);
1216}
1217
1218// Creates a SPLAT instruction for a constant value VAL.
1219static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
1220                           SDValue Val) {
1221  if (VT.getSimpleVT() == MVT::v4i8)
1222    return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val);
1223
1224  if (VT.getSimpleVT() == MVT::v4i16)
1225    return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val);
1226
1227  return SDValue();
1228}
1229
1230static bool isSExtFree(SDValue N) {
1231  // A sign-extend of a truncate of a sign-extend is free.
1232  if (N.getOpcode() == ISD::TRUNCATE &&
1233      N.getOperand(0).getOpcode() == ISD::AssertSext)
1234    return true;
1235  // We have sign-extended loads.
1236  if (N.getOpcode() == ISD::LOAD)
1237    return true;
1238  return false;
1239}
1240
1241SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
1242  SDLoc dl(Op);
1243  SDValue InpVal = Op.getOperand(0);
1244  if (isa<ConstantSDNode>(InpVal)) {
1245    uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
1246    return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64);
1247  }
1248  SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
1249  return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
1250}
1251
1252SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1253  SDLoc dl(Op);
1254
1255  SDValue LHS = Op.getOperand(0);
1256  SDValue RHS = Op.getOperand(1);
1257  SDValue Cmp = Op.getOperand(2);
1258  ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
1259
1260  EVT VT = Op.getValueType();
1261  EVT LHSVT = LHS.getValueType();
1262  EVT RHSVT = RHS.getValueType();
1263
1264  if (LHSVT == MVT::v2i16) {
1265    assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
1266    unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
1267                                                : ISD::ZERO_EXTEND;
1268    SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
1269    SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
1270    SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
1271    return SC;
1272  }
1273
1274  // Treat all other vector types as legal.
1275  if (VT.isVector())
1276    return Op;
1277
1278  // Equals and not equals should use sign-extend, not zero-extend, since
1279  // we can represent small negative values in the compare instructions.
1280  // The LLVM default is to use zero-extend arbitrarily in these cases.
1281  if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
1282      (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
1283      (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
1284    ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
1285    if (C && C->getAPIntValue().isNegative()) {
1286      LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
1287      RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
1288      return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
1289                         LHS, RHS, Op.getOperand(2));
1290    }
1291    if (isSExtFree(LHS) || isSExtFree(RHS)) {
1292      LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
1293      RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
1294      return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
1295                         LHS, RHS, Op.getOperand(2));
1296    }
1297  }
1298  return SDValue();
1299}
1300
1301SDValue
1302HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
1303  SDValue PredOp = Op.getOperand(0);
1304  SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
1305  EVT OpVT = Op1.getValueType();
1306  SDLoc DL(Op);
1307
1308  if (OpVT == MVT::v2i16) {
1309    SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
1310    SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
1311    SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
1312    SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
1313    return TR;
1314  }
1315
1316  return SDValue();
1317}
1318
1319// Handle only specific vector loads.
1320SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1321  EVT VT = Op.getValueType();
1322  SDLoc DL(Op);
1323  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1324  SDValue Chain = LoadNode->getChain();
1325  SDValue Ptr = Op.getOperand(1);
1326  SDValue LoweredLoad;
1327  SDValue Result;
1328  SDValue Base = LoadNode->getBasePtr();
1329  ISD::LoadExtType Ext = LoadNode->getExtensionType();
1330  unsigned Alignment = LoadNode->getAlignment();
1331  SDValue LoadChain;
1332
1333  if(Ext == ISD::NON_EXTLOAD)
1334    Ext = ISD::ZEXTLOAD;
1335
1336  if (VT == MVT::v4i16) {
1337    if (Alignment == 2) {
1338      SDValue Loads[4];
1339      // Base load.
1340      Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
1341                                LoadNode->getPointerInfo(), MVT::i16,
1342                                LoadNode->isVolatile(),
1343                                LoadNode->isNonTemporal(),
1344                                LoadNode->isInvariant(),
1345                                Alignment);
1346      // Base+2 load.
1347      SDValue Increment = DAG.getConstant(2, DL, MVT::i32);
1348      Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
1349      Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
1350                                LoadNode->getPointerInfo(), MVT::i16,
1351                                LoadNode->isVolatile(),
1352                                LoadNode->isNonTemporal(),
1353                                LoadNode->isInvariant(),
1354                                Alignment);
1355      // SHL 16, then OR base and base+2.
1356      SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32);
1357      SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
1358      SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
1359      // Base + 4.
1360      Increment = DAG.getConstant(4, DL, MVT::i32);
1361      Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
1362      Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
1363                                LoadNode->getPointerInfo(), MVT::i16,
1364                                LoadNode->isVolatile(),
1365                                LoadNode->isNonTemporal(),
1366                                LoadNode->isInvariant(),
1367                                Alignment);
1368      // Base + 6.
1369      Increment = DAG.getConstant(6, DL, MVT::i32);
1370      Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
1371      Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
1372                                LoadNode->getPointerInfo(), MVT::i16,
1373                                LoadNode->isVolatile(),
1374                                LoadNode->isNonTemporal(),
1375                                LoadNode->isInvariant(),
1376                                Alignment);
1377      // SHL 16, then OR base+4 and base+6.
1378      Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
1379      SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
1380      // Combine to i64. This could be optimised out later if we can
1381      // affect reg allocation of this code.
1382      Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
1383      LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1384                              Loads[0].getValue(1), Loads[1].getValue(1),
1385                              Loads[2].getValue(1), Loads[3].getValue(1));
1386    } else {
1387      // Perform default type expansion.
1388      Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
1389                           LoadNode->isVolatile(), LoadNode->isNonTemporal(),
1390                          LoadNode->isInvariant(), LoadNode->getAlignment());
1391      LoadChain = Result.getValue(1);
1392    }
1393  } else
1394    llvm_unreachable("Custom lowering unsupported load");
1395
1396  Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
1397  // Since we pretend to lower a load, we need the original chain
1398  // info attached to the result.
1399  SDValue Ops[] = { Result, LoadChain };
1400
1401  return DAG.getMergeValues(Ops, DL);
1402}
1403
1404
1405SDValue
1406HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
1407  EVT ValTy = Op.getValueType();
1408  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
1409  unsigned Align = CPN->getAlignment();
1410  bool IsPositionIndependent = isPositionIndependent();
1411  unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
1412
1413  SDValue T;
1414  if (CPN->isMachineConstantPoolEntry())
1415    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF);
1416  else
1417    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF);
1418  if (IsPositionIndependent)
1419    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
1420  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
1421}
1422
1423SDValue
1424HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1425  EVT VT = Op.getValueType();
1426  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
1427  if (isPositionIndependent()) {
1428    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
1429    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
1430  }
1431
1432  SDValue T = DAG.getTargetJumpTable(Idx, VT);
1433  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
1434}
1435
1436SDValue
1437HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
1438  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1439  MachineFunction &MF = DAG.getMachineFunction();
1440  MachineFrameInfo &MFI = *MF.getFrameInfo();
1441  MFI.setReturnAddressIsTaken(true);
1442
1443  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1444    return SDValue();
1445
1446  EVT VT = Op.getValueType();
1447  SDLoc dl(Op);
1448  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1449  if (Depth) {
1450    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1451    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
1452    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
1453                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
1454                       MachinePointerInfo(), false, false, false, 0);
1455  }
1456
1457  // Return LR, which contains the return address. Mark it an implicit live-in.
1458  unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
1459  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
1460}
1461
1462SDValue
1463HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
1464  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1465  MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
1466  MFI.setFrameAddressIsTaken(true);
1467
1468  EVT VT = Op.getValueType();
1469  SDLoc dl(Op);
1470  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1471  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
1472                                         HRI.getFrameRegister(), VT);
1473  while (Depth--)
1474    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
1475                            MachinePointerInfo(),
1476                            false, false, false, 0);
1477  return FrameAddr;
1478}
1479
1480SDValue
1481HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1482  SDLoc dl(Op);
1483  return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
1484}
1485
1486
1487SDValue
1488HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1489  SDLoc dl(Op);
1490  auto *GAN = cast<GlobalAddressSDNode>(Op);
1491  auto PtrVT = getPointerTy(DAG.getDataLayout());
1492  auto *GV = GAN->getGlobal();
1493  int64_t Offset = GAN->getOffset();
1494
1495  auto &HLOF = *HTM.getObjFileLowering();
1496  Reloc::Model RM = HTM.getRelocationModel();
1497
1498  if (RM == Reloc::Static) {
1499    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
1500    if (HLOF.isGlobalInSmallSection(GV, HTM))
1501      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
1502    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
1503  }
1504
1505  bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1506  if (UsePCRel) {
1507    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
1508                                            HexagonII::MO_PCREL);
1509    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
1510  }
1511
1512  // Use GOT index.
1513  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1514  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
1515  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
1516  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
1517}
1518
1519// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1520SDValue
1521HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1522  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1523  SDLoc dl(Op);
1524  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1525
1526  Reloc::Model RM = HTM.getRelocationModel();
1527  if (RM == Reloc::Static) {
1528    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
1529    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
1530  }
1531
1532  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
1533  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
1534}
1535
1536SDValue
1537HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1538      const {
1539  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1540  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
1541                                               HexagonII::MO_PCREL);
1542  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
1543}
1544
1545SDValue
1546HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1547      GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, unsigned ReturnReg,
1548      unsigned char OperandFlags) const {
1549  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
1550  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1551  SDLoc dl(GA);
1552  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
1553                                           GA->getValueType(0),
1554                                           GA->getOffset(),
1555                                           OperandFlags);
1556  // Create Operands for the call.The Operands should have the following:
1557  // 1. Chain SDValue
1558  // 2. Callee which in this case is the Global address value.
1559  // 3. Registers live into the call.In this case its R0, as we
1560  //    have just one argument to be passed.
1561  // 4. InFlag if there is any.
1562  // Note: The order is important.
1563
1564  if (InFlag) {
1565    SDValue Ops[] = { Chain, TGA,
1566                      DAG.getRegister(Hexagon::R0, PtrVT), *InFlag };
1567    Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops);
1568  } else {
1569    SDValue Ops[]  = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT)};
1570    Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops);
1571  }
1572
1573  // Inform MFI that function has calls.
1574  MFI->setAdjustsStack(true);
1575
1576  SDValue Flag = Chain.getValue(1);
1577  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
1578}
1579
1580//
1581// Lower using the intial executable model for TLS addresses
1582//
1583SDValue
1584HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1585      SelectionDAG &DAG) const {
1586  SDLoc dl(GA);
1587  int64_t Offset = GA->getOffset();
1588  auto PtrVT = getPointerTy(DAG.getDataLayout());
1589
1590  // Get the thread pointer.
1591  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1592
1593  bool IsPositionIndependent = isPositionIndependent();
1594  unsigned char TF =
1595      IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1596
1597  // First generate the TLS symbol address
1598  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
1599                                           Offset, TF);
1600
1601  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1602
1603  if (IsPositionIndependent) {
1604    // Generate the GOT pointer in case of position independent code
1605    SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
1606
1607    // Add the TLS Symbol address to GOT pointer.This gives
1608    // GOT relative relocation for the symbol.
1609    Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1610  }
1611
1612  // Load the offset value for TLS symbol.This offset is relative to
1613  // thread pointer.
1614  SDValue LoadOffset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym,
1615                                   MachinePointerInfo(),
1616                                   false, false, false, 0);
1617
1618  // Address of the thread local variable is the add of thread
1619  // pointer and the offset of the variable.
1620  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
1621}
1622
1623//
1624// Lower using the local executable model for TLS addresses
1625//
1626SDValue
1627HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1628      SelectionDAG &DAG) const {
1629  SDLoc dl(GA);
1630  int64_t Offset = GA->getOffset();
1631  auto PtrVT = getPointerTy(DAG.getDataLayout());
1632
1633  // Get the thread pointer.
1634  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1635  // Generate the TLS symbol address
1636  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1637                                           HexagonII::MO_TPREL);
1638  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1639
1640  // Address of the thread local variable is the add of thread
1641  // pointer and the offset of the variable.
1642  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
1643}
1644
1645//
1646// Lower using the general dynamic model for TLS addresses
1647//
1648SDValue
1649HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1650      SelectionDAG &DAG) const {
1651  SDLoc dl(GA);
1652  int64_t Offset = GA->getOffset();
1653  auto PtrVT = getPointerTy(DAG.getDataLayout());
1654
1655  // First generate the TLS symbol address
1656  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1657                                           HexagonII::MO_GDGOT);
1658
1659  // Then, generate the GOT pointer
1660  SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
1661
1662  // Add the TLS symbol and the GOT pointer
1663  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1664  SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1665
1666  // Copy over the argument to R0
1667  SDValue InFlag;
1668  Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
1669  InFlag = Chain.getValue(1);
1670
1671  return GetDynamicTLSAddr(DAG, Chain, GA, &InFlag, PtrVT,
1672                           Hexagon::R0, HexagonII::MO_GDPLT);
1673}
1674
1675//
1676// Lower TLS addresses.
1677//
1678// For now for dynamic models, we only support the general dynamic model.
1679//
1680SDValue
1681HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1682      SelectionDAG &DAG) const {
1683  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1684
1685  switch (HTM.getTLSModel(GA->getGlobal())) {
1686    case TLSModel::GeneralDynamic:
1687    case TLSModel::LocalDynamic:
1688      return LowerToTLSGeneralDynamicModel(GA, DAG);
1689    case TLSModel::InitialExec:
1690      return LowerToTLSInitialExecModel(GA, DAG);
1691    case TLSModel::LocalExec:
1692      return LowerToTLSLocalExecModel(GA, DAG);
1693  }
1694  llvm_unreachable("Bogus TLS model");
1695}
1696
1697//===----------------------------------------------------------------------===//
1698// TargetLowering Implementation
1699//===----------------------------------------------------------------------===//
1700
1701HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
1702                                             const HexagonSubtarget &ST)
1703    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
1704      Subtarget(ST) {
1705  bool IsV4 = !Subtarget.hasV5TOps();
1706  auto &HRI = *Subtarget.getRegisterInfo();
1707  bool UseHVX = Subtarget.useHVXOps();
1708  bool UseHVXSgl = Subtarget.useHVXSglOps();
1709  bool UseHVXDbl = Subtarget.useHVXDblOps();
1710
1711  setPrefLoopAlignment(4);
1712  setPrefFunctionAlignment(4);
1713  setMinFunctionAlignment(2);
1714  setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
1715
1716  setMaxAtomicSizeInBitsSupported(64);
1717  setMinCmpXchgSizeInBits(32);
1718
1719  if (EnableHexSDNodeSched)
1720    setSchedulingPreference(Sched::VLIW);
1721  else
1722    setSchedulingPreference(Sched::Source);
1723
1724  // Limits for inline expansion of memcpy/memmove
1725  MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
1726  MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
1727  MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
1728  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
1729  MaxStoresPerMemset = MaxStoresPerMemsetCL;
1730  MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
1731
1732  //
1733  // Set up register classes.
1734  //
1735
1736  addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
1737  addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
1738  addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
1739  addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
1740  addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
1741  addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
1742  addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
1743  addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
1744  addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
1745  addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
1746  addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
1747
1748  if (Subtarget.hasV5TOps()) {
1749    addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
1750    addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
1751  }
1752
1753  if (Subtarget.hasV60TOps()) {
1754    if (Subtarget.useHVXSglOps()) {
1755      addRegisterClass(MVT::v64i8,  &Hexagon::VectorRegsRegClass);
1756      addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass);
1757      addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass);
1758      addRegisterClass(MVT::v8i64,  &Hexagon::VectorRegsRegClass);
1759      addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass);
1760      addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass);
1761      addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass);
1762      addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass);
1763      addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass);
1764    } else if (Subtarget.useHVXDblOps()) {
1765      addRegisterClass(MVT::v128i8,  &Hexagon::VectorRegs128BRegClass);
1766      addRegisterClass(MVT::v64i16,  &Hexagon::VectorRegs128BRegClass);
1767      addRegisterClass(MVT::v32i32,  &Hexagon::VectorRegs128BRegClass);
1768      addRegisterClass(MVT::v16i64,  &Hexagon::VectorRegs128BRegClass);
1769      addRegisterClass(MVT::v256i8,  &Hexagon::VecDblRegs128BRegClass);
1770      addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass);
1771      addRegisterClass(MVT::v64i32,  &Hexagon::VecDblRegs128BRegClass);
1772      addRegisterClass(MVT::v32i64,  &Hexagon::VecDblRegs128BRegClass);
1773      addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass);
1774    }
1775
1776  }
1777
1778  //
1779  // Handling of scalar operations.
1780  //
1781  // All operations default to "legal", except:
1782  // - indexed loads and stores (pre-/post-incremented),
1783  // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1784  //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1785  //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
1786  //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
1787  // which default to "expand" for at least one type.
1788
1789  // Misc operations.
1790  setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
1791  setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
1792
1793  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1794  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
1795  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
1796  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1797  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
1798  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1799  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1800  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
1801  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
1802  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1803  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1804
1805  // Custom legalize GlobalAddress nodes into CONST32.
1806  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1807  setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
1808  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
1809
1810  // Hexagon needs to optimize cases with negative constants.
1811  setOperationAction(ISD::SETCC, MVT::i8,  Custom);
1812  setOperationAction(ISD::SETCC, MVT::i16, Custom);
1813
1814  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1815  setOperationAction(ISD::VASTART, MVT::Other, Custom);
1816  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
1817  setOperationAction(ISD::VAARG,   MVT::Other, Expand);
1818
1819  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1820  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1821  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1822
1823  if (EmitJumpTables)
1824    setMinimumJumpTableEntries(MinimumJumpTables);
1825  else
1826    setMinimumJumpTableEntries(INT_MAX);
1827  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1828
1829  // Hexagon has instructions for add/sub with carry. The problem with
1830  // modeling these instructions is that they produce 2 results: Rdd and Px.
1831  // To model the update of Px, we will have to use Defs[p0..p3] which will
1832  // cause any predicate live range to spill. So, we pretend we dont't have
1833  // these instructions.
1834  setOperationAction(ISD::ADDE, MVT::i8,  Expand);
1835  setOperationAction(ISD::ADDE, MVT::i16, Expand);
1836  setOperationAction(ISD::ADDE, MVT::i32, Expand);
1837  setOperationAction(ISD::ADDE, MVT::i64, Expand);
1838  setOperationAction(ISD::SUBE, MVT::i8,  Expand);
1839  setOperationAction(ISD::SUBE, MVT::i16, Expand);
1840  setOperationAction(ISD::SUBE, MVT::i32, Expand);
1841  setOperationAction(ISD::SUBE, MVT::i64, Expand);
1842  setOperationAction(ISD::ADDC, MVT::i8,  Expand);
1843  setOperationAction(ISD::ADDC, MVT::i16, Expand);
1844  setOperationAction(ISD::ADDC, MVT::i32, Expand);
1845  setOperationAction(ISD::ADDC, MVT::i64, Expand);
1846  setOperationAction(ISD::SUBC, MVT::i8,  Expand);
1847  setOperationAction(ISD::SUBC, MVT::i16, Expand);
1848  setOperationAction(ISD::SUBC, MVT::i32, Expand);
1849  setOperationAction(ISD::SUBC, MVT::i64, Expand);
1850
1851  // Only add and sub that detect overflow are the saturating ones.
1852  for (MVT VT : MVT::integer_valuetypes()) {
1853    setOperationAction(ISD::UADDO, VT, Expand);
1854    setOperationAction(ISD::SADDO, VT, Expand);
1855    setOperationAction(ISD::USUBO, VT, Expand);
1856    setOperationAction(ISD::SSUBO, VT, Expand);
1857  }
1858
1859  setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
1860  setOperationAction(ISD::CTLZ, MVT::i16, Promote);
1861  setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
1862  setOperationAction(ISD::CTTZ, MVT::i16, Promote);
1863
1864  // In V5, popcount can count # of 1s in i64 but returns i32.
1865  // On V4 it will be expanded (set later).
1866  setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
1867  setOperationAction(ISD::CTPOP, MVT::i16, Promote);
1868  setOperationAction(ISD::CTPOP, MVT::i32, Promote);
1869  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
1870
1871  // We custom lower i64 to i64 mul, so that it is not considered as a legal
1872  // operation. There is a pattern that will match i64 mul and transform it
1873  // to a series of instructions.
1874  setOperationAction(ISD::MUL,   MVT::i64, Expand);
1875  setOperationAction(ISD::MULHS, MVT::i64, Expand);
1876
1877  for (unsigned IntExpOp :
1878       { ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
1879         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
1880         ISD::BSWAP,     ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
1881         ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
1882    setOperationAction(IntExpOp, MVT::i32, Expand);
1883    setOperationAction(IntExpOp, MVT::i64, Expand);
1884  }
1885
1886  for (unsigned FPExpOp :
1887       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
1888        ISD::FPOW, ISD::FCOPYSIGN}) {
1889    setOperationAction(FPExpOp, MVT::f32, Expand);
1890    setOperationAction(FPExpOp, MVT::f64, Expand);
1891  }
1892
1893  // No extending loads from i32.
1894  for (MVT VT : MVT::integer_valuetypes()) {
1895    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
1896    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
1897    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
1898  }
1899  // Turn FP truncstore into trunc + store.
1900  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1901  // Turn FP extload into load/fextend.
1902  for (MVT VT : MVT::fp_valuetypes())
1903    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1904
1905  // Expand BR_CC and SELECT_CC for all integer and fp types.
1906  for (MVT VT : MVT::integer_valuetypes()) {
1907    setOperationAction(ISD::BR_CC,     VT, Expand);
1908    setOperationAction(ISD::SELECT_CC, VT, Expand);
1909  }
1910  for (MVT VT : MVT::fp_valuetypes()) {
1911    setOperationAction(ISD::BR_CC,     VT, Expand);
1912    setOperationAction(ISD::SELECT_CC, VT, Expand);
1913  }
1914  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
1915
1916  //
1917  // Handling of vector operations.
1918  //
1919
1920  // Custom lower v4i16 load only. Let v4i16 store to be
1921  // promoted for now.
1922  promoteLdStType(MVT::v4i8,  MVT::i32);
1923  promoteLdStType(MVT::v2i16, MVT::i32);
1924  promoteLdStType(MVT::v8i8,  MVT::i64);
1925  promoteLdStType(MVT::v2i32, MVT::i64);
1926
1927  setOperationAction(ISD::LOAD,  MVT::v4i16, Custom);
1928  setOperationAction(ISD::STORE, MVT::v4i16, Promote);
1929  AddPromotedToType(ISD::LOAD,  MVT::v4i16, MVT::i64);
1930  AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
1931
1932  // Set the action for vector operations to "expand", then override it with
1933  // either "custom" or "legal" for specific cases.
1934  static const unsigned VectExpOps[] = {
1935    // Integer arithmetic:
1936    ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,    ISD::UDIV,
1937    ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
1938    ISD::SUBC,    ISD::SADDO,   ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,
1939    ISD::SMUL_LOHI,             ISD::UMUL_LOHI,
1940    // Logical/bit:
1941    ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
1942    ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
1943    // Floating point arithmetic/math functions:
1944    ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
1945    ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
1946    ISD::FCOS,    ISD::FPOWI,   ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
1947    ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
1948    ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
1949    ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
1950    // Misc:
1951    ISD::SELECT,  ISD::ConstantPool,
1952    // Vector:
1953    ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
1954    ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
1955    ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
1956    ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE
1957  };
1958
1959  for (MVT VT : MVT::vector_valuetypes()) {
1960    for (unsigned VectExpOp : VectExpOps)
1961      setOperationAction(VectExpOp, VT, Expand);
1962
1963    // Expand all extended loads and truncating stores:
1964    for (MVT TargetVT : MVT::vector_valuetypes()) {
1965      setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
1966      setTruncStoreAction(VT, TargetVT, Expand);
1967    }
1968
1969    setOperationAction(ISD::SRA, VT, Custom);
1970    setOperationAction(ISD::SHL, VT, Custom);
1971    setOperationAction(ISD::SRL, VT, Custom);
1972  }
1973
1974  // Types natively supported:
1975  for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1,
1976                       MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32,
1977                       MVT::v2i32, MVT::v1i64}) {
1978    setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
1979    setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
1980    setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
1981    setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
1982    setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
1983    setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);
1984
1985    setOperationAction(ISD::ADD, NativeVT, Legal);
1986    setOperationAction(ISD::SUB, NativeVT, Legal);
1987    setOperationAction(ISD::MUL, NativeVT, Legal);
1988    setOperationAction(ISD::AND, NativeVT, Legal);
1989    setOperationAction(ISD::OR,  NativeVT, Legal);
1990    setOperationAction(ISD::XOR, NativeVT, Legal);
1991  }
1992
1993  setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
1994  setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
1995  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
1996  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
1997  if (UseHVX) {
1998    if (UseHVXSgl) {
1999      setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8,  Custom);
2000      setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16,  Custom);
2001      setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32,  Custom);
2002      setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64,  Custom);
2003    } else if (UseHVXDbl) {
2004      setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8,  Custom);
2005      setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
2006      setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32,  Custom);
2007      setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64,  Custom);
2008    } else {
2009      llvm_unreachable("Unrecognized HVX mode");
2010    }
2011  }
2012  // Subtarget-specific operation actions.
2013  //
2014  if (Subtarget.hasV5TOps()) {
2015    setOperationAction(ISD::FMA,  MVT::f64, Expand);
2016    setOperationAction(ISD::FADD, MVT::f64, Expand);
2017    setOperationAction(ISD::FSUB, MVT::f64, Expand);
2018    setOperationAction(ISD::FMUL, MVT::f64, Expand);
2019
2020    setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
2021    setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
2022    setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
2023    setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
2024    setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
2025    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
2026    setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
2027    setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
2028    setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
2029    setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
2030    setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
2031    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
2032
2033  } else { // V4
2034    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
2035    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
2036    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
2037    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
2038    setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
2039    setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
2040    setOperationAction(ISD::FP_EXTEND,  MVT::f32, Expand);
2041    setOperationAction(ISD::FP_ROUND,   MVT::f64, Expand);
2042    setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
2043
2044    setOperationAction(ISD::CTPOP, MVT::i8,  Expand);
2045    setOperationAction(ISD::CTPOP, MVT::i16, Expand);
2046    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
2047    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
2048
2049    // Expand these operations for both f32 and f64:
2050    for (unsigned FPExpOpV4 :
2051         {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
2052      setOperationAction(FPExpOpV4, MVT::f32, Expand);
2053      setOperationAction(FPExpOpV4, MVT::f64, Expand);
2054    }
2055
2056    for (ISD::CondCode FPExpCCV4 :
2057         {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
2058          ISD::SETUO,  ISD::SETO}) {
2059      setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
2060      setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
2061    }
2062  }
2063
2064  // Handling of indexed loads/stores: default is "expand".
2065  //
2066  for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
2067    setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal);
2068    setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
2069  }
2070
2071  if (UseHVXDbl) {
2072    for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
2073      setIndexedLoadAction(ISD::POST_INC, VT, Legal);
2074      setIndexedStoreAction(ISD::POST_INC, VT, Legal);
2075    }
2076  }
2077
2078  computeRegisterProperties(&HRI);
2079
2080  //
2081  // Library calls for unsupported operations
2082  //
2083  bool FastMath  = EnableFastMath;
2084
2085  setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
2086  setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
2087  setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
2088  setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
2089  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
2090  setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
2091  setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
2092  setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
2093
2094  setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
2095  setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
2096  setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
2097  setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
2098  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
2099  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
2100
2101  if (IsV4) {
2102    // Handle single-precision floating point operations on V4.
2103    if (FastMath) {
2104      setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
2105      setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
2106      setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
2107      setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
2108      setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
2109      // Double-precision compares.
2110      setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
2111      setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
2112    } else {
2113      setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
2114      setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
2115      setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
2116      setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
2117      setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
2118      // Double-precision compares.
2119      setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
2120      setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
2121    }
2122  }
2123
2124  // This is the only fast library function for sqrtd.
2125  if (FastMath)
2126    setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
2127
2128  // Prefix is: nothing  for "slow-math",
2129  //            "fast2_" for V4 fast-math and V5+ fast-math double-precision
2130  // (actually, keep fast-math and fast-math2 separate for now)
2131  if (FastMath) {
2132    setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
2133    setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
2134    setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
2135    setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
2136    // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
2137    setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
2138  } else {
2139    setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
2140    setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
2141    setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
2142    setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
2143    setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
2144  }
2145
2146  if (Subtarget.hasV5TOps()) {
2147    if (FastMath)
2148      setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
2149    else
2150      setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
2151  } else {
2152    // V4
2153    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
2154    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
2155    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
2156    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
2157    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
2158    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
2159    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
2160    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
2161    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
2162    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
2163    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
2164    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
2165    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
2166    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
2167    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
2168    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
2169    setLibcallName(RTLIB::FPEXT_F32_F64,    "__hexagon_extendsfdf2");
2170    setLibcallName(RTLIB::FPROUND_F64_F32,  "__hexagon_truncdfsf2");
2171    setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
2172    setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
2173    setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
2174    setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
2175    setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
2176    setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
2177    setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
2178    setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
2179    setLibcallName(RTLIB::UO_F32,  "__hexagon_unordsf2");
2180    setLibcallName(RTLIB::UO_F64,  "__hexagon_unorddf2");
2181    setLibcallName(RTLIB::O_F32,   "__hexagon_unordsf2");
2182    setLibcallName(RTLIB::O_F64,   "__hexagon_unorddf2");
2183  }
2184
2185  // These cause problems when the shift amount is non-constant.
2186  setLibcallName(RTLIB::SHL_I128, nullptr);
2187  setLibcallName(RTLIB::SRL_I128, nullptr);
2188  setLibcallName(RTLIB::SRA_I128, nullptr);
2189}
2190
2191
2192const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
2193  switch ((HexagonISD::NodeType)Opcode) {
2194  case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
2195  case HexagonISD::ARGEXTEND:     return "HexagonISD::ARGEXTEND";
2196  case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
2197  case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
2198  case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
2199  case HexagonISD::CALLR:         return "HexagonISD::CALLR";
2200  case HexagonISD::CALLv3nr:      return "HexagonISD::CALLv3nr";
2201  case HexagonISD::CALLv3:        return "HexagonISD::CALLv3";
2202  case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
2203  case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
2204  case HexagonISD::CONST32:       return "HexagonISD::CONST32";
2205  case HexagonISD::CP:            return "HexagonISD::CP";
2206  case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
2207  case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
2208  case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
2209  case HexagonISD::EXTRACTURP:    return "HexagonISD::EXTRACTURP";
2210  case HexagonISD::FCONST32:      return "HexagonISD::FCONST32";
2211  case HexagonISD::INSERT:        return "HexagonISD::INSERT";
2212  case HexagonISD::INSERTRP:      return "HexagonISD::INSERTRP";
2213  case HexagonISD::JT:            return "HexagonISD::JT";
2214  case HexagonISD::PACKHL:        return "HexagonISD::PACKHL";
2215  case HexagonISD::POPCOUNT:      return "HexagonISD::POPCOUNT";
2216  case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
2217  case HexagonISD::SHUFFEB:       return "HexagonISD::SHUFFEB";
2218  case HexagonISD::SHUFFEH:       return "HexagonISD::SHUFFEH";
2219  case HexagonISD::SHUFFOB:       return "HexagonISD::SHUFFOB";
2220  case HexagonISD::SHUFFOH:       return "HexagonISD::SHUFFOH";
2221  case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
2222  case HexagonISD::VCMPBEQ:       return "HexagonISD::VCMPBEQ";
2223  case HexagonISD::VCMPBGT:       return "HexagonISD::VCMPBGT";
2224  case HexagonISD::VCMPBGTU:      return "HexagonISD::VCMPBGTU";
2225  case HexagonISD::VCMPHEQ:       return "HexagonISD::VCMPHEQ";
2226  case HexagonISD::VCMPHGT:       return "HexagonISD::VCMPHGT";
2227  case HexagonISD::VCMPHGTU:      return "HexagonISD::VCMPHGTU";
2228  case HexagonISD::VCMPWEQ:       return "HexagonISD::VCMPWEQ";
2229  case HexagonISD::VCMPWGT:       return "HexagonISD::VCMPWGT";
2230  case HexagonISD::VCMPWGTU:      return "HexagonISD::VCMPWGTU";
2231  case HexagonISD::VCOMBINE:      return "HexagonISD::VCOMBINE";
2232  case HexagonISD::VSHLH:         return "HexagonISD::VSHLH";
2233  case HexagonISD::VSHLW:         return "HexagonISD::VSHLW";
2234  case HexagonISD::VSPLATB:       return "HexagonISD::VSPLTB";
2235  case HexagonISD::VSPLATH:       return "HexagonISD::VSPLATH";
2236  case HexagonISD::VSRAH:         return "HexagonISD::VSRAH";
2237  case HexagonISD::VSRAW:         return "HexagonISD::VSRAW";
2238  case HexagonISD::VSRLH:         return "HexagonISD::VSRLH";
2239  case HexagonISD::VSRLW:         return "HexagonISD::VSRLW";
2240  case HexagonISD::VSXTBH:        return "HexagonISD::VSXTBH";
2241  case HexagonISD::VSXTBW:        return "HexagonISD::VSXTBW";
2242  case HexagonISD::OP_END:        break;
2243  }
2244  return nullptr;
2245}
2246
2247bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
2248  EVT MTy1 = EVT::getEVT(Ty1);
2249  EVT MTy2 = EVT::getEVT(Ty2);
2250  if (!MTy1.isSimple() || !MTy2.isSimple())
2251    return false;
2252  return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32);
2253}
2254
2255bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
2256  if (!VT1.isSimple() || !VT2.isSimple())
2257    return false;
2258  return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
2259}
2260
2261// shouldExpandBuildVectorWithShuffles
2262// Should we expand the build vector with shuffles?
2263bool
2264HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
2265                                  unsigned DefinedValues) const {
2266
2267  // Hexagon vector shuffle operates on element sizes of bytes or halfwords
2268  EVT EltVT = VT.getVectorElementType();
2269  int EltBits = EltVT.getSizeInBits();
2270  if ((EltBits != 8) && (EltBits != 16))
2271    return false;
2272
2273  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
2274}
2275
2276// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3).  V1 and
2277// V2 are the two vectors to select data from, V3 is the permutation.
2278static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
2279  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
2280  SDValue V1 = Op.getOperand(0);
2281  SDValue V2 = Op.getOperand(1);
2282  SDLoc dl(Op);
2283  EVT VT = Op.getValueType();
2284
2285  if (V2.isUndef())
2286    V2 = V1;
2287
2288  if (SVN->isSplat()) {
2289    int Lane = SVN->getSplatIndex();
2290    if (Lane == -1) Lane = 0;
2291
2292    // Test if V1 is a SCALAR_TO_VECTOR.
2293    if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
2294      return createSplat(DAG, dl, VT, V1.getOperand(0));
2295
2296    // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
2297    // (and probably will turn into a SCALAR_TO_VECTOR once legalization
2298    // reaches it).
2299    if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
2300        !isa<ConstantSDNode>(V1.getOperand(0))) {
2301      bool IsScalarToVector = true;
2302      for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
2303        if (!V1.getOperand(i).isUndef()) {
2304          IsScalarToVector = false;
2305          break;
2306        }
2307      if (IsScalarToVector)
2308        return createSplat(DAG, dl, VT, V1.getOperand(0));
2309    }
2310    return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32));
2311  }
2312
2313  // FIXME: We need to support more general vector shuffles.  See
2314  // below the comment from the ARM backend that deals in the general
2315  // case with the vector shuffles.  For now, let expand handle these.
2316  return SDValue();
2317
2318  // If the shuffle is not directly supported and it has 4 elements, use
2319  // the PerfectShuffle-generated table to synthesize it from other shuffles.
2320}
2321
2322// If BUILD_VECTOR has same base element repeated several times,
2323// report true.
2324static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
2325  unsigned NElts = BVN->getNumOperands();
2326  SDValue V0 = BVN->getOperand(0);
2327
2328  for (unsigned i = 1, e = NElts; i != e; ++i) {
2329    if (BVN->getOperand(i) != V0)
2330      return false;
2331  }
2332  return true;
2333}
2334
2335// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert
2336// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
2337// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
2338static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) {
2339  BuildVectorSDNode *BVN = 0;
2340  SDValue V1 = Op.getOperand(0);
2341  SDValue V2 = Op.getOperand(1);
2342  SDValue V3;
2343  SDLoc dl(Op);
2344  EVT VT = Op.getValueType();
2345
2346  if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
2347      isCommonSplatElement(BVN))
2348    V3 = V2;
2349  else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
2350           isCommonSplatElement(BVN))
2351    V3 = V1;
2352  else
2353    return SDValue();
2354
2355  SDValue CommonSplat = BVN->getOperand(0);
2356  SDValue Result;
2357
2358  if (VT.getSimpleVT() == MVT::v4i16) {
2359    switch (Op.getOpcode()) {
2360    case ISD::SRA:
2361      Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat);
2362      break;
2363    case ISD::SHL:
2364      Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat);
2365      break;
2366    case ISD::SRL:
2367      Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat);
2368      break;
2369    default:
2370      return SDValue();
2371    }
2372  } else if (VT.getSimpleVT() == MVT::v2i32) {
2373    switch (Op.getOpcode()) {
2374    case ISD::SRA:
2375      Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat);
2376      break;
2377    case ISD::SHL:
2378      Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat);
2379      break;
2380    case ISD::SRL:
2381      Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat);
2382      break;
2383    default:
2384      return SDValue();
2385    }
2386  } else {
2387    return SDValue();
2388  }
2389
2390  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
2391}
2392
2393SDValue
2394HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2395  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
2396  SDLoc dl(Op);
2397  EVT VT = Op.getValueType();
2398
2399  unsigned Size = VT.getSizeInBits();
2400
2401  // Only handle vectors of 64 bits or shorter.
2402  if (Size > 64)
2403    return SDValue();
2404
2405  APInt APSplatBits, APSplatUndef;
2406  unsigned SplatBitSize;
2407  bool HasAnyUndefs;
2408  unsigned NElts = BVN->getNumOperands();
2409
2410  // Try to generate a SPLAT instruction.
2411  if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) &&
2412      (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2413                            HasAnyUndefs, 0, true) && SplatBitSize <= 16)) {
2414    unsigned SplatBits = APSplatBits.getZExtValue();
2415    int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >>
2416                       (32 - SplatBitSize));
2417    return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32));
2418  }
2419
2420  // Try to generate COMBINE to build v2i32 vectors.
2421  if (VT.getSimpleVT() == MVT::v2i32) {
2422    SDValue V0 = BVN->getOperand(0);
2423    SDValue V1 = BVN->getOperand(1);
2424
2425    if (V0.isUndef())
2426      V0 = DAG.getConstant(0, dl, MVT::i32);
2427    if (V1.isUndef())
2428      V1 = DAG.getConstant(0, dl, MVT::i32);
2429
2430    ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0);
2431    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1);
2432    // If the element isn't a constant, it is in a register:
2433    // generate a COMBINE Register Register instruction.
2434    if (!C0 || !C1)
2435      return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
2436
2437    // If one of the operands is an 8 bit integer constant, generate
2438    // a COMBINE Immediate Immediate instruction.
2439    if (isInt<8>(C0->getSExtValue()) ||
2440        isInt<8>(C1->getSExtValue()))
2441      return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
2442  }
2443
2444  // Try to generate a S2_packhl to build v2i16 vectors.
2445  if (VT.getSimpleVT() == MVT::v2i16) {
2446    for (unsigned i = 0, e = NElts; i != e; ++i) {
2447      if (BVN->getOperand(i).isUndef())
2448        continue;
2449      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i));
2450      // If the element isn't a constant, it is in a register:
2451      // generate a S2_packhl instruction.
2452      if (!Cst) {
2453        SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16,
2454                                   BVN->getOperand(1), BVN->getOperand(0));
2455
2456        return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16,
2457                                          pack);
2458      }
2459    }
2460  }
2461
2462  // In the general case, generate a CONST32 or a CONST64 for constant vectors,
2463  // and insert_vector_elt for all the other cases.
2464  uint64_t Res = 0;
2465  unsigned EltSize = Size / NElts;
2466  SDValue ConstVal;
2467  uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize);
2468  bool HasNonConstantElements = false;
2469
2470  for (unsigned i = 0, e = NElts; i != e; ++i) {
2471    // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's
2472    // combine, const64, etc. are Big Endian.
2473    unsigned OpIdx = NElts - i - 1;
2474    SDValue Operand = BVN->getOperand(OpIdx);
2475    if (Operand.isUndef())
2476      continue;
2477
2478    int64_t Val = 0;
2479    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand))
2480      Val = Cst->getSExtValue();
2481    else
2482      HasNonConstantElements = true;
2483
2484    Val &= Mask;
2485    Res = (Res << EltSize) | Val;
2486  }
2487
2488  if (Size == 64)
2489    ConstVal = DAG.getConstant(Res, dl, MVT::i64);
2490  else
2491    ConstVal = DAG.getConstant(Res, dl, MVT::i32);
2492
2493  // When there are non constant operands, add them with INSERT_VECTOR_ELT to
2494  // ConstVal, the constant part of the vector.
2495  if (HasNonConstantElements) {
2496    EVT EltVT = VT.getVectorElementType();
2497    SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), dl, MVT::i64);
2498    SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
2499                                  DAG.getConstant(32, dl, MVT::i64));
2500
2501    for (unsigned i = 0, e = NElts; i != e; ++i) {
2502      // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon
2503      // is Big Endian.
2504      unsigned OpIdx = NElts - i - 1;
2505      SDValue Operand = BVN->getOperand(OpIdx);
2506      if (isa<ConstantSDNode>(Operand))
2507        // This operand is already in ConstVal.
2508        continue;
2509
2510      if (VT.getSizeInBits() == 64 &&
2511          Operand.getValueType().getSizeInBits() == 32) {
2512        SDValue C = DAG.getConstant(0, dl, MVT::i32);
2513        Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
2514      }
2515
2516      SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
2517      SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
2518      SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
2519      const SDValue Ops[] = {ConstVal, Operand, Combined};
2520
2521      if (VT.getSizeInBits() == 32)
2522        ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
2523      else
2524        ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
2525    }
2526  }
2527
2528  return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
2529}
2530
2531SDValue
2532HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2533                                           SelectionDAG &DAG) const {
2534  SDLoc dl(Op);
2535  bool UseHVX = Subtarget.useHVXOps();
2536  EVT VT = Op.getValueType();
2537  unsigned NElts = Op.getNumOperands();
2538  SDValue Vec0 = Op.getOperand(0);
2539  EVT VecVT = Vec0.getValueType();
2540  unsigned Width = VecVT.getSizeInBits();
2541
2542  if (NElts == 2) {
2543    MVT ST = VecVT.getSimpleVT();
2544    // We are trying to concat two v2i16 to a single v4i16, or two v4i8
2545    // into a single v8i8.
2546    if (ST == MVT::v2i16 || ST == MVT::v4i8)
2547      return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0);
2548
2549    if (UseHVX) {
2550      assert((Width ==  64*8 && Subtarget.useHVXSglOps()) ||
2551             (Width == 128*8 && Subtarget.useHVXDblOps()));
2552      SDValue Vec1 = Op.getOperand(1);
2553      MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32;
2554      MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32;
2555      SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0);
2556      SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1);
2557      SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0);
2558      return DAG.getNode(ISD::BITCAST, dl, VT, VC);
2559    }
2560  }
2561
2562  if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64)
2563    return SDValue();
2564
2565  SDValue C0 = DAG.getConstant(0, dl, MVT::i64);
2566  SDValue C32 = DAG.getConstant(32, dl, MVT::i64);
2567  SDValue W = DAG.getConstant(Width, dl, MVT::i64);
2568  // Create the "width" part of the argument to insert_rp/insertp_rp.
2569  SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32);
2570  SDValue V = C0;
2571
2572  for (unsigned i = 0, e = NElts; i != e; ++i) {
2573    unsigned N = NElts-i-1;
2574    SDValue OpN = Op.getOperand(N);
2575
2576    if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) {
2577      SDValue C = DAG.getConstant(0, dl, MVT::i32);
2578      OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
2579    }
2580    SDValue Idx = DAG.getConstant(N, dl, MVT::i64);
2581    SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W);
2582    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
2583    if (VT.getSizeInBits() == 32)
2584      V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
2585    else
2586      V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
2587  }
2588
2589  return DAG.getNode(ISD::BITCAST, dl, VT, V);
2590}
2591
2592SDValue
2593HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
2594                                           SelectionDAG &DAG) const {
2595  EVT VT = Op.getValueType();
2596  int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
2597  SDLoc dl(Op);
2598  SDValue Idx = Op.getOperand(1);
2599  SDValue Vec = Op.getOperand(0);
2600  EVT VecVT = Vec.getValueType();
2601  EVT EltVT = VecVT.getVectorElementType();
2602  int EltSize = EltVT.getSizeInBits();
2603  SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ?
2604                                  EltSize : VTN * EltSize, dl, MVT::i64);
2605
2606  // Constant element number.
2607  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Idx)) {
2608    uint64_t X = CI->getZExtValue();
2609    SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32);
2610    const SDValue Ops[] = {Vec, Width, Offset};
2611
2612    ConstantSDNode *CW = dyn_cast<ConstantSDNode>(Width);
2613    assert(CW && "Non constant width in LowerEXTRACT_VECTOR");
2614
2615    SDValue N;
2616    MVT SVT = VecVT.getSimpleVT();
2617    uint64_t W = CW->getZExtValue();
2618
2619    if (W == 32) {
2620      // Translate this node into EXTRACT_SUBREG.
2621      unsigned Subreg = (X == 0) ? Hexagon::subreg_loreg : 0;
2622
2623      if (X == 0)
2624        Subreg = Hexagon::subreg_loreg;
2625      else if (SVT == MVT::v2i32 && X == 1)
2626        Subreg = Hexagon::subreg_hireg;
2627      else if (SVT == MVT::v4i16 && X == 2)
2628        Subreg = Hexagon::subreg_hireg;
2629      else if (SVT == MVT::v8i8 && X == 4)
2630        Subreg = Hexagon::subreg_hireg;
2631      else
2632        llvm_unreachable("Bad offset");
2633      N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec);
2634
2635    } else if (VecVT.getSizeInBits() == 32) {
2636      N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops);
2637    } else {
2638      N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops);
2639      if (VT.getSizeInBits() == 32)
2640        N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
2641    }
2642
2643    return DAG.getNode(ISD::BITCAST, dl, VT, N);
2644  }
2645
2646  // Variable element number.
2647  SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
2648                               DAG.getConstant(EltSize, dl, MVT::i32));
2649  SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
2650                                DAG.getConstant(32, dl, MVT::i64));
2651  SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
2652
2653  const SDValue Ops[] = {Vec, Combined};
2654
2655  SDValue N;
2656  if (VecVT.getSizeInBits() == 32) {
2657    N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops);
2658  } else {
2659    N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops);
2660    if (VT.getSizeInBits() == 32)
2661      N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
2662  }
2663  return DAG.getNode(ISD::BITCAST, dl, VT, N);
2664}
2665
2666SDValue
2667HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
2668                                          SelectionDAG &DAG) const {
2669  EVT VT = Op.getValueType();
2670  int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
2671  SDLoc dl(Op);
2672  SDValue Vec = Op.getOperand(0);
2673  SDValue Val = Op.getOperand(1);
2674  SDValue Idx = Op.getOperand(2);
2675  EVT VecVT = Vec.getValueType();
2676  EVT EltVT = VecVT.getVectorElementType();
2677  int EltSize = EltVT.getSizeInBits();
2678  SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
2679                                  EltSize : VTN * EltSize, dl, MVT::i64);
2680
2681  if (ConstantSDNode *C = cast<ConstantSDNode>(Idx)) {
2682    SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32);
2683    const SDValue Ops[] = {Vec, Val, Width, Offset};
2684
2685    SDValue N;
2686    if (VT.getSizeInBits() == 32)
2687      N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops);
2688    else
2689      N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops);
2690
2691    return DAG.getNode(ISD::BITCAST, dl, VT, N);
2692  }
2693
2694  // Variable element number.
2695  SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
2696                               DAG.getConstant(EltSize, dl, MVT::i32));
2697  SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
2698                                DAG.getConstant(32, dl, MVT::i64));
2699  SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
2700
2701  if (VT.getSizeInBits() == 64 &&
2702      Val.getValueType().getSizeInBits() == 32) {
2703    SDValue C = DAG.getConstant(0, dl, MVT::i32);
2704    Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
2705  }
2706
2707  const SDValue Ops[] = {Vec, Val, Combined};
2708
2709  SDValue N;
2710  if (VT.getSizeInBits() == 32)
2711    N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
2712  else
2713    N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
2714
2715  return DAG.getNode(ISD::BITCAST, dl, VT, N);
2716}
2717
2718bool
2719HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
2720  // Assuming the caller does not have either a signext or zeroext modifier, and
2721  // only one value is accepted, any reasonable truncation is allowed.
2722  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
2723    return false;
2724
2725  // FIXME: in principle up to 64-bit could be made safe, but it would be very
2726  // fragile at the moment: any support for multiple value returns would be
2727  // liable to disallow tail calls involving i64 -> iN truncation in many cases.
2728  return Ty1->getPrimitiveSizeInBits() <= 32;
2729}
2730
2731SDValue
2732HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
2733  SDValue Chain     = Op.getOperand(0);
2734  SDValue Offset    = Op.getOperand(1);
2735  SDValue Handler   = Op.getOperand(2);
2736  SDLoc dl(Op);
2737  auto PtrVT = getPointerTy(DAG.getDataLayout());
2738
2739  // Mark function as containing a call to EH_RETURN.
2740  HexagonMachineFunctionInfo *FuncInfo =
2741    DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
2742  FuncInfo->setHasEHReturn();
2743
2744  unsigned OffsetReg = Hexagon::R28;
2745
2746  SDValue StoreAddr =
2747      DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
2748                  DAG.getIntPtrConstant(4, dl));
2749  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
2750                       false, false, 0);
2751  Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
2752
2753  // Not needed we already use it as explict input to EH_RETURN.
2754  // MF.getRegInfo().addLiveOut(OffsetReg);
2755
2756  return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
2757}
2758
2759SDValue
2760HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2761  unsigned Opc = Op.getOpcode();
2762  switch (Opc) {
2763    default:
2764#ifndef NDEBUG
2765      Op.getNode()->dumpr(&DAG);
2766      if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
2767        errs() << "Check for a non-legal type in this operation\n";
2768#endif
2769      llvm_unreachable("Should not custom lower this!");
2770    case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
2771    case ISD::INSERT_SUBVECTOR:     return LowerINSERT_VECTOR(Op, DAG);
2772    case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR(Op, DAG);
2773    case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_VECTOR(Op, DAG);
2774    case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR(Op, DAG);
2775    case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
2776    case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
2777    case ISD::SRA:
2778    case ISD::SHL:
2779    case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
2780    case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
2781    case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
2782    case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
2783      // Frame & Return address. Currently unimplemented.
2784    case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
2785    case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
2786    case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
2787    case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
2788    case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
2789    case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
2790    case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
2791    case ISD::VASTART:              return LowerVASTART(Op, DAG);
2792    // Custom lower some vector loads.
2793    case ISD::LOAD:                 return LowerLOAD(Op, DAG);
2794    case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
2795    case ISD::SETCC:                return LowerSETCC(Op, DAG);
2796    case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
2797    case ISD::CTPOP:                return LowerCTPOP(Op, DAG);
2798    case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2799    case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
2800    case ISD::INLINEASM:            return LowerINLINEASM(Op, DAG);
2801    case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
2802  }
2803}
2804
2805/// Returns relocation base for the given PIC jumptable.
2806SDValue
2807HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2808                                                SelectionDAG &DAG) const {
2809  int Idx = cast<JumpTableSDNode>(Table)->getIndex();
2810  EVT VT = Table.getValueType();
2811  SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
2812  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
2813}
2814
2815MachineBasicBlock *HexagonTargetLowering::EmitInstrWithCustomInserter(
2816    MachineInstr &MI, MachineBasicBlock *BB) const {
2817  switch (MI.getOpcode()) {
2818  case Hexagon::ALLOCA: {
2819    MachineFunction *MF = BB->getParent();
2820    auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>();
2821    FuncInfo->addAllocaAdjustInst(&MI);
2822    return BB;
2823  }
2824  default:
2825    llvm_unreachable("Unexpected instr type to insert");
2826  } // switch
2827}
2828
2829//===----------------------------------------------------------------------===//
2830// Inline Assembly Support
2831//===----------------------------------------------------------------------===//
2832
2833TargetLowering::ConstraintType
2834HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
2835  if (Constraint.size() == 1) {
2836    switch (Constraint[0]) {
2837      case 'q':
2838      case 'v':
2839        if (Subtarget.useHVXOps())
2840          return C_Register;
2841        break;
2842    }
2843  }
2844  return TargetLowering::getConstraintType(Constraint);
2845}
2846
2847std::pair<unsigned, const TargetRegisterClass *>
2848HexagonTargetLowering::getRegForInlineAsmConstraint(
2849    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
2850  bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
2851
2852  if (Constraint.size() == 1) {
2853    switch (Constraint[0]) {
2854    case 'r':   // R0-R31
2855       switch (VT.SimpleTy) {
2856       default:
2857         llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
2858       case MVT::i32:
2859       case MVT::i16:
2860       case MVT::i8:
2861       case MVT::f32:
2862         return std::make_pair(0U, &Hexagon::IntRegsRegClass);
2863       case MVT::i64:
2864       case MVT::f64:
2865         return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
2866      }
2867    case 'q': // q0-q3
2868       switch (VT.SimpleTy) {
2869       default:
2870         llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
2871       case MVT::v1024i1:
2872       case MVT::v512i1:
2873       case MVT::v32i16:
2874       case MVT::v16i32:
2875       case MVT::v64i8:
2876       case MVT::v8i64:
2877         return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
2878    }
2879    case 'v': // V0-V31
2880       switch (VT.SimpleTy) {
2881       default:
2882         llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
2883       case MVT::v16i32:
2884       case MVT::v32i16:
2885       case MVT::v64i8:
2886       case MVT::v8i64:
2887         return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
2888       case MVT::v32i32:
2889       case MVT::v64i16:
2890       case MVT::v16i64:
2891       case MVT::v128i8:
2892         if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
2893           return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
2894         else
2895           return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
2896       case MVT::v256i8:
2897       case MVT::v128i16:
2898       case MVT::v64i32:
2899       case MVT::v32i64:
2900         return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
2901       }
2902
2903    default:
2904      llvm_unreachable("Unknown asm register class");
2905    }
2906  }
2907
2908  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2909}
2910
2911/// isFPImmLegal - Returns true if the target can instruction select the
2912/// specified FP immediate natively. If false, the legalizer will
2913/// materialize the FP immediate as a load from a constant pool.
2914bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
2915  return Subtarget.hasV5TOps();
2916}
2917
2918/// isLegalAddressingMode - Return true if the addressing mode represented by
2919/// AM is legal for this target, for a load/store of the specified type.
2920bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
2921                                                  const AddrMode &AM, Type *Ty,
2922                                                  unsigned AS) const {
2923  // Allows a signed-extended 11-bit immediate field.
2924  if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
2925    return false;
2926
2927  // No global is ever allowed as a base.
2928  if (AM.BaseGV)
2929    return false;
2930
2931  int Scale = AM.Scale;
2932  if (Scale < 0) Scale = -Scale;
2933  switch (Scale) {
2934  case 0:  // No scale reg, "r+i", "r", or just "i".
2935    break;
2936  default: // No scaled addressing mode.
2937    return false;
2938  }
2939  return true;
2940}
2941
2942/// Return true if folding a constant offset with the given GlobalAddress is
2943/// legal.  It is frequently not legal in PIC relocation models.
2944bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
2945      const {
2946  return HTM.getRelocationModel() == Reloc::Static;
2947}
2948
2949
2950/// isLegalICmpImmediate - Return true if the specified immediate is legal
2951/// icmp immediate, that is the target has icmp instructions which can compare
2952/// a register against the immediate without having to materialize the
2953/// immediate into a register.
2954bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
2955  return Imm >= -512 && Imm <= 511;
2956}
2957
2958/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2959/// for tail call optimization. Targets which want to do tail call
2960/// optimization should implement this function.
2961bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
2962                                 SDValue Callee,
2963                                 CallingConv::ID CalleeCC,
2964                                 bool isVarArg,
2965                                 bool isCalleeStructRet,
2966                                 bool isCallerStructRet,
2967                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
2968                                 const SmallVectorImpl<SDValue> &OutVals,
2969                                 const SmallVectorImpl<ISD::InputArg> &Ins,
2970                                 SelectionDAG& DAG) const {
2971  const Function *CallerF = DAG.getMachineFunction().getFunction();
2972  CallingConv::ID CallerCC = CallerF->getCallingConv();
2973  bool CCMatch = CallerCC == CalleeCC;
2974
2975  // ***************************************************************************
2976  //  Look for obvious safe cases to perform tail call optimization that do not
2977  //  require ABI changes.
2978  // ***************************************************************************
2979
2980  // If this is a tail call via a function pointer, then don't do it!
2981  if (!(isa<GlobalAddressSDNode>(Callee)) &&
2982      !(isa<ExternalSymbolSDNode>(Callee))) {
2983    return false;
2984  }
2985
2986  // Do not optimize if the calling conventions do not match.
2987  if (!CCMatch)
2988    return false;
2989
2990  // Do not tail call optimize vararg calls.
2991  if (isVarArg)
2992    return false;
2993
2994  // Also avoid tail call optimization if either caller or callee uses struct
2995  // return semantics.
2996  if (isCalleeStructRet || isCallerStructRet)
2997    return false;
2998
2999  // In addition to the cases above, we also disable Tail Call Optimization if
3000  // the calling convention code that at least one outgoing argument needs to
3001  // go on the stack. We cannot check that here because at this point that
3002  // information is not available.
3003  return true;
3004}
3005
3006// Return true when the given node fits in a positive half word.
3007bool llvm::isPositiveHalfWord(SDNode *N) {
3008  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3009  if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue()))
3010    return true;
3011
3012  switch (N->getOpcode()) {
3013  default:
3014    return false;
3015  case ISD::SIGN_EXTEND_INREG:
3016    return true;
3017  }
3018}
3019
3020bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
3021      unsigned AS, unsigned Align, bool *Fast) const {
3022  if (Fast)
3023    *Fast = false;
3024
3025  switch (VT.getSimpleVT().SimpleTy) {
3026  default:
3027    return false;
3028  case MVT::v64i8:
3029  case MVT::v128i8:
3030  case MVT::v256i8:
3031  case MVT::v32i16:
3032  case MVT::v64i16:
3033  case MVT::v128i16:
3034  case MVT::v16i32:
3035  case MVT::v32i32:
3036  case MVT::v64i32:
3037  case MVT::v8i64:
3038  case MVT::v16i64:
3039  case MVT::v32i64:
3040    return true;
3041  }
3042  return false;
3043}
3044
3045
3046std::pair<const TargetRegisterClass*, uint8_t>
3047HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3048      MVT VT) const {
3049  const TargetRegisterClass *RRC = nullptr;
3050
3051  uint8_t Cost = 1;
3052  switch (VT.SimpleTy) {
3053  default:
3054    return TargetLowering::findRepresentativeClass(TRI, VT);
3055  case MVT::v64i8:
3056  case MVT::v32i16:
3057  case MVT::v16i32:
3058  case MVT::v8i64:
3059    RRC = &Hexagon::VectorRegsRegClass;
3060    break;
3061  case MVT::v128i8:
3062  case MVT::v64i16:
3063  case MVT::v32i32:
3064  case MVT::v16i64:
3065    if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() &&
3066        Subtarget.useHVXDblOps())
3067      RRC = &Hexagon::VectorRegs128BRegClass;
3068    else
3069      RRC = &Hexagon::VecDblRegsRegClass;
3070    break;
3071  case MVT::v256i8:
3072  case MVT::v128i16:
3073  case MVT::v64i32:
3074  case MVT::v32i64:
3075    RRC = &Hexagon::VecDblRegs128BRegClass;
3076    break;
3077  }
3078  return std::make_pair(RRC, Cost);
3079}
3080
3081Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
3082      AtomicOrdering Ord) const {
3083  BasicBlock *BB = Builder.GetInsertBlock();
3084  Module *M = BB->getParent()->getParent();
3085  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
3086  unsigned SZ = Ty->getPrimitiveSizeInBits();
3087  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3088  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3089                                   : Intrinsic::hexagon_L4_loadd_locked;
3090  Value *Fn = Intrinsic::getDeclaration(M, IntID);
3091  return Builder.CreateCall(Fn, Addr, "larx");
3092}
3093
3094/// Perform a store-conditional operation to Addr. Return the status of the
3095/// store. This should be 0 if the store succeeded, non-zero otherwise.
3096Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
3097      Value *Val, Value *Addr, AtomicOrdering Ord) const {
3098  BasicBlock *BB = Builder.GetInsertBlock();
3099  Module *M = BB->getParent()->getParent();
3100  Type *Ty = Val->getType();
3101  unsigned SZ = Ty->getPrimitiveSizeInBits();
3102  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3103  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3104                                   : Intrinsic::hexagon_S4_stored_locked;
3105  Value *Fn = Intrinsic::getDeclaration(M, IntID);
3106  Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
3107  Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3108  Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3109  return Ext;
3110}
3111
3112TargetLowering::AtomicExpansionKind
3113HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3114  // Do not expand loads and stores that don't exceed 64 bits.
3115  return LI->getType()->getPrimitiveSizeInBits() > 64
3116             ? AtomicExpansionKind::LLOnly
3117             : AtomicExpansionKind::None;
3118}
3119
3120bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3121  // Do not expand loads and stores that don't exceed 64 bits.
3122  return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
3123}
3124
3125bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3126      AtomicCmpXchgInst *AI) const {
3127  const DataLayout &DL = AI->getModule()->getDataLayout();
3128  unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
3129  return Size >= 4 && Size <= 8;
3130}
3131