NeonEmitter.cpp revision 651f13cea278ec967336033dd032faef0e9fc2ec
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
/// OpKind - One enumerator per "OP_*" operation name that NeonEmitter's
/// constructor registers in its OpMap.  Enumerators are laid out below in
/// name families; no explicit values are assigned, so the position in this
/// list determines each enumerator's numeric value.
enum OpKind {
  OpNone, OpUnavailable,

  // Add / subtract families.
  OpAdd, OpAddl, OpAddlHi, OpAddw, OpAddwHi,
  OpSub, OpSubl, OpSublHi, OpSubw, OpSubwHi,

  // Multiply and multiply-accumulate families.
  OpMul, OpMla, OpMlal,
  OpMullHi, OpMullHiP64, OpMullHiN,
  OpMlalHi, OpMlalHiN,
  OpMls, OpMlsl, OpMlslHi, OpMlslHiN,

  // "_N" variants.
  OpMulN, OpMlaN, OpMlsN, OpFMlaN, OpFMlsN, OpMlalN, OpMlslN,

  // "Lane" variants.
  OpMulLane, OpMulXLane, OpMullLane, OpMullHiLane,
  OpMlaLane, OpMlsLane,
  OpMlalLane, OpMlalHiLane, OpMlslLane, OpMlslHiLane,
  OpQDMullLane, OpQDMullHiLane,
  OpQDMlalLane, OpQDMlalHiLane,
  OpQDMlslLane, OpQDMlslHiLane,
  OpQDMulhLane, OpQRDMulhLane,
  OpFMSLane, OpFMSLaneQ,

  // Trn / Zip / Uzp.
  OpTrn1, OpZip1, OpUzp1, OpTrn2, OpZip2, OpUzp2,

  // Eq / Ge / Le / Gt / Lt.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Neg / Not / And / Or / Xor and friends.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Cast / Concat / Dup / Hi / Lo / Select / Rev.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo, OpSelect,
  OpRev16, OpRev32, OpRev64,

  // "Hi" narrowing / conversion families.
  OpXtnHi, OpSqxtunHi, OpQxtnHi,
  OpFcvtnHi, OpFcvtlHi, OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi, OpRAddhnHi, OpSubhnHi, OpRSubhnHi,

  // Abd / Aba families.
  OpAbdl, OpAbdlHi, OpAba, OpAbal, OpAbalHi,

  // QDMull / QDMlal / QDMlsl "Hi" families.
  OpQDMullHi, OpQDMullHiN,
  OpQDMlalHi, OpQDMlalHiN,
  OpQDMlslHi, OpQDMlslHiN,

  OpDiv, OpLongHi, OpNarrowHi, OpMovlHi,

  // CopyLane family.
  OpCopyLane, OpCopyQLane, OpCopyLaneQ,

  // "Scalar" families.
  OpScalarMulLane, OpScalarMulLaneQ,
  OpScalarMulXLane, OpScalarMulXLaneQ,
  OpScalarVMulXLane, OpScalarVMulXLaneQ,
  OpScalarQDMullLane, OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane, OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane, OpScalarQRDMulHiLaneQ,
  OpScalarGetLane, OpScalarSetLane
};
162
/// ClassKind - How an instruction's name suffix is formed (and whether it is
/// tested).
enum ClassKind {
  ClassNone,

  /// Generic integer instruction, e.g., "i8" suffix.
  ClassI,

  /// Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,

  /// Width-specific instruction, e.g., "8" suffix.
  ClassW,

  /// Bitcast arguments with enum argument to specify type.
  ClassB,

  /// Logical instructions which are op instructions but for which no suffix
  /// must be emitted in the tests.
  ClassL,

  /// Instructions which are not tested since they are not TRUE instructions.
  ClassNoTest
};
175
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins: an element type in the low bits plus "unsigned" and "quad"
/// modifier bits.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,   // bits holding the EltType value
    UnsignedFlag = 0x10, // set when the element type is unsigned
    QuadFlag = 0x20      // set for the quad variant
  };

  /// The packed flag word.
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Poly128,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already-packed flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Pack an element type together with its unsigned/quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the packed flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
214
215namespace {
216class NeonEmitter {
217  RecordKeeper &Records;
218  StringMap<OpKind> OpMap;
219  DenseMap<Record*, ClassKind> ClassMap;
220
221public:
222  NeonEmitter(RecordKeeper &R) : Records(R) {
223    OpMap["OP_NONE"]  = OpNone;
224    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
225    OpMap["OP_ADD"]   = OpAdd;
226    OpMap["OP_ADDL"]  = OpAddl;
227    OpMap["OP_ADDLHi"] = OpAddlHi;
228    OpMap["OP_ADDW"]  = OpAddw;
229    OpMap["OP_ADDWHi"] = OpAddwHi;
230    OpMap["OP_SUB"]   = OpSub;
231    OpMap["OP_SUBL"]  = OpSubl;
232    OpMap["OP_SUBLHi"] = OpSublHi;
233    OpMap["OP_SUBW"]  = OpSubw;
234    OpMap["OP_SUBWHi"] = OpSubwHi;
235    OpMap["OP_MUL"]   = OpMul;
236    OpMap["OP_MLA"]   = OpMla;
237    OpMap["OP_MLAL"]  = OpMlal;
238    OpMap["OP_MULLHi"]  = OpMullHi;
239    OpMap["OP_MULLHi_P64"]  = OpMullHiP64;
240    OpMap["OP_MULLHi_N"]  = OpMullHiN;
241    OpMap["OP_MLALHi"]  = OpMlalHi;
242    OpMap["OP_MLALHi_N"]  = OpMlalHiN;
243    OpMap["OP_MLS"]   = OpMls;
244    OpMap["OP_MLSL"]  = OpMlsl;
245    OpMap["OP_MLSLHi"] = OpMlslHi;
246    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
247    OpMap["OP_MUL_N"] = OpMulN;
248    OpMap["OP_MLA_N"] = OpMlaN;
249    OpMap["OP_MLS_N"] = OpMlsN;
250    OpMap["OP_FMLA_N"] = OpFMlaN;
251    OpMap["OP_FMLS_N"] = OpFMlsN;
252    OpMap["OP_MLAL_N"] = OpMlalN;
253    OpMap["OP_MLSL_N"] = OpMlslN;
254    OpMap["OP_MUL_LN"]= OpMulLane;
255    OpMap["OP_MULX_LN"]= OpMulXLane;
256    OpMap["OP_MULL_LN"] = OpMullLane;
257    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
258    OpMap["OP_MLA_LN"]= OpMlaLane;
259    OpMap["OP_MLS_LN"]= OpMlsLane;
260    OpMap["OP_MLAL_LN"] = OpMlalLane;
261    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
262    OpMap["OP_MLSL_LN"] = OpMlslLane;
263    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
264    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
265    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
266    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
267    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
268    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
269    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
270    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
271    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
272    OpMap["OP_FMS_LN"] = OpFMSLane;
273    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
274    OpMap["OP_TRN1"]  = OpTrn1;
275    OpMap["OP_ZIP1"]  = OpZip1;
276    OpMap["OP_UZP1"]  = OpUzp1;
277    OpMap["OP_TRN2"]  = OpTrn2;
278    OpMap["OP_ZIP2"]  = OpZip2;
279    OpMap["OP_UZP2"]  = OpUzp2;
280    OpMap["OP_EQ"]    = OpEq;
281    OpMap["OP_GE"]    = OpGe;
282    OpMap["OP_LE"]    = OpLe;
283    OpMap["OP_GT"]    = OpGt;
284    OpMap["OP_LT"]    = OpLt;
285    OpMap["OP_NEG"]   = OpNeg;
286    OpMap["OP_NOT"]   = OpNot;
287    OpMap["OP_AND"]   = OpAnd;
288    OpMap["OP_OR"]    = OpOr;
289    OpMap["OP_XOR"]   = OpXor;
290    OpMap["OP_ANDN"]  = OpAndNot;
291    OpMap["OP_ORN"]   = OpOrNot;
292    OpMap["OP_CAST"]  = OpCast;
293    OpMap["OP_CONC"]  = OpConcat;
294    OpMap["OP_HI"]    = OpHi;
295    OpMap["OP_LO"]    = OpLo;
296    OpMap["OP_DUP"]   = OpDup;
297    OpMap["OP_DUP_LN"] = OpDupLane;
298    OpMap["OP_SEL"]   = OpSelect;
299    OpMap["OP_REV16"] = OpRev16;
300    OpMap["OP_REV32"] = OpRev32;
301    OpMap["OP_REV64"] = OpRev64;
302    OpMap["OP_XTN"] = OpXtnHi;
303    OpMap["OP_SQXTUN"] = OpSqxtunHi;
304    OpMap["OP_QXTN"] = OpQxtnHi;
305    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
306    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
307    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
308    OpMap["OP_REINT"] = OpReinterpret;
309    OpMap["OP_ADDHNHi"] = OpAddhnHi;
310    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
311    OpMap["OP_SUBHNHi"] = OpSubhnHi;
312    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
313    OpMap["OP_ABDL"]  = OpAbdl;
314    OpMap["OP_ABDLHi"] = OpAbdlHi;
315    OpMap["OP_ABA"]   = OpAba;
316    OpMap["OP_ABAL"]  = OpAbal;
317    OpMap["OP_ABALHi"] = OpAbalHi;
318    OpMap["OP_QDMULLHi"] = OpQDMullHi;
319    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
320    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
321    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
322    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
323    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
324    OpMap["OP_DIV"] = OpDiv;
325    OpMap["OP_LONG_HI"] = OpLongHi;
326    OpMap["OP_NARROW_HI"] = OpNarrowHi;
327    OpMap["OP_MOVL_HI"] = OpMovlHi;
328    OpMap["OP_COPY_LN"] = OpCopyLane;
329    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
330    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
331    OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
332    OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
333    OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
334    OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
335    OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
336    OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
337    OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
338    OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
339    OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
340    OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
341    OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
342    OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
343    OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
344    OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
345
346    Record *SI = R.getClass("SInst");
347    Record *II = R.getClass("IInst");
348    Record *WI = R.getClass("WInst");
349    Record *SOpI = R.getClass("SOpInst");
350    Record *IOpI = R.getClass("IOpInst");
351    Record *WOpI = R.getClass("WOpInst");
352    Record *LOpI = R.getClass("LOpInst");
353    Record *NoTestOpI = R.getClass("NoTestOpInst");
354
355    ClassMap[SI] = ClassS;
356    ClassMap[II] = ClassI;
357    ClassMap[WI] = ClassW;
358    ClassMap[SOpI] = ClassS;
359    ClassMap[IOpI] = ClassI;
360    ClassMap[WOpI] = ClassW;
361    ClassMap[LOpI] = ClassL;
362    ClassMap[NoTestOpI] = ClassNoTest;
363  }
364
365  // run - Emit arm_neon.h.inc
366  void run(raw_ostream &o);
367
368  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
369  void runHeader(raw_ostream &o);
370
371  // runTests - Emit tests for all the Neon intrinsics.
372  void runTests(raw_ostream &o);
373
374private:
375  void emitGuardedIntrinsic(raw_ostream &OS, Record *R,
376                            std::string &CurrentGuard, bool &InGuard,
377                            StringMap<ClassKind> &EmittedMap);
378  void emitIntrinsic(raw_ostream &OS, Record *R,
379                     StringMap<ClassKind> &EmittedMap);
380  void genBuiltinsDef(raw_ostream &OS);
381  void genOverloadTypeCheckCode(raw_ostream &OS);
382  void genIntrinsicRangeCheckCode(raw_ostream &OS);
383  void genTargetTest(raw_ostream &OS);
384};
385} // end anonymous namespace
386
387/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
388/// which each StringRef representing a single type declared in the string.
389/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
390/// 2xfloat and 4xfloat respectively.
391static void ParseTypes(Record *r, std::string &s,
392                       SmallVectorImpl<StringRef> &TV) {
393  const char *data = s.data();
394  int len = 0;
395
396  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
397    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
398                         || data[len] == 'H' || data[len] == 'S')
399      continue;
400
401    switch (data[len]) {
402      case 'c':
403      case 's':
404      case 'i':
405      case 'l':
406      case 'k':
407      case 'h':
408      case 'f':
409      case 'd':
410        break;
411      default:
412        PrintFatalError(r->getLoc(),
413                      "Unexpected letter: " + std::string(data + len, 1));
414    }
415    TV.push_back(StringRef(data, len + 1));
416    data += len + 1;
417    len = -1;
418  }
419}
420
421/// Widen - Convert a type code into the next wider type.  char -> short,
422/// short -> int, etc.
423static char Widen(const char t) {
424  switch (t) {
425    case 'c':
426      return 's';
427    case 's':
428      return 'i';
429    case 'i':
430      return 'l';
431    case 'l':
432      return 'k';
433    case 'h':
434      return 'f';
435    case 'f':
436      return 'd';
437    default:
438      PrintFatalError("unhandled type in widen!");
439  }
440}
441
442/// Narrow - Convert a type code into the next smaller type.  short -> char,
443/// float -> half float, etc.
444static char Narrow(const char t) {
445  switch (t) {
446    case 's':
447      return 'c';
448    case 'i':
449      return 's';
450    case 'l':
451      return 'i';
452    case 'k':
453      return 'l';
454    case 'f':
455      return 'h';
456    case 'd':
457      return 'f';
458    default:
459      PrintFatalError("unhandled type in narrow!");
460  }
461}
462
463static std::string GetNarrowTypestr(StringRef ty)
464{
465  std::string s;
466  for (size_t i = 0, end = ty.size(); i < end; i++) {
467    switch (ty[i]) {
468      case 's':
469        s += 'c';
470        break;
471      case 'i':
472        s += 's';
473        break;
474      case 'l':
475        s += 'i';
476        break;
477      case 'k':
478        s += 'l';
479        break;
480      default:
481        s += ty[i];
482        break;
483    }
484  }
485
486  return s;
487}
488
489/// For a particular StringRef, return the base type code, and whether it has
490/// the quad-vector, polynomial, or unsigned modifiers set.
491static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
492  unsigned off = 0;
493  // ignore scalar.
494  if (ty[off] == 'S') {
495    ++off;
496  }
497  // remember quad.
498  if (ty[off] == 'Q' || ty[off] == 'H') {
499    quad = true;
500    ++off;
501  }
502
503  // remember poly.
504  if (ty[off] == 'P') {
505    poly = true;
506    ++off;
507  }
508
509  // remember unsigned.
510  if (ty[off] == 'U') {
511    usgn = true;
512    ++off;
513  }
514
515  // base type to get the type string for.
516  return ty[off];
517}
518
519/// ModType - Transform a type code and its modifiers based on a mod code. The
520/// mod code definitions may be found at the top of arm_neon.td.
521static char ModType(const char mod, char type, bool &quad, bool &poly,
522                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
523  switch (mod) {
524    case 't':
525      if (poly) {
526        poly = false;
527        usgn = true;
528      }
529      break;
530    case 'b':
531      scal = true;
532    case 'u':
533      usgn = true;
534      poly = false;
535      if (type == 'f')
536        type = 'i';
537      if (type == 'd')
538        type = 'l';
539      break;
540    case '$':
541      scal = true;
542    case 'x':
543      usgn = false;
544      poly = false;
545      if (type == 'f')
546        type = 'i';
547      if (type == 'd')
548        type = 'l';
549      break;
550    case 'o':
551      scal = true;
552      type = 'd';
553      usgn = false;
554      break;
555    case 'y':
556      scal = true;
557    case 'f':
558      if (type == 'h')
559        quad = true;
560      type = 'f';
561      usgn = false;
562      break;
563    case 'F':
564      type = 'd';
565      usgn = false;
566      break;
567    case 'g':
568      quad = false;
569      break;
570    case 'B':
571    case 'C':
572    case 'D':
573    case 'j':
574      quad = true;
575      break;
576    case 'w':
577      type = Widen(type);
578      quad = true;
579      break;
580    case 'n':
581      type = Widen(type);
582      break;
583    case 'i':
584      type = 'i';
585      scal = true;
586      break;
587    case 'l':
588      type = 'l';
589      scal = true;
590      usgn = true;
591      break;
592    case 'z':
593      type = Narrow(type);
594      scal = true;
595      break;
596    case 'r':
597      type = Widen(type);
598      scal = true;
599      break;
600    case 's':
601    case 'a':
602      scal = true;
603      break;
604    case 'k':
605      quad = true;
606      break;
607    case 'c':
608      cnst = true;
609    case 'p':
610      pntr = true;
611      scal = true;
612      break;
613    case 'h':
614      type = Narrow(type);
615      if (type == 'h')
616        quad = false;
617      break;
618    case 'q':
619      type = Narrow(type);
620      quad = true;
621      break;
622    case 'e':
623      type = Narrow(type);
624      usgn = true;
625      break;
626    case 'm':
627      type = Narrow(type);
628      quad = false;
629      break;
630    default:
631      break;
632  }
633  return type;
634}
635
/// IsMultiVecProto - Return true when prototype character \p p is one of
/// '2', '3', '4', 'B', 'C' or 'D'.
static bool IsMultiVecProto(const char p) {
  switch (p) {
  case '2':
  case '3':
  case '4':
  case 'B':
  case 'C':
  case 'D':
    return true;
  default:
    return false;
  }
}
639
640/// TypeString - for a modifier and type, generate the name of the typedef for
641/// that type.  QUc -> uint8x8_t.
642static std::string TypeString(const char mod, StringRef typestr) {
643  bool quad = false;
644  bool poly = false;
645  bool usgn = false;
646  bool scal = false;
647  bool cnst = false;
648  bool pntr = false;
649
650  if (mod == 'v')
651    return "void";
652  if (mod == 'i')
653    return "int";
654
655  // base type to get the type string for.
656  char type = ClassifyType(typestr, quad, poly, usgn);
657
658  // Based on the modifying character, change the type and width if necessary.
659  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
660
661  SmallString<128> s;
662
663  if (usgn)
664    s.push_back('u');
665
666  switch (type) {
667    case 'c':
668      s += poly ? "poly8" : "int8";
669      if (scal)
670        break;
671      s += quad ? "x16" : "x8";
672      break;
673    case 's':
674      s += poly ? "poly16" : "int16";
675      if (scal)
676        break;
677      s += quad ? "x8" : "x4";
678      break;
679    case 'i':
680      s += "int32";
681      if (scal)
682        break;
683      s += quad ? "x4" : "x2";
684      break;
685    case 'l':
686      s += (poly && !usgn)? "poly64" : "int64";
687      if (scal)
688        break;
689      s += quad ? "x2" : "x1";
690      break;
691    case 'k':
692      s += "poly128";
693      break;
694    case 'h':
695      s += "float16";
696      if (scal)
697        break;
698      s += quad ? "x8" : "x4";
699      break;
700    case 'f':
701      s += "float32";
702      if (scal)
703        break;
704      s += quad ? "x4" : "x2";
705      break;
706    case 'd':
707      s += "float64";
708      if (scal)
709        break;
710      s += quad ? "x2" : "x1";
711      break;
712
713    default:
714      PrintFatalError("unhandled type!");
715  }
716
717  if (mod == '2' || mod == 'B')
718    s += "x2";
719  if (mod == '3' || mod == 'C')
720    s += "x3";
721  if (mod == '4' || mod == 'D')
722    s += "x4";
723
724  // Append _t, finishing the type string typedef type.
725  s += "_t";
726
727  if (cnst)
728    s += " const";
729
730  if (pntr)
731    s += " *";
732
733  return s.str();
734}
735
736/// BuiltinTypeString - for a modifier and type, generate the clang
737/// BuiltinsARM.def prototype code for the function.  See the top of clang's
738/// Builtins.def for a description of the type strings.
739static std::string BuiltinTypeString(const char mod, StringRef typestr,
740                                     ClassKind ck, bool ret) {
741  bool quad = false;
742  bool poly = false;
743  bool usgn = false;
744  bool scal = false;
745  bool cnst = false;
746  bool pntr = false;
747
748  if (mod == 'v')
749    return "v"; // void
750  if (mod == 'i')
751    return "i"; // int
752
753  // base type to get the type string for.
754  char type = ClassifyType(typestr, quad, poly, usgn);
755
756  // Based on the modifying character, change the type and width if necessary.
757  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
758
759  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
760                         scal && type != 'f' && type != 'd');
761
762  // All pointers are void* pointers.  Change type to 'v' now.
763  if (pntr) {
764    usgn = false;
765    poly = false;
766    type = 'v';
767  }
768  // Treat half-float ('h') types as unsigned short ('s') types.
769  if (type == 'h') {
770    type = 's';
771    usgn = true;
772  }
773
774  if (scal) {
775    SmallString<128> s;
776
777    if (usgn)
778      s.push_back('U');
779    else if (type == 'c')
780      s.push_back('S'); // make chars explicitly signed
781
782    if (type == 'l') // 64-bit long
783      s += "Wi";
784    else if (type == 'k') // 128-bit long
785      s = "LLLi";
786    else
787      s.push_back(type);
788
789    if (cnst)
790      s.push_back('C');
791    if (pntr)
792      s.push_back('*');
793    return s.str();
794  }
795
796  // Since the return value must be one type, return a vector type of the
797  // appropriate width which we will bitcast.  An exception is made for
798  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
799  // fashion, storing them to a pointer arg.
800  if (ret) {
801    if (IsMultiVecProto(mod))
802      return "vv*"; // void result with void* first argument
803    if (mod == 'f' || (ck != ClassB && type == 'f'))
804      return quad ? "V4f" : "V2f";
805    if (mod == 'F' || (ck != ClassB && type == 'd'))
806      return quad ? "V2d" : "V1d";
807    if (ck != ClassB && type == 's')
808      return quad ? "V8s" : "V4s";
809    if (ck != ClassB && type == 'i')
810      return quad ? "V4i" : "V2i";
811    if (ck != ClassB && type == 'l')
812      return quad ? "V2Wi" : "V1Wi";
813
814    return quad ? "V16Sc" : "V8Sc";
815  }
816
817  // Non-return array types are passed as individual vectors.
818  if (mod == '2' || mod == 'B')
819    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
820  if (mod == '3' || mod == 'C')
821    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
822  if (mod == '4' || mod == 'D')
823    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
824
825  if (mod == 'f' || (ck != ClassB && type == 'f'))
826    return quad ? "V4f" : "V2f";
827  if (mod == 'F' || (ck != ClassB && type == 'd'))
828    return quad ? "V2d" : "V1d";
829  if (ck != ClassB && type == 's')
830    return quad ? "V8s" : "V4s";
831  if (ck != ClassB && type == 'i')
832    return quad ? "V4i" : "V2i";
833  if (ck != ClassB && type == 'l')
834    return quad ? "V2Wi" : "V1Wi";
835
836  return quad ? "V16Sc" : "V8Sc";
837}
838
839/// InstructionTypeCode - Computes the ARM argument character code and
840/// quad status for a specific type string and ClassKind.
841static void InstructionTypeCode(const StringRef &typeStr,
842                                const ClassKind ck,
843                                bool &quad,
844                                std::string &typeCode) {
845  bool poly = false;
846  bool usgn = false;
847  char type = ClassifyType(typeStr, quad, poly, usgn);
848
849  switch (type) {
850  case 'c':
851    switch (ck) {
852    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
853    case ClassI: typeCode = "i8"; break;
854    case ClassW: typeCode = "8"; break;
855    default: break;
856    }
857    break;
858  case 's':
859    switch (ck) {
860    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
861    case ClassI: typeCode = "i16"; break;
862    case ClassW: typeCode = "16"; break;
863    default: break;
864    }
865    break;
866  case 'i':
867    switch (ck) {
868    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
869    case ClassI: typeCode = "i32"; break;
870    case ClassW: typeCode = "32"; break;
871    default: break;
872    }
873    break;
874  case 'l':
875    switch (ck) {
876    case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
877    case ClassI: typeCode = "i64"; break;
878    case ClassW: typeCode = "64"; break;
879    default: break;
880    }
881    break;
882  case 'k':
883    assert(poly && "Unrecognized 128 bit integer.");
884    typeCode = "p128";
885    break;
886  case 'h':
887    switch (ck) {
888    case ClassS:
889    case ClassI: typeCode = "f16"; break;
890    case ClassW: typeCode = "16"; break;
891    default: break;
892    }
893    break;
894  case 'f':
895    switch (ck) {
896    case ClassS:
897    case ClassI: typeCode = "f32"; break;
898    case ClassW: typeCode = "32"; break;
899    default: break;
900    }
901    break;
902  case 'd':
903    switch (ck) {
904    case ClassS:
905    case ClassI:
906      typeCode += "f64";
907      break;
908    case ClassW:
909      PrintFatalError("unhandled type!");
910    default:
911      break;
912    }
913    break;
914  default:
915    PrintFatalError("unhandled type!");
916  }
917}
918
919static char Insert_BHSD_Suffix(StringRef typestr){
920  unsigned off = 0;
921  if(typestr[off++] == 'S'){
922    while(typestr[off] == 'Q' || typestr[off] == 'H'||
923          typestr[off] == 'P' || typestr[off] == 'U')
924      ++off;
925    switch (typestr[off]){
926    default  : break;
927    case 'c' : return 'b';
928    case 's' : return 'h';
929    case 'i' :
930    case 'f' : return 's';
931    case 'l' :
932    case 'd' : return 'd';
933    }
934  }
935  return 0;
936}
937
/// endsWith_xN - Return true when \p name is longer than three characters
/// and ends in "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  const std::string::size_type len = name.length();
  if (len <= 3)
    return false;

  const char count = name[len - 1];
  return name[len - 3] == '_' && name[len - 2] == 'x' &&
         (count == '2' || count == '3' || count == '4');
}
947
948/// MangleName - Append a type or width suffix to a base neon function name,
949/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
950/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
951/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
952static std::string MangleName(const std::string &name, StringRef typestr,
953                              ClassKind ck) {
954  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
955      name == "vcvt_f64_f32")
956    return name;
957
958  bool quad = false;
959  std::string typeCode = "";
960
961  InstructionTypeCode(typestr, ck, quad, typeCode);
962
963  std::string s = name;
964
965  if (typeCode.size() > 0) {
966    // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
967    if (endsWith_xN(s))
968      s.insert(s.length() - 3, "_" + typeCode);
969    else
970      s += "_" + typeCode;
971  }
972
973  if (ck == ClassB)
974    s += "_v";
975
976  // Insert a 'q' before the first '_' character so that it ends up before
977  // _lane or _n on vector-scalar operations.
978  if (typestr.find("Q") != StringRef::npos) {
979      size_t pos = s.find('_');
980      s = s.insert(pos, "q");
981  }
982  char ins = Insert_BHSD_Suffix(typestr);
983  if(ins){
984    size_t pos = s.find('_');
985    s = s.insert(pos, &ins, 1);
986  }
987
988  return s;
989}
990
991static void PreprocessInstruction(const StringRef &Name,
992                                  const std::string &InstName,
993                                  std::string &Prefix,
994                                  bool &HasNPostfix,
995                                  bool &HasLanePostfix,
996                                  bool &HasDupPostfix,
997                                  bool &IsSpecialVCvt,
998                                  size_t &TBNumber) {
999  // All of our instruction name fields from arm_neon.td are of the form
1000  //   <instructionname>_...
1001  // Thus we grab our instruction name via computation of said Prefix.
1002  const size_t PrefixEnd = Name.find_first_of('_');
1003  // If InstName is passed in, we use that instead of our name Prefix.
1004  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
1005
1006  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
1007
1008  HasNPostfix = Postfix.count("_n");
1009  HasLanePostfix = Postfix.count("_lane");
1010  HasDupPostfix = Postfix.count("_dup");
1011  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
1012
1013  if (InstName.compare("vtbl") == 0 ||
1014      InstName.compare("vtbx") == 0) {
1015    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
1016    // encoding to get its true value.
1017    TBNumber = Name[Name.size()-1] - 48;
1018  }
1019}
1020
1021/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
1022/// extracted, generate a FileCheck pattern for a Load Or Store
1023static void
1024GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
1025                                          const std::string& OutTypeCode,
1026                                          const bool &IsQuad,
1027                                          const bool &HasDupPostfix,
1028                                          const bool &HasLanePostfix,
1029                                          const size_t Count,
1030                                          std::string &RegisterSuffix) {
1031  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
1032  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
1033  // will output a series of v{ld,st}1s, so we have to handle it specially.
1034  if ((Count == 3 || Count == 4) && IsQuad) {
1035    RegisterSuffix += "{";
1036    for (size_t i = 0; i < Count; i++) {
1037      RegisterSuffix += "d{{[0-9]+}}";
1038      if (HasDupPostfix) {
1039        RegisterSuffix += "[]";
1040      }
1041      if (HasLanePostfix) {
1042        RegisterSuffix += "[{{[0-9]+}}]";
1043      }
1044      if (i < Count-1) {
1045        RegisterSuffix += ", ";
1046      }
1047    }
1048    RegisterSuffix += "}";
1049  } else {
1050
1051    // Handle normal loads and stores.
1052    RegisterSuffix += "{";
1053    for (size_t i = 0; i < Count; i++) {
1054      RegisterSuffix += "d{{[0-9]+}}";
1055      if (HasDupPostfix) {
1056        RegisterSuffix += "[]";
1057      }
1058      if (HasLanePostfix) {
1059        RegisterSuffix += "[{{[0-9]+}}]";
1060      }
1061      if (IsQuad && !HasLanePostfix) {
1062        RegisterSuffix += ", d{{[0-9]+}}";
1063        if (HasDupPostfix) {
1064          RegisterSuffix += "[]";
1065        }
1066      }
1067      if (i < Count-1) {
1068        RegisterSuffix += ", ";
1069      }
1070    }
1071    RegisterSuffix += "}, [r{{[0-9]+}}";
1072
1073    // We only include the alignment hint if we have a vld1.*64 or
1074    // a dup/lane instruction.
1075    if (IsLDSTOne) {
1076      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1077        RegisterSuffix += ":" + OutTypeCode;
1078      }
1079    }
1080
1081    RegisterSuffix += "]";
1082  }
1083}
1084
1085static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1086                                     const bool &HasNPostfix) {
1087  return (NameRef.count("vmla") ||
1088          NameRef.count("vmlal") ||
1089          NameRef.count("vmlsl") ||
1090          NameRef.count("vmull") ||
1091          NameRef.count("vqdmlal") ||
1092          NameRef.count("vqdmlsl") ||
1093          NameRef.count("vqdmulh") ||
1094          NameRef.count("vqdmull") ||
1095          NameRef.count("vqrdmulh")) && HasNPostfix;
1096}
1097
1098static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1099                                         const bool &HasLanePostfix) {
1100  return (NameRef.count("vmla") ||
1101          NameRef.count("vmls") ||
1102          NameRef.count("vmlal") ||
1103          NameRef.count("vmlsl") ||
1104          (NameRef.count("vmul") && NameRef.size() == 3)||
1105          NameRef.count("vqdmlal") ||
1106          NameRef.count("vqdmlsl") ||
1107          NameRef.count("vqdmulh") ||
1108          NameRef.count("vqrdmulh")) && HasLanePostfix;
1109}
1110
1111static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1112                                  const bool &HasLanePostfix,
1113                                  const bool &IsQuad) {
1114  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1115                               && IsQuad;
1116  const bool IsVMull = NameRef.count("mull") && !IsQuad;
1117  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1118}
1119
1120static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1121                                                     const std::string &Proto,
1122                                                     const bool &HasNPostfix,
1123                                                     const bool &IsQuad,
1124                                                     const bool &HasLanePostfix,
1125                                                     const bool &HasDupPostfix,
1126                                                     std::string &NormedProto) {
1127  // Handle generic case.
1128  const StringRef NameRef(Name);
1129  for (size_t i = 0, end = Proto.size(); i < end; i++) {
1130    switch (Proto[i]) {
1131    case 'u':
1132    case 'f':
1133    case 'F':
1134    case 'd':
1135    case 's':
1136    case 'x':
1137    case 't':
1138    case 'n':
1139      NormedProto += IsQuad? 'q' : 'd';
1140      break;
1141    case 'w':
1142    case 'k':
1143      NormedProto += 'q';
1144      break;
1145    case 'g':
1146    case 'j':
1147    case 'h':
1148    case 'e':
1149      NormedProto += 'd';
1150      break;
1151    case 'i':
1152      NormedProto += HasLanePostfix? 'a' : 'i';
1153      break;
1154    case 'a':
1155      if (HasLanePostfix) {
1156        NormedProto += 'a';
1157      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1158        NormedProto += IsQuad? 'q' : 'd';
1159      } else {
1160        NormedProto += 'i';
1161      }
1162      break;
1163    }
1164  }
1165
1166  // Handle Special Cases.
1167  const bool IsNotVExt = !NameRef.count("vext");
1168  const bool IsVPADAL = NameRef.count("vpadal");
1169  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1170                                                           HasLanePostfix);
1171  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1172                                                      IsQuad);
1173
1174  if (IsSpecialLaneMul) {
1175    // If
1176    NormedProto[2] = NormedProto[3];
1177    NormedProto.erase(3);
1178  } else if (NormedProto.size() == 4 &&
1179             NormedProto[0] == NormedProto[1] &&
1180             IsNotVExt) {
1181    // If NormedProto.size() == 4 and the first two proto characters are the
1182    // same, ignore the first.
1183    NormedProto = NormedProto.substr(1, 3);
1184  } else if (Is5OpLaneAccum) {
1185    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1186    std::string tmp = NormedProto.substr(1,2);
1187    tmp += NormedProto[4];
1188    NormedProto = tmp;
1189  } else if (IsVPADAL) {
1190    // If we have VPADAL, ignore the first character.
1191    NormedProto = NormedProto.substr(0, 2);
1192  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1193    // If our instruction is a dup instruction, keep only the first and
1194    // last characters.
1195    std::string tmp = "";
1196    tmp += NormedProto[0];
1197    tmp += NormedProto[NormedProto.size()-1];
1198    NormedProto = tmp;
1199  }
1200}
1201
1202/// GenerateRegisterCheckPatterns - Given a bunch of data we have
1203/// extracted, generate a FileCheck pattern to check that an
1204/// instruction's arguments are correct.
1205static void GenerateRegisterCheckPattern(const std::string &Name,
1206                                         const std::string &Proto,
1207                                         const std::string &OutTypeCode,
1208                                         const bool &HasNPostfix,
1209                                         const bool &IsQuad,
1210                                         const bool &HasLanePostfix,
1211                                         const bool &HasDupPostfix,
1212                                         const size_t &TBNumber,
1213                                         std::string &RegisterSuffix) {
1214
1215  RegisterSuffix = "";
1216
1217  const StringRef NameRef(Name);
1218
1219  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1220    return;
1221  }
1222
1223  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1224  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1225
1226  if (IsLoadStore) {
1227    // Grab N value from  v{ld,st}N using its ascii representation.
1228    const size_t Count = NameRef[3] - 48;
1229
1230    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1231                                              HasDupPostfix, HasLanePostfix,
1232                                              Count, RegisterSuffix);
1233  } else if (IsTBXOrTBL) {
1234    RegisterSuffix += "d{{[0-9]+}}, {";
1235    for (size_t i = 0; i < TBNumber-1; i++) {
1236      RegisterSuffix += "d{{[0-9]+}}, ";
1237    }
1238    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1239  } else {
1240    // Handle a normal instruction.
1241    if (NameRef.count("vget") || NameRef.count("vset"))
1242      return;
1243
1244    // We first normalize our proto, since we only need to emit 4
1245    // different types of checks, yet have more than 4 proto types
1246    // that map onto those 4 patterns.
1247    std::string NormalizedProto("");
1248    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1249                                             HasLanePostfix, HasDupPostfix,
1250                                             NormalizedProto);
1251
1252    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1253      const char &c = NormalizedProto[i];
1254      switch (c) {
1255      case 'q':
1256        RegisterSuffix += "q{{[0-9]+}}, ";
1257        break;
1258
1259      case 'd':
1260        RegisterSuffix += "d{{[0-9]+}}, ";
1261        break;
1262
1263      case 'i':
1264        RegisterSuffix += "#{{[0-9]+}}, ";
1265        break;
1266
1267      case 'a':
1268        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1269        break;
1270      }
1271    }
1272
1273    // Remove extra ", ".
1274    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1275  }
1276}
1277
1278/// GenerateChecksForIntrinsic - Given a specific instruction name +
1279/// typestr + class kind, generate the proper set of FileCheck
1280/// Patterns to check for. We could just return a string, but instead
1281/// use a vector since it provides us with the extra flexibility of
1282/// emitting multiple checks, which comes in handy for certain cases
1283/// like mla where we want to check for 2 different instructions.
1284static void GenerateChecksForIntrinsic(const std::string &Name,
1285                                       const std::string &Proto,
1286                                       StringRef &OutTypeStr,
1287                                       StringRef &InTypeStr,
1288                                       ClassKind Ck,
1289                                       const std::string &InstName,
1290                                       bool IsHiddenLOp,
1291                                       std::vector<std::string>& Result) {
1292
1293  // If Ck is a ClassNoTest instruction, just return so no test is
1294  // emitted.
1295  if(Ck == ClassNoTest)
1296    return;
1297
1298  if (Name == "vcvt_f32_f16") {
1299    Result.push_back("vcvt.f32.f16");
1300    return;
1301  }
1302
1303
1304  // Now we preprocess our instruction given the data we have to get the
1305  // data that we need.
1306  // Create a StringRef for String Manipulation of our Name.
1307  const StringRef NameRef(Name);
1308  // Instruction Prefix.
1309  std::string Prefix;
1310  // The type code for our out type string.
1311  std::string OutTypeCode;
1312  // To handle our different cases, we need to check for different postfixes.
1313  // Is our instruction a quad instruction.
1314  bool IsQuad = false;
1315  // Our instruction is of the form <instructionname>_n.
1316  bool HasNPostfix = false;
1317  // Our instruction is of the form <instructionname>_lane.
1318  bool HasLanePostfix = false;
1319  // Our instruction is of the form <instructionname>_dup.
1320  bool HasDupPostfix  = false;
1321  // Our instruction is a vcvt instruction which requires special handling.
1322  bool IsSpecialVCvt = false;
1323  // If we have a vtbxN or vtblN instruction, this is set to N.
1324  size_t TBNumber = -1;
1325  // Register Suffix
1326  std::string RegisterSuffix;
1327
1328  PreprocessInstruction(NameRef, InstName, Prefix,
1329                        HasNPostfix, HasLanePostfix, HasDupPostfix,
1330                        IsSpecialVCvt, TBNumber);
1331
1332  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1333  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1334                               HasLanePostfix, HasDupPostfix, TBNumber,
1335                               RegisterSuffix);
1336
1337  // In the following section, we handle a bunch of special cases. You can tell
1338  // a special case by the fact we are returning early.
1339
1340  // If our instruction is a logical instruction without postfix or a
1341  // hidden LOp just return the current Prefix.
1342  if (Ck == ClassL || IsHiddenLOp) {
1343    Result.push_back(Prefix + " " + RegisterSuffix);
1344    return;
1345  }
1346
1347  // If we have a vmov, due to the many different cases, some of which
1348  // vary within the different intrinsics generated for a single
1349  // instruction type, just output a vmov. (e.g. given an instruction
1350  // A, A.u32 might be vmov and A.u8 might be vmov.8).
1351  //
1352  // FIXME: Maybe something can be done about this. The two cases that we care
1353  // about are vmov as an LType and vmov as a WType.
1354  if (Prefix == "vmov") {
1355    Result.push_back(Prefix + " " + RegisterSuffix);
1356    return;
1357  }
1358
1359  // In the following section, we handle special cases.
1360
1361  if (OutTypeCode == "64") {
1362    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1363    // type, the intrinsic will be optimized away, so just return
1364    // nothing.  On the other hand if we are handling an uint64x2_t
1365    // (i.e. quad instruction), vdup/vmov instructions should be
1366    // emitted.
1367    if (Prefix == "vdup" || Prefix == "vext") {
1368      if (IsQuad) {
1369        Result.push_back("{{vmov|vdup}}");
1370      }
1371      return;
1372    }
1373
1374    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1375    // multiple register operands.
1376    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1377                            || Prefix == "vld4";
1378    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1379                            || Prefix == "vst4";
1380    if (MultiLoadPrefix || MultiStorePrefix) {
1381      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1382      return;
1383    }
1384
1385    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1386    // emitting said instructions. So return a check for
1387    // vldr/vstr/vmov/str instead.
1388    if (HasLanePostfix || HasDupPostfix) {
1389      if (Prefix == "vst1") {
1390        Result.push_back("{{str|vstr|vmov}}");
1391        return;
1392      } else if (Prefix == "vld1") {
1393        Result.push_back("{{ldr|vldr|vmov}}");
1394        return;
1395      }
1396    }
1397  }
1398
1399  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1400  // sometimes disassembled as vtrn.32. We use a regex to handle both
1401  // cases.
1402  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1403    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1404    return;
1405  }
1406
1407  // Currently on most ARM processors, we do not use vmla/vmls for
1408  // quad floating point operations. Instead we output vmul + vadd. So
1409  // check if we have one of those instructions and just output a
1410  // check for vmul.
1411  if (OutTypeCode == "f32") {
1412    if (Prefix == "vmls") {
1413      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1414      Result.push_back("vsub." + OutTypeCode);
1415      return;
1416    } else if (Prefix == "vmla") {
1417      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1418      Result.push_back("vadd." + OutTypeCode);
1419      return;
1420    }
1421  }
1422
1423  // If we have vcvt, get the input type from the instruction name
1424  // (which should be of the form instname_inputtype) and append it
1425  // before the output type.
1426  if (Prefix == "vcvt") {
1427    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1428    Prefix += "." + inTypeCode;
1429  }
1430
1431  // Append output type code to get our final mangled instruction.
1432  Prefix += "." + OutTypeCode;
1433
1434  Result.push_back(Prefix + " " + RegisterSuffix);
1435}
1436
1437/// UseMacro - Examine the prototype string to determine if the intrinsic
1438/// should be defined as a preprocessor macro instead of an inline function.
1439static bool UseMacro(const std::string &proto, StringRef typestr) {
1440  // If this builtin takes an immediate argument, we need to #define it rather
1441  // than use a standard declaration, so that SemaChecking can range check
1442  // the immediate passed by the user.
1443  if (proto.find('i') != std::string::npos)
1444    return true;
1445
1446  // Pointer arguments need to use macros to avoid hiding aligned attributes
1447  // from the pointer type.
1448  if (proto.find('p') != std::string::npos ||
1449      proto.find('c') != std::string::npos)
1450    return true;
1451
1452  // It is not permitted to pass or return an __fp16 by value, so intrinsics
1453  // taking a scalar float16_t must be implemented as macros.
1454  if (typestr.find('h') != std::string::npos &&
1455      proto.find('s') != std::string::npos)
1456    return true;
1457
1458  return false;
1459}
1460
/// MacroArgUsedDirectly - Return true if argument i of an intrinsic that is
/// defined as a macro is referenced directly in the expansion instead of
/// first being copied into a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1468
1469// Generate the string "(argtype a, argtype b, ...)"
1470static std::string GenArgs(const std::string &proto, StringRef typestr,
1471                           const std::string &name) {
1472  bool define = UseMacro(proto, typestr);
1473  char arg = 'a';
1474
1475  std::string s;
1476  s += "(";
1477
1478  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1479    if (define) {
1480      // Some macro arguments are used directly instead of being assigned
1481      // to local temporaries; prepend an underscore prefix to make their
1482      // names consistent with the local temporaries.
1483      if (MacroArgUsedDirectly(proto, i))
1484        s += "__";
1485    } else {
1486      s += TypeString(proto[i], typestr) + " __";
1487    }
1488    s.push_back(arg);
1489    if ((i + 1) < e)
1490      s += ", ";
1491  }
1492
1493  s += ")";
1494  return s;
1495}
1496
1497// Macro arguments are not type-checked like inline function arguments, so
1498// assign them to local temporaries to get the right type checking.
1499static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1500                                  const std::string &name ) {
1501  char arg = 'a';
1502  std::string s;
1503  bool generatedLocal = false;
1504
1505  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1506    // Do not create a temporary for an immediate argument.
1507    // That would defeat the whole point of using a macro!
1508    if (MacroArgUsedDirectly(proto, i))
1509      continue;
1510    generatedLocal = true;
1511    s += TypeString(proto[i], typestr) + " __";
1512    s.push_back(arg);
1513    s += " = (";
1514    s.push_back(arg);
1515    s += "); ";
1516  }
1517
1518  if (generatedLocal)
1519    s += "\\\n  ";
1520  return s;
1521}
1522
1523// Use the vmovl builtin to sign-extend or zero-extend a vector.
1524static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1525  std::string s, high;
1526  high = h ? "_high" : "";
1527  s = MangleName("vmovl" + high, typestr, ClassS);
1528  s += "(" + a + ")";
1529  return s;
1530}
1531
1532// Get the high 64-bit part of a vector
1533static std::string GetHigh(const std::string &a, StringRef typestr) {
1534  std::string s;
1535  s = MangleName("vget_high", typestr, ClassS);
1536  s += "(" + a + ")";
1537  return s;
1538}
1539
1540// Gen operation with two operands and get high 64-bit for both of two operands.
1541static std::string Gen2OpWith2High(StringRef typestr,
1542                                   const std::string &op,
1543                                   const std::string &a,
1544                                   const std::string &b) {
1545  std::string s;
1546  std::string Op1 = GetHigh(a, typestr);
1547  std::string Op2 = GetHigh(b, typestr);
1548  s = MangleName(op, typestr, ClassS);
1549  s += "(" + Op1 + ", " + Op2 + ");";
1550  return s;
1551}
1552
1553// Gen operation with three operands and get high 64-bit of the latter
1554// two operands.
1555static std::string Gen3OpWith2High(StringRef typestr,
1556                                   const std::string &op,
1557                                   const std::string &a,
1558                                   const std::string &b,
1559                                   const std::string &c) {
1560  std::string s;
1561  std::string Op1 = GetHigh(b, typestr);
1562  std::string Op2 = GetHigh(c, typestr);
1563  s = MangleName(op, typestr, ClassS);
1564  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1565  return s;
1566}
1567
1568// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1569static std::string GenCombine(std::string typestr,
1570                              const std::string &a,
1571                              const std::string &b) {
1572  std::string s;
1573  s = MangleName("vcombine", typestr, ClassS);
1574  s += "(" + a + ", " + b + ")";
1575  return s;
1576}
1577
1578static std::string Duplicate(unsigned nElts, StringRef typestr,
1579                             const std::string &a) {
1580  std::string s;
1581
1582  s = "(" + TypeString('d', typestr) + "){ ";
1583  for (unsigned i = 0; i != nElts; ++i) {
1584    s += a;
1585    if ((i + 1) < nElts)
1586      s += ", ";
1587  }
1588  s += " }";
1589
1590  return s;
1591}
1592
// Build a __builtin_shufflevector call that takes vec for both inputs and
// repeats the lane index expression nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string call = "__builtin_shufflevector(";
  call += vec;
  call += ", ";
  call += vec;

  // Each result element selects the same lane.
  const std::string laneArg = ", " + lane;
  for (unsigned remaining = nElts; remaining != 0; --remaining)
    call += laneArg;

  call += ")";
  return call;
}
1601
1602static std::string RemoveHigh(const std::string &name) {
1603  std::string s = name;
1604  std::size_t found = s.find("_high_");
1605  if (found == std::string::npos)
1606    PrintFatalError("name should contain \"_high_\" for high intrinsics");
1607  s.replace(found, 5, "");
1608  return s;
1609}
1610
1611static unsigned GetNumElements(StringRef typestr, bool &quad) {
1612  quad = false;
1613  bool dummy = false;
1614  char type = ClassifyType(typestr, quad, dummy, dummy);
1615  unsigned nElts = 0;
1616  switch (type) {
1617  case 'c': nElts = 8; break;
1618  case 's': nElts = 4; break;
1619  case 'i': nElts = 2; break;
1620  case 'l': nElts = 1; break;
1621  case 'k': nElts = 1; break;
1622  case 'h': nElts = 4; break;
1623  case 'f': nElts = 2; break;
1624  case 'd':
1625    nElts = 1;
1626    break;
1627  default:
1628    PrintFatalError("unhandled type!");
1629  }
1630  if (quad) nElts <<= 1;
1631  return nElts;
1632}
1633
1634// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1635//
1636// Note that some intrinsic definitions around 'lane' are being implemented
1637// with macros, because they all contain constant integer argument, and we
1638// statically check the range of the lane index to meet the semantic
1639// requirement of different intrinsics.
1640//
// For the intrinsics implemented with a macro, if they contain another
// intrinsic implemented with a macro, we have to avoid using the same argument
// names for the nested intrinsics. For example, macro vfms_lane is implemented
// with another macro, vfma_lane, so we rename all arguments for vfms_lane by
// adding a suffix '1'.
1646
1647static std::string GenOpString(const std::string &name, OpKind op,
1648                               const std::string &proto, StringRef typestr) {
1649  bool quad;
1650  unsigned nElts = GetNumElements(typestr, quad);
1651  bool define = UseMacro(proto, typestr);
1652
1653  std::string ts = TypeString(proto[0], typestr);
1654  std::string s;
1655  if (!define) {
1656    s = "return ";
1657  }
1658
1659  switch(op) {
1660  case OpAdd:
1661    s += "__a + __b;";
1662    break;
1663  case OpAddl:
1664    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1665    break;
1666  case OpAddlHi:
1667    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1668    break;
1669  case OpAddw:
1670    s += "__a + " + Extend(typestr, "__b") + ";";
1671    break;
1672  case OpAddwHi:
1673    s += "__a + " + Extend(typestr, "__b", 1) + ";";
1674    break;
1675  case OpSub:
1676    s += "__a - __b;";
1677    break;
1678  case OpSubl:
1679    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1680    break;
1681  case OpSublHi:
1682    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1683    break;
1684  case OpSubw:
1685    s += "__a - " + Extend(typestr, "__b") + ";";
1686    break;
1687  case OpSubwHi:
1688    s += "__a - " + Extend(typestr, "__b", 1) + ";";
1689    break;
1690  case OpMulN:
1691    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1692    break;
1693  case OpMulLane:
1694    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1695    break;
1696  case OpMulXLane:
1697    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1698      SplatLane(nElts, "__b", "__c") + ");";
1699    break;
1700  case OpMul:
1701    s += "__a * __b;";
1702    break;
1703  case OpFMlaN:
1704    s += MangleName("vfma", typestr, ClassS);
1705    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1706    break;
1707  case OpFMlsN:
1708    s += MangleName("vfms", typestr, ClassS);
1709    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1710    break;
1711  case OpMullLane:
1712    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1713      SplatLane(nElts, "__b", "__c") + ");";
1714    break;
1715  case OpMullHiLane:
1716    s += MangleName("vmull", typestr, ClassS) + "(" +
1717      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1718    break;
1719  case OpMlaN:
1720    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1721    break;
1722  case OpMlaLane:
1723    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1724    break;
1725  case OpMla:
1726    s += "__a + (__b * __c);";
1727    break;
1728  case OpMlalN:
1729    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1730      Duplicate(nElts, typestr, "__c") + ");";
1731    break;
1732  case OpMlalLane:
1733    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1734      SplatLane(nElts, "__c", "__d") + ");";
1735    break;
1736  case OpMlalHiLane:
1737    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1738      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1739    break;
1740  case OpMlal:
1741    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1742    break;
1743  case OpMullHi:
1744    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1745    break;
1746  case OpMullHiP64: {
1747    std::string Op1 = GetHigh("__a", typestr);
1748    std::string Op2 = GetHigh("__b", typestr);
1749    s += MangleName("vmull", typestr, ClassS);
1750    s += "((poly64_t)" + Op1 + ", (poly64_t)" + Op2 + ");";
1751    break;
1752  }
1753  case OpMullHiN:
1754    s += MangleName("vmull_n", typestr, ClassS);
1755    s += "(" + GetHigh("__a", typestr) + ", __b);";
1756    return s;
1757  case OpMlalHi:
1758    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1759    break;
1760  case OpMlalHiN:
1761    s += MangleName("vmlal_n", typestr, ClassS);
1762    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1763    return s;
1764  case OpMlsN:
1765    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1766    break;
1767  case OpMlsLane:
1768    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1769    break;
1770  case OpFMSLane:
1771    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1772    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1773    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1774    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1775    break;
1776  case OpFMSLaneQ:
1777    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1778    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1779    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1780    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1781    break;
1782  case OpMls:
1783    s += "__a - (__b * __c);";
1784    break;
1785  case OpMlslN:
1786    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1787      Duplicate(nElts, typestr, "__c") + ");";
1788    break;
1789  case OpMlslLane:
1790    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1791      SplatLane(nElts, "__c", "__d") + ");";
1792    break;
1793  case OpMlslHiLane:
1794    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1795      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1796    break;
1797  case OpMlsl:
1798    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1799    break;
1800  case OpMlslHi:
1801    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1802    break;
1803  case OpMlslHiN:
1804    s += MangleName("vmlsl_n", typestr, ClassS);
1805    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1806    break;
1807  case OpQDMullLane:
1808    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1809      SplatLane(nElts, "__b", "__c") + ");";
1810    break;
1811  case OpQDMullHiLane:
1812    s += MangleName("vqdmull", typestr, ClassS) + "(" +
1813      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1814    break;
1815  case OpQDMlalLane:
1816    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1817      SplatLane(nElts, "__c", "__d") + ");";
1818    break;
1819  case OpQDMlalHiLane:
1820    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1821      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1822    break;
1823  case OpQDMlslLane:
1824    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1825      SplatLane(nElts, "__c", "__d") + ");";
1826    break;
1827  case OpQDMlslHiLane:
1828    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1829      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1830    break;
1831  case OpQDMulhLane:
1832    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1833      SplatLane(nElts, "__b", "__c") + ");";
1834    break;
1835  case OpQRDMulhLane:
1836    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1837      SplatLane(nElts, "__b", "__c") + ");";
1838    break;
1839  case OpEq:
1840    s += "(" + ts + ")(__a == __b);";
1841    break;
1842  case OpGe:
1843    s += "(" + ts + ")(__a >= __b);";
1844    break;
1845  case OpLe:
1846    s += "(" + ts + ")(__a <= __b);";
1847    break;
1848  case OpGt:
1849    s += "(" + ts + ")(__a > __b);";
1850    break;
1851  case OpLt:
1852    s += "(" + ts + ")(__a < __b);";
1853    break;
1854  case OpNeg:
1855    s += " -__a;";
1856    break;
1857  case OpNot:
1858    s += " ~__a;";
1859    break;
1860  case OpAnd:
1861    s += "__a & __b;";
1862    break;
1863  case OpOr:
1864    s += "__a | __b;";
1865    break;
1866  case OpXor:
1867    s += "__a ^ __b;";
1868    break;
1869  case OpAndNot:
1870    s += "__a & ~__b;";
1871    break;
1872  case OpOrNot:
1873    s += "__a | ~__b;";
1874    break;
1875  case OpCast:
1876    s += "(" + ts + ")__a;";
1877    break;
1878  case OpConcat:
1879    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1880    s += ", (int64x1_t)__b, 0, 1);";
1881    break;
1882  case OpHi:
1883    // nElts is for the result vector, so the source is twice that number.
1884    s += "__builtin_shufflevector(__a, __a";
1885    for (unsigned i = nElts; i < nElts * 2; ++i)
1886      s += ", " + utostr(i);
1887    s+= ");";
1888    break;
1889  case OpLo:
1890    s += "__builtin_shufflevector(__a, __a";
1891    for (unsigned i = 0; i < nElts; ++i)
1892      s += ", " + utostr(i);
1893    s+= ");";
1894    break;
1895  case OpDup:
1896    s += Duplicate(nElts, typestr, "__a") + ";";
1897    break;
1898  case OpDupLane:
1899    s += SplatLane(nElts, "__a", "__b") + ";";
1900    break;
1901  case OpSelect:
1902    // ((0 & 1) | (~0 & 2))
1903    s += "(" + ts + ")";
1904    ts = TypeString(proto[1], typestr);
1905    s += "((__a & (" + ts + ")__b) | ";
1906    s += "(~__a & (" + ts + ")__c));";
1907    break;
1908  case OpRev16:
1909    s += "__builtin_shufflevector(__a, __a";
1910    for (unsigned i = 2; i <= nElts; i += 2)
1911      for (unsigned j = 0; j != 2; ++j)
1912        s += ", " + utostr(i - j - 1);
1913    s += ");";
1914    break;
1915  case OpRev32: {
1916    unsigned WordElts = nElts >> (1 + (int)quad);
1917    s += "__builtin_shufflevector(__a, __a";
1918    for (unsigned i = WordElts; i <= nElts; i += WordElts)
1919      for (unsigned j = 0; j != WordElts; ++j)
1920        s += ", " + utostr(i - j - 1);
1921    s += ");";
1922    break;
1923  }
1924  case OpRev64: {
1925    unsigned DblWordElts = nElts >> (int)quad;
1926    s += "__builtin_shufflevector(__a, __a";
1927    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1928      for (unsigned j = 0; j != DblWordElts; ++j)
1929        s += ", " + utostr(i - j - 1);
1930    s += ");";
1931    break;
1932  }
1933  case OpXtnHi: {
1934    s = TypeString(proto[1], typestr) + " __a1 = " +
1935        MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
1936        "return __builtin_shufflevector(__a, __a1";
1937    for (unsigned i = 0; i < nElts * 4; ++i)
1938      s += ", " + utostr(i);
1939    s += ");";
1940    break;
1941  }
1942  case OpSqxtunHi: {
1943    s = TypeString(proto[1], typestr) + " __a1 = " +
1944        MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
1945        "return __builtin_shufflevector(__a, __a1";
1946    for (unsigned i = 0; i < nElts * 4; ++i)
1947      s += ", " + utostr(i);
1948    s += ");";
1949    break;
1950  }
1951  case OpQxtnHi: {
1952    s = TypeString(proto[1], typestr) + " __a1 = " +
1953        MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
1954        "return __builtin_shufflevector(__a, __a1";
1955    for (unsigned i = 0; i < nElts * 4; ++i)
1956      s += ", " + utostr(i);
1957    s += ");";
1958    break;
1959  }
1960  case OpFcvtnHi: {
1961    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1962    s = TypeString(proto[1], typestr) + " __a1 = " +
1963        MangleName(FName, typestr, ClassS) + "(__b);\n  " +
1964        "return __builtin_shufflevector(__a, __a1";
1965    for (unsigned i = 0; i < nElts * 4; ++i)
1966      s += ", " + utostr(i);
1967    s += ");";
1968    break;
1969  }
1970  case OpFcvtlHi: {
1971    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1972    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1973        ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1974    break;
1975  }
1976  case OpFcvtxnHi: {
1977    s = TypeString(proto[1], typestr) + " __a1 = " +
1978        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
1979        "return __builtin_shufflevector(__a, __a1";
1980    for (unsigned i = 0; i < nElts * 4; ++i)
1981      s += ", " + utostr(i);
1982    s += ");";
1983    break;
1984  }
1985  case OpUzp1:
1986    s += "__builtin_shufflevector(__a, __b";
1987    for (unsigned i = 0; i < nElts; i++)
1988      s += ", " + utostr(2*i);
1989    s += ");";
1990    break;
1991  case OpUzp2:
1992    s += "__builtin_shufflevector(__a, __b";
1993    for (unsigned i = 0; i < nElts; i++)
1994      s += ", " + utostr(2*i+1);
1995    s += ");";
1996    break;
1997  case OpZip1:
1998    s += "__builtin_shufflevector(__a, __b";
1999    for (unsigned i = 0; i < (nElts/2); i++)
2000       s += ", " + utostr(i) + ", " + utostr(i+nElts);
2001    s += ");";
2002    break;
2003  case OpZip2:
2004    s += "__builtin_shufflevector(__a, __b";
2005    for (unsigned i = nElts/2; i < nElts; i++)
2006       s += ", " + utostr(i) + ", " + utostr(i+nElts);
2007    s += ");";
2008    break;
2009  case OpTrn1:
2010    s += "__builtin_shufflevector(__a, __b";
2011    for (unsigned i = 0; i < (nElts/2); i++)
2012       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
2013    s += ");";
2014    break;
2015  case OpTrn2:
2016    s += "__builtin_shufflevector(__a, __b";
2017    for (unsigned i = 0; i < (nElts/2); i++)
2018       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
2019    s += ");";
2020    break;
2021  case OpAbdl: {
2022    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
2023    if (typestr[0] != 'U') {
2024      // vabd results are always unsigned and must be zero-extended.
2025      std::string utype = "U" + typestr.str();
2026      s += "(" + TypeString(proto[0], typestr) + ")";
2027      abd = "(" + TypeString('d', utype) + ")" + abd;
2028      s += Extend(utype, abd) + ";";
2029    } else {
2030      s += Extend(typestr, abd) + ";";
2031    }
2032    break;
2033  }
2034  case OpAbdlHi:
2035    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
2036    break;
2037  case OpAddhnHi: {
2038    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
2039    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
2040    s += ";";
2041    break;
2042  }
2043  case OpRAddhnHi: {
2044    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
2045    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
2046    s += ";";
2047    break;
2048  }
2049  case OpSubhnHi: {
2050    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
2051    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
2052    s += ";";
2053    break;
2054  }
2055  case OpRSubhnHi: {
2056    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
2057    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
2058    s += ";";
2059    break;
2060  }
2061  case OpAba:
2062    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
2063    break;
2064  case OpAbal:
2065    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
2066    break;
2067  case OpAbalHi:
2068    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
2069    break;
2070  case OpQDMullHi:
2071    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
2072    break;
2073  case OpQDMullHiN:
2074    s += MangleName("vqdmull_n", typestr, ClassS);
2075    s += "(" + GetHigh("__a", typestr) + ", __b);";
2076    return s;
2077  case OpQDMlalHi:
2078    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
2079    break;
2080  case OpQDMlalHiN:
2081    s += MangleName("vqdmlal_n", typestr, ClassS);
2082    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2083    return s;
2084  case OpQDMlslHi:
2085    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
2086    break;
2087  case OpQDMlslHiN:
2088    s += MangleName("vqdmlsl_n", typestr, ClassS);
2089    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2090    return s;
2091  case OpDiv:
2092    s += "__a / __b;";
2093    break;
2094  case OpMovlHi: {
2095    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2096        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
2097    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
2098    s += "(__a1, 0);";
2099    break;
2100  }
2101  case OpLongHi: {
2102    // Another local variable __a1 is needed for calling a Macro,
2103    // or using __a will have naming conflict when Macro expanding.
2104    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2105         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2106    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2107         "(__a1, __b);";
2108    break;
2109  }
2110  case OpNarrowHi: {
2111    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2112         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
2113    break;
2114  }
2115  case OpCopyLane: {
2116    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2117    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
2118    s += TypeString('s', typestr) + " __c2 = " +
2119         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d); \\\n  " +
2120         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b);";
2121    break;
2122  }
2123  case OpCopyQLane: {
2124    std::string typeCode = "";
2125    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2126    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2127    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
2128    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2129         "(__c1, __d); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b);";
2130    break;
2131  }
2132  case OpCopyLaneQ: {
2133    std::string typeCode = "";
2134    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2135    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2136    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
2137    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2138         "(__c1, __d); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b);";
2139    break;
2140  }
2141  case OpScalarMulLane: {
2142    std::string typeCode = "";
2143    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2144    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2145      "(__b, __c);\\\n  __a * __d1;";
2146    break;
2147  }
2148  case OpScalarMulLaneQ: {
2149    std::string typeCode = "";
2150    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2151    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2152    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2153    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2154      "(__b1, __c);\\\n  __a1 * __d1;";
2155    break;
2156  }
2157  case OpScalarMulXLane: {
2158    bool dummy = false;
2159    char type = ClassifyType(typestr, dummy, dummy, dummy);
2160    if (type == 'f') type = 's';
2161    std::string typeCode = "";
2162    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2163    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2164    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2165    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2166      "(__b1, __c);\\\n  vmulx" + type + "_" +
2167      typeCode +  "(__a1, __d1);";
2168    break;
2169  }
2170  case OpScalarMulXLaneQ: {
2171    bool dummy = false;
2172    char type = ClassifyType(typestr, dummy, dummy, dummy);
2173    if (type == 'f') type = 's';
2174    std::string typeCode = "";
2175    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2176    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2177    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2178    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2179      typeCode + "(__b1, __c);\\\n  vmulx" + type +
2180      "_" + typeCode +  "(__a1, __d1);";
2181    break;
2182  }
2183
2184  case OpScalarVMulXLane: {
2185    bool dummy = false;
2186    char type = ClassifyType(typestr, dummy, dummy, dummy);
2187    if (type == 'f') type = 's';
2188    std::string typeCode = "";
2189    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2190    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2191    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2192    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2193      typeCode + "(__a1, 0);\\\n" +
2194      "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2195      typeCode + "(__b1, __c);\\\n" +
2196      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2197      typeCode + "(__d1, __e1);\\\n" +
2198      "  " + TypeString('d', typestr) + " __g1;\\\n" +
2199      "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
2200    break;
2201  }
2202
2203  case OpScalarVMulXLaneQ: {
2204    bool dummy = false;
2205    char type = ClassifyType(typestr, dummy, dummy, dummy);
2206    if (type == 'f') type = 's';
2207    std::string typeCode = "";
2208    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2209    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2210    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2211    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2212      typeCode + "(__a1, 0);\\\n" +
2213      "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2214      typeCode + "(__b1, __c);\\\n" +
2215      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2216      typeCode + "(__d1, __e1);\\\n" +
2217      "  " + TypeString('d', typestr) + " __g1;\\\n" +
2218      "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
2219    break;
2220  }
2221  case OpScalarQDMullLane: {
2222    std::string typeCode = "";
2223    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2224    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2225    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2226    s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
2227    "vget_lane_" + typeCode + "(__b1, __c));";
2228    break;
2229  }
2230  case OpScalarQDMullLaneQ: {
2231    std::string typeCode = "";
2232    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2233    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2234    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2235    s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
2236    "vgetq_lane_" + typeCode + "(__b1, __c));";
2237    break;
2238  }
2239  case OpScalarQDMulHiLane: {
2240    std::string typeCode = "";
2241    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2242    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2243    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2244    s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
2245    "vget_lane_" + typeCode + "(__b1, __c));";
2246    break;
2247  }
2248  case OpScalarQDMulHiLaneQ: {
2249    std::string typeCode = "";
2250    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2251    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2252    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2253    s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
2254    "vgetq_lane_" + typeCode + "(__b1, __c));";
2255    break;
2256  }
2257  case OpScalarQRDMulHiLane: {
2258    std::string typeCode = "";
2259    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2260    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2261    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2262    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
2263    "vget_lane_" + typeCode + "(__b1, __c));";
2264    break;
2265  }
2266  case OpScalarQRDMulHiLaneQ: {
2267    std::string typeCode = "";
2268    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2269    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2270    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2271    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
2272    "vgetq_lane_" + typeCode + "(__b1, __c));";
2273    break;
2274  }
2275  case OpScalarGetLane:{
2276    std::string typeCode = "";
2277    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2278    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2279
2280    std::string intType = quad ? "int16x8_t" : "int16x4_t";
2281    std::string intName = quad ? "vgetq" : "vget";
2282
2283    // reinterpret float16 vector as int16 vector
2284    s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";
2285
2286    s += "  int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";
2287
2288    // reinterpret int16 vector as float16 vector
2289    s += "  float16_t __a4 = *(float16_t *)(&__a3);\\\n";
2290    s += "  __a4;";
2291    break;
2292  }
2293  case OpScalarSetLane:{
2294    std::string typeCode = "";
2295    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2296    s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n  ";
2297
2298    std::string origType = quad ? "float16x8_t" : "float16x4_t";
2299    std::string intType = quad ? "int16x8_t" : "int16x4_t";
2300    std::string intName = quad ? "vsetq" : "vset";
2301
2302    // reinterpret float16_t as int16_t
2303    s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
2304    // reinterpret float16 vector as int16 vector
2305    s += "  " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";
2306
2307    s += "  " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";
2308
2309    // reinterpret int16 vector as float16 vector
2310    s += "  " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
2311    s += "__b4;";
2312    break;
2313  }
2314
2315  default:
2316    PrintFatalError("unknown OpKind!");
2317  }
2318  return s;
2319}
2320
2321static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2322  unsigned mod = proto[0];
2323
2324  if (mod == 'v' || mod == 'f' || mod == 'F')
2325    mod = proto[1];
2326
2327  bool quad = false;
2328  bool poly = false;
2329  bool usgn = false;
2330  bool scal = false;
2331  bool cnst = false;
2332  bool pntr = false;
2333
2334  // Base type to get the type string for.
2335  char type = ClassifyType(typestr, quad, poly, usgn);
2336
2337  // Based on the modifying character, change the type and width if necessary.
2338  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2339
2340  NeonTypeFlags::EltType ET;
2341  switch (type) {
2342    case 'c':
2343      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2344      break;
2345    case 's':
2346      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2347      break;
2348    case 'i':
2349      ET = NeonTypeFlags::Int32;
2350      break;
2351    case 'l':
2352      ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2353      break;
2354    case 'k':
2355      ET = NeonTypeFlags::Poly128;
2356      break;
2357    case 'h':
2358      ET = NeonTypeFlags::Float16;
2359      break;
2360    case 'f':
2361      ET = NeonTypeFlags::Float32;
2362      break;
2363    case 'd':
2364      ET = NeonTypeFlags::Float64;
2365      break;
2366    default:
2367      PrintFatalError("unhandled type!");
2368  }
2369  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2370  return Flags.getFlags();
2371}
2372
// Return true if the prototype contains at least one of the modifier
// characters 's', 'z', 'r', 'b', '$', 'y' or 'o'.
//
// We don't check 'a' in this function, because for builtin function the
// argument matching to 'a' uses a vector type splatted from a scalar type.
//
// The prototype is taken by const reference so that callers do not pay for a
// string copy, and it is scanned once instead of once per modifier.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("szrb$yo") != std::string::npos;
}
2385
2386// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
2387static std::string GenBuiltin(const std::string &name, const std::string &proto,
2388                              StringRef typestr, ClassKind ck) {
2389  std::string s;
2390
2391  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2392  // sret-like argument.
2393  bool sret = IsMultiVecProto(proto[0]);
2394
2395  bool define = UseMacro(proto, typestr);
2396
2397  // Check if the prototype has a scalar operand with the type of the vector
2398  // elements.  If not, bitcasting the args will take care of arg checking.
2399  // The actual signedness etc. will be taken care of with special enums.
2400  if (!ProtoHasScalar(proto))
2401    ck = ClassB;
2402
2403  if (proto[0] != 'v') {
2404    std::string ts = TypeString(proto[0], typestr);
2405
2406    if (define) {
2407      if (sret)
2408        s += ts + " r; ";
2409      else
2410        s += "(" + ts + ")";
2411    } else if (sret) {
2412      s += ts + " r; ";
2413    } else {
2414      s += "return (" + ts + ")";
2415    }
2416  }
2417
2418  bool splat = proto.find('a') != std::string::npos;
2419
2420  s += "__builtin_neon_";
2421  if (splat) {
2422    // Call the non-splat builtin: chop off the "_n" suffix from the name.
2423    std::string vname(name, 0, name.size()-2);
2424    s += MangleName(vname, typestr, ck);
2425  } else {
2426    s += MangleName(name, typestr, ck);
2427  }
2428  s += "(";
2429
2430  // Pass the address of the return variable as the first argument to sret-like
2431  // builtins.
2432  if (sret)
2433    s += "&r, ";
2434
2435  char arg = 'a';
2436  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2437    std::string args = std::string(&arg, 1);
2438
2439    // Use the local temporaries instead of the macro arguments.
2440    args = "__" + args;
2441
2442    bool argQuad = false;
2443    bool argPoly = false;
2444    bool argUsgn = false;
2445    bool argScalar = false;
2446    bool dummy = false;
2447    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2448    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2449                      dummy, dummy);
2450
2451    // Handle multiple-vector values specially, emitting each subvector as an
2452    // argument to the __builtin.
2453    unsigned NumOfVec = 0;
2454    if (proto[i] >= '2' && proto[i] <= '4') {
2455      NumOfVec = proto[i] - '0';
2456    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2457      NumOfVec = proto[i] - 'A' + 1;
2458    }
2459
2460    if (NumOfVec > 0) {
2461      // Check if an explicit cast is needed.
2462      if (argType != 'c' || argPoly || argUsgn)
2463        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2464
2465      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2466        s += args + ".val[" + utostr(vi) + "]";
2467        if ((vi + 1) < ve)
2468          s += ", ";
2469      }
2470      if ((i + 1) < e)
2471        s += ", ";
2472
2473      continue;
2474    }
2475
2476    if (splat && (i + 1) == e)
2477      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2478
2479    // Check if an explicit cast is needed.
2480    if ((splat || !argScalar) &&
2481        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2482      std::string argTypeStr = "c";
2483      if (ck != ClassB)
2484        argTypeStr = argType;
2485      if (argQuad)
2486        argTypeStr = "Q" + argTypeStr;
2487      args = "(" + TypeString('d', argTypeStr) + ")" + args;
2488    }
2489
2490    s += args;
2491    if ((i + 1) < e)
2492      s += ", ";
2493  }
2494
2495  // Extra constant integer to hold type class enum for this function, e.g. s8
2496  if (ck == ClassB)
2497    s += ", " + utostr(GetNeonEnum(proto, typestr));
2498
2499  s += ");";
2500
2501  if (proto[0] != 'v' && sret) {
2502    if (define)
2503      s += " r;";
2504    else
2505      s += " return r;";
2506  }
2507  return s;
2508}
2509
2510static std::string GenBuiltinDef(const std::string &name,
2511                                 const std::string &proto,
2512                                 StringRef typestr, ClassKind ck) {
2513  std::string s("BUILTIN(__builtin_neon_");
2514
2515  // If all types are the same size, bitcasting the args will take care
2516  // of arg checking.  The actual signedness etc. will be taken care of with
2517  // special enums.
2518  if (!ProtoHasScalar(proto))
2519    ck = ClassB;
2520
2521  s += MangleName(name, typestr, ck);
2522  s += ", \"";
2523
2524  for (unsigned i = 0, e = proto.size(); i != e; ++i)
2525    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2526
2527  // Extra constant integer to hold type class enum for this function, e.g. s8
2528  if (ck == ClassB)
2529    s += "i";
2530
2531  s += "\", \"n\")";
2532  return s;
2533}
2534
2535static std::string GenIntrinsic(const std::string &name,
2536                                const std::string &proto,
2537                                StringRef outTypeStr, StringRef inTypeStr,
2538                                OpKind kind, ClassKind classKind) {
2539  assert(!proto.empty() && "");
2540  bool define = UseMacro(proto, outTypeStr) && kind != OpUnavailable;
2541  std::string s;
2542
2543  // static always inline + return type
2544  if (define)
2545    s += "#define ";
2546  else
2547    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2548
2549  // Function name with type suffix
2550  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2551  if (outTypeStr != inTypeStr) {
2552    // If the input type is different (e.g., for vreinterpret), append a suffix
2553    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2554    // does not insert another "q" in the name.
2555    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2556    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2557    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2558  }
2559  s += mangledName;
2560
2561  // Function arguments
2562  s += GenArgs(proto, inTypeStr, name);
2563
2564  // Definition.
2565  if (define) {
2566    s += " __extension__ ({ \\\n  ";
2567    s += GenMacroLocals(proto, inTypeStr, name);
2568  } else if (kind == OpUnavailable) {
2569    s += " __attribute__((unavailable));\n";
2570    return s;
2571  } else
2572    s += " {\n  ";
2573
2574  if (kind != OpNone)
2575    s += GenOpString(name, kind, proto, outTypeStr);
2576  else
2577    s += GenBuiltin(name, proto, outTypeStr, classKind);
2578  if (define)
2579    s += " })";
2580  else
2581    s += " }";
2582  s += "\n";
2583  return s;
2584}
2585
2586/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2587/// is comprised of type definitions and function declarations.
2588void NeonEmitter::run(raw_ostream &OS) {
2589  OS <<
2590    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2591    "---===\n"
2592    " *\n"
2593    " * Permission is hereby granted, free of charge, to any person obtaining "
2594    "a copy\n"
2595    " * of this software and associated documentation files (the \"Software\"),"
2596    " to deal\n"
2597    " * in the Software without restriction, including without limitation the "
2598    "rights\n"
2599    " * to use, copy, modify, merge, publish, distribute, sublicense, "
2600    "and/or sell\n"
2601    " * copies of the Software, and to permit persons to whom the Software is\n"
2602    " * furnished to do so, subject to the following conditions:\n"
2603    " *\n"
2604    " * The above copyright notice and this permission notice shall be "
2605    "included in\n"
2606    " * all copies or substantial portions of the Software.\n"
2607    " *\n"
2608    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2609    "EXPRESS OR\n"
2610    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2611    "MERCHANTABILITY,\n"
2612    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2613    "SHALL THE\n"
2614    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2615    "OTHER\n"
2616    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2617    "ARISING FROM,\n"
2618    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2619    "DEALINGS IN\n"
2620    " * THE SOFTWARE.\n"
2621    " *\n"
2622    " *===--------------------------------------------------------------------"
2623    "---===\n"
2624    " */\n\n";
2625
2626  OS << "#ifndef __ARM_NEON_H\n";
2627  OS << "#define __ARM_NEON_H\n\n";
2628
2629  OS << "#if !defined(__ARM_NEON)\n";
2630  OS << "#error \"NEON support not enabled\"\n";
2631  OS << "#endif\n\n";
2632
2633  OS << "#include <stdint.h>\n\n";
2634
2635  // Emit NEON-specific scalar typedefs.
2636  OS << "typedef float float32_t;\n";
2637  OS << "typedef __fp16 float16_t;\n";
2638
2639  OS << "#ifdef __aarch64__\n";
2640  OS << "typedef double float64_t;\n";
2641  OS << "#endif\n\n";
2642
2643  // For now, signedness of polynomial types depends on target
2644  OS << "#ifdef __aarch64__\n";
2645  OS << "typedef uint8_t poly8_t;\n";
2646  OS << "typedef uint16_t poly16_t;\n";
2647  OS << "typedef uint64_t poly64_t;\n";
2648  OS << "typedef __uint128_t poly128_t;\n";
2649  OS << "#else\n";
2650  OS << "typedef int8_t poly8_t;\n";
2651  OS << "typedef int16_t poly16_t;\n";
2652  OS << "#endif\n";
2653
2654  // Emit Neon vector typedefs.
2655  std::string TypedefTypes(
2656      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2657  SmallVector<StringRef, 24> TDTypeVec;
2658  ParseTypes(0, TypedefTypes, TDTypeVec);
2659
2660  // Emit vector typedefs.
2661  bool isA64 = false;
2662  bool preinsert;
2663  bool postinsert;
2664  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2665    bool dummy, quad = false, poly = false;
2666    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2667    preinsert = false;
2668    postinsert = false;
2669
2670    if (type == 'd' || (type == 'l' && poly)) {
2671      preinsert = isA64? false: true;
2672      isA64 = true;
2673    } else {
2674      postinsert = isA64? true: false;
2675      isA64 = false;
2676    }
2677    if (postinsert)
2678      OS << "#endif\n";
2679    if (preinsert)
2680      OS << "#ifdef __aarch64__\n";
2681
2682    if (poly)
2683      OS << "typedef __attribute__((neon_polyvector_type(";
2684    else
2685      OS << "typedef __attribute__((neon_vector_type(";
2686
2687    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2688    OS << utostr(nElts) << "))) ";
2689    if (nElts < 10)
2690      OS << " ";
2691
2692    OS << TypeString('s', TDTypeVec[i]);
2693    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2694
2695  }
2696  postinsert = isA64? true: false;
2697  if (postinsert)
2698    OS << "#endif\n";
2699  OS << "\n";
2700
2701  // Emit struct typedefs.
2702  isA64 = false;
2703  for (unsigned vi = 2; vi != 5; ++vi) {
2704    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2705      bool dummy, quad = false, poly = false;
2706      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2707      preinsert = false;
2708      postinsert = false;
2709
2710      if (type == 'd' || (type == 'l' && poly)) {
2711        preinsert = isA64? false: true;
2712        isA64 = true;
2713      } else {
2714        postinsert = isA64? true: false;
2715        isA64 = false;
2716      }
2717      if (postinsert)
2718        OS << "#endif\n";
2719      if (preinsert)
2720        OS << "#ifdef __aarch64__\n";
2721
2722      std::string ts = TypeString('d', TDTypeVec[i]);
2723      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2724      OS << "typedef struct " << vs << " {\n";
2725      OS << "  " << ts << " val";
2726      OS << "[" << utostr(vi) << "]";
2727      OS << ";\n} ";
2728      OS << vs << ";\n";
2729      OS << "\n";
2730    }
2731  }
2732  postinsert = isA64? true: false;
2733  if (postinsert)
2734    OS << "#endif\n";
2735  OS << "\n";
2736
2737  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2738
2739  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2740
2741  StringMap<ClassKind> EmittedMap;
2742  std::string CurrentGuard = "";
2743  bool InGuard = false;
2744
2745  // Some intrinsics are used to express others. These need to be emitted near
2746  // the beginning so that the declarations are present when needed. This is
2747  // rather an ugly, arbitrary list, but probably simpler than actually tracking
2748  // dependency info.
2749  static const char *EarlyDefsArr[] =
2750      { "VFMA",      "VQMOVN",    "VQMOVUN",  "VABD",    "VMOVL",
2751        "VABDL",     "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH",
2752        "VMULL",     "VMLAL_N",   "VMLSL_N",  "VMULL_N", "VMULL_P64",
2753        "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" };
2754  ArrayRef<const char *> EarlyDefs(EarlyDefsArr);
2755
2756  for (unsigned i = 0; i < EarlyDefs.size(); ++i) {
2757    Record *R = Records.getDef(EarlyDefs[i]);
2758    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
2759  }
2760
2761  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2762    Record *R = RV[i];
2763    if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) !=
2764        EarlyDefs.end())
2765      continue;
2766
2767    emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
2768  }
2769
2770  if (InGuard)
2771    OS << "#endif\n\n";
2772
2773  OS << "#undef __ai\n\n";
2774  OS << "#endif /* __ARM_NEON_H */\n";
2775}
2776
2777void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R,
2778                                       std::string &CurrentGuard, bool &InGuard,
2779                                       StringMap<ClassKind> &EmittedMap) {
2780
2781  std::string NewGuard = R->getValueAsString("ArchGuard");
2782  if (NewGuard != CurrentGuard) {
2783    if (InGuard)
2784      OS << "#endif\n\n";
2785    if (NewGuard.size())
2786      OS << "#if " << NewGuard << '\n';
2787
2788    CurrentGuard = NewGuard;
2789    InGuard = NewGuard.size() != 0;
2790  }
2791
2792  emitIntrinsic(OS, R, EmittedMap);
2793}
2794
2795/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2796/// intrinsics specified by record R checking for intrinsic uniqueness.
2797void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2798                                StringMap<ClassKind> &EmittedMap) {
2799  std::string name = R->getValueAsString("Name");
2800  std::string Proto = R->getValueAsString("Prototype");
2801  std::string Types = R->getValueAsString("Types");
2802
2803  SmallVector<StringRef, 16> TypeVec;
2804  ParseTypes(R, Types, TypeVec);
2805
2806  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2807
2808  ClassKind classKind = ClassNone;
2809  if (R->getSuperClasses().size() >= 2)
2810    classKind = ClassMap[R->getSuperClasses()[1]];
2811  if (classKind == ClassNone && kind == OpNone)
2812    PrintFatalError(R->getLoc(), "Builtin has no class kind");
2813
2814  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2815    if (kind == OpReinterpret) {
2816      bool outQuad = false;
2817      bool dummy = false;
2818      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2819      for (unsigned srcti = 0, srcte = TypeVec.size();
2820           srcti != srcte; ++srcti) {
2821        bool inQuad = false;
2822        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2823        if (srcti == ti || inQuad != outQuad)
2824          continue;
2825        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2826                                     OpCast, ClassS);
2827        if (EmittedMap.count(s))
2828          continue;
2829        EmittedMap[s] = ClassS;
2830        OS << s;
2831      }
2832    } else {
2833      std::string s =
2834          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2835      if (EmittedMap.count(s)) {
2836        errs() << "warning: duplicate definition: " << name
2837               << " (type: " << TypeString('d', TypeVec[ti]) << ")\n";
2838        continue;
2839      }
2840      EmittedMap[s] = classKind;
2841      OS << s;
2842    }
2843  }
2844  OS << "\n";
2845}
2846
2847static unsigned RangeFromType(const char mod, StringRef typestr) {
2848  // base type to get the type string for.
2849  bool quad = false, dummy = false;
2850  char type = ClassifyType(typestr, quad, dummy, dummy);
2851  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2852
2853  switch (type) {
2854    case 'c':
2855      return (8 << (int)quad) - 1;
2856    case 'h':
2857    case 's':
2858      return (4 << (int)quad) - 1;
2859    case 'f':
2860    case 'i':
2861      return (2 << (int)quad) - 1;
2862    case 'd':
2863    case 'l':
2864      return (1 << (int)quad) - 1;
2865    case 'k':
2866      return 0;
2867    default:
2868      PrintFatalError("unhandled type!");
2869  }
2870}
2871
2872static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2873  // base type to get the type string for.
2874  bool dummy = false;
2875  char type = ClassifyType(typestr, dummy, dummy, dummy);
2876  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2877
2878  switch (type) {
2879    case 'c':
2880      return 7;
2881    case 'h':
2882    case 's':
2883      return 15;
2884    case 'f':
2885    case 'i':
2886      return 31;
2887    case 'd':
2888    case 'l':
2889      return 63;
2890    case 'k':
2891      return 127;
2892    default:
2893      PrintFatalError("unhandled type!");
2894  }
2895}
2896
2897/// Generate the ARM and AArch64 intrinsic range checking code for
2898/// shift/lane immediates, checking for unique declarations.
2899void
2900NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS) {
2901  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2902  StringMap<OpKind> EmittedMap;
2903
2904  // Generate the intrinsic range checking code for shift/lane immediates.
2905  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2906
2907  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2908    Record *R = RV[i];
2909
2910    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2911    if (k != OpNone)
2912      continue;
2913
2914    std::string name = R->getValueAsString("Name");
2915    std::string Proto = R->getValueAsString("Prototype");
2916    std::string Types = R->getValueAsString("Types");
2917    std::string Rename = name + "@" + Proto;
2918
2919    // Functions with 'a' (the splat code) in the type prototype should not get
2920    // their own builtin as they use the non-splat variant.
2921    if (Proto.find('a') != std::string::npos)
2922      continue;
2923
2924    // Functions which do not have an immediate do not need to have range
2925    // checking code emitted.
2926    size_t immPos = Proto.find('i');
2927    if (immPos == std::string::npos)
2928      continue;
2929
2930    SmallVector<StringRef, 16> TypeVec;
2931    ParseTypes(R, Types, TypeVec);
2932
2933    if (R->getSuperClasses().size() < 2)
2934      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2935
2936    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2937    if (!ProtoHasScalar(Proto))
2938      ck = ClassB;
2939
2940    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2941      std::string namestr, shiftstr, rangestr;
2942
2943      if (R->getValueAsBit("isVCVT_N")) {
2944        // VCVT between floating- and fixed-point values takes an immediate
2945        // in the range [1, 32] for f32, or [1, 64] for f64.
2946        ck = ClassB;
2947        if (name.find("32") != std::string::npos)
2948          rangestr = "l = 1; u = 31"; // upper bound = l + u
2949        else if (name.find("64") != std::string::npos)
2950          rangestr = "l = 1; u = 63";
2951        else
2952          PrintFatalError(R->getLoc(),
2953              "Fixed point convert name should contains \"32\" or \"64\"");
2954
2955      } else if (R->getValueAsBit("isScalarShift")) {
2956        // Right shifts have an 'r' in the name, left shifts do not.  Convert
2957        // instructions have the same bounds and right shifts.
2958        if (name.find('r') != std::string::npos ||
2959            name.find("cvt") != std::string::npos)
2960          rangestr = "l = 1; ";
2961
2962        unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
2963        // Narrow shift has half the upper bound
2964        if (R->getValueAsBit("isScalarNarrowShift"))
2965          upBound /= 2;
2966
2967        rangestr += "u = " + utostr(upBound);
2968      } else if (R->getValueAsBit("isShift")) {
2969        // Builtins which are overloaded by type will need to have their upper
2970        // bound computed at Sema time based on the type constant.
2971        shiftstr = ", true";
2972
2973        // Right shifts have an 'r' in the name, left shifts do not.
2974        if (name.find('r') != std::string::npos)
2975          rangestr = "l = 1; ";
2976
2977        rangestr += "u = RFT(TV" + shiftstr + ")";
2978      } else if (ck == ClassB) {
2979        // ClassB intrinsics have a type (and hence lane number) that is only
2980        // known at runtime.
2981        assert(immPos > 0 && "unexpected immediate operand");
2982        if (R->getValueAsBit("isLaneQ"))
2983          rangestr = "u = RFT(TV, false, true)";
2984        else
2985          rangestr = "u = RFT(TV, false, false)";
2986      } else {
2987        // The immediate generally refers to a lane in the preceding argument.
2988        assert(immPos > 0 && "unexpected immediate operand");
2989        rangestr =
2990            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2991      }
2992      // Make sure cases appear only once by uniquing them in a string map.
2993      namestr = MangleName(name, TypeVec[ti], ck);
2994      if (EmittedMap.count(namestr))
2995        continue;
2996      EmittedMap[namestr] = OpNone;
2997
2998      // Calculate the index of the immediate that should be range checked.
2999      unsigned immidx = 0;
3000
3001      // Builtins that return a struct of multiple vectors have an extra
3002      // leading arg for the struct return.
3003      if (IsMultiVecProto(Proto[0]))
3004        ++immidx;
3005
3006      // Add one to the index for each argument until we reach the immediate
3007      // to be checked.  Structs of vectors are passed as multiple arguments.
3008      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
3009        switch (Proto[ii]) {
3010        default:
3011          immidx += 1;
3012          break;
3013        case '2':
3014        case 'B':
3015          immidx += 2;
3016          break;
3017        case '3':
3018        case 'C':
3019          immidx += 3;
3020          break;
3021        case '4':
3022        case 'D':
3023          immidx += 4;
3024          break;
3025        case 'i':
3026          ie = ii + 1;
3027          break;
3028        }
3029      }
3030      OS << "case NEON::BI__builtin_neon_";
3031      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
3032         << rangestr << "; break;\n";
3033    }
3034  }
3035  OS << "#endif\n\n";
3036}
3037
/// OverloadInfo - Type-check information accumulated for one overloaded
/// builtin while generating the overload checking code.
struct OverloadInfo {
  /// Bit set of the permitted type enum values (one bit per GetNeonEnum
  /// result), OR-ed together across contributing definitions.
  uint64_t Mask;
  /// Index of the pointer argument, or a negative value when there is none.
  int PtrArgNum;
  /// True when the pointer argument is a const pointer.
  bool HasConstPtr;

  /// Start with an empty mask and no pointer argument, so that a
  /// default-constructed object never carries indeterminate values.
  OverloadInfo() : Mask(0), PtrArgNum(-1), HasConstPtr(false) {}
};
3043/// Generate the ARM and AArch64 overloaded type checking code for
3044/// SemaChecking.cpp, checking for unique builtin declarations.
3045void
3046NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS) {
3047  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3048
3049  // Generate the overloaded type checking code for SemaChecking.cpp
3050  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
3051
3052  // We record each overload check line before emitting because subsequent Inst
3053  // definitions may extend the number of permitted types (i.e. augment the
3054  // Mask). Use std::map to avoid sorting the table by hash number.
3055  std::map<std::string, OverloadInfo> OverloadMap;
3056  typedef std::map<std::string, OverloadInfo>::iterator OverloadIterator;
3057
3058  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3059    Record *R = RV[i];
3060    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3061    if (k != OpNone)
3062      continue;
3063
3064    std::string Proto = R->getValueAsString("Prototype");
3065    std::string Types = R->getValueAsString("Types");
3066    std::string name = R->getValueAsString("Name");
3067    std::string Rename = name + "@" + Proto;
3068
3069    // Functions with 'a' (the splat code) in the type prototype should not get
3070    // their own builtin as they use the non-splat variant.
3071    if (Proto.find('a') != std::string::npos)
3072      continue;
3073
3074    // Functions which have a scalar argument cannot be overloaded, no need to
3075    // check them if we are emitting the type checking code.
3076    if (ProtoHasScalar(Proto))
3077      continue;
3078
3079    SmallVector<StringRef, 16> TypeVec;
3080    ParseTypes(R, Types, TypeVec);
3081
3082    if (R->getSuperClasses().size() < 2)
3083      PrintFatalError(R->getLoc(), "Builtin has no class kind");
3084
3085    int si = -1, qi = -1;
3086    uint64_t mask = 0, qmask = 0;
3087    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3088      // Generate the switch case(s) for this builtin for the type validation.
3089      bool quad = false, poly = false, usgn = false;
3090      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
3091
3092      if (quad) {
3093        qi = ti;
3094        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3095      } else {
3096        si = ti;
3097        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3098      }
3099    }
3100
3101    // Check if the builtin function has a pointer or const pointer argument.
3102    int PtrArgNum = -1;
3103    bool HasConstPtr = false;
3104    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
3105      char ArgType = Proto[arg];
3106      if (ArgType == 'c') {
3107        HasConstPtr = true;
3108        PtrArgNum = arg - 1;
3109        break;
3110      }
3111      if (ArgType == 'p') {
3112        PtrArgNum = arg - 1;
3113        break;
3114      }
3115    }
3116    // For sret builtins, adjust the pointer argument index.
3117    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
3118      PtrArgNum += 1;
3119
3120    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
3121    // and vst1_lane intrinsics.  Using a pointer to the vector element
3122    // type with one of those operations causes codegen to select an aligned
3123    // load/store instruction.  If you want an unaligned operation,
3124    // the pointer argument needs to have less alignment than element type,
3125    // so just accept any pointer type.
3126    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
3127      PtrArgNum = -1;
3128      HasConstPtr = false;
3129    }
3130
3131    if (mask) {
3132      std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
3133          MangleName(name, TypeVec[si], ClassB), OverloadInfo()));
3134      OverloadInfo &Record = I.first->second;
3135      if (!I.second)
3136        assert(Record.PtrArgNum == PtrArgNum &&
3137               Record.HasConstPtr == HasConstPtr);
3138      Record.Mask |= mask;
3139      Record.PtrArgNum = PtrArgNum;
3140      Record.HasConstPtr = HasConstPtr;
3141    }
3142    if (qmask) {
3143      std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
3144          MangleName(name, TypeVec[qi], ClassB), OverloadInfo()));
3145      OverloadInfo &Record = I.first->second;
3146      if (!I.second)
3147        assert(Record.PtrArgNum == PtrArgNum &&
3148               Record.HasConstPtr == HasConstPtr);
3149      Record.Mask |= qmask;
3150      Record.PtrArgNum = PtrArgNum;
3151      Record.HasConstPtr = HasConstPtr;
3152    }
3153  }
3154
3155  for (OverloadIterator I = OverloadMap.begin(), E = OverloadMap.end(); I != E;
3156       ++I) {
3157    OverloadInfo &BuiltinOverloads = I->second;
3158    OS << "case NEON::BI__builtin_neon_" << I->first << ": ";
3159    OS << "mask = " << "0x" << utohexstr(BuiltinOverloads.Mask) << "ULL";
3160    if (BuiltinOverloads.PtrArgNum >= 0)
3161      OS << "; PtrArgNum = " << BuiltinOverloads.PtrArgNum;
3162    if (BuiltinOverloads.HasConstPtr)
3163      OS << "; HasConstPtr = true";
3164    OS << "; break;\n";
3165  }
3166
3167  OS << "#endif\n\n";
3168}
3169
3170/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
3171/// declaration of builtins, checking for unique builtin declarations.
3172void NeonEmitter::genBuiltinsDef(raw_ostream &OS) {
3173  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3174
3175  // We want to emit the intrinsics in alphabetical order, so use the more
3176  // expensive std::map to gather them together first.
3177  std::map<std::string, OpKind> EmittedMap;
3178
3179  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3180    Record *R = RV[i];
3181    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3182    if (k != OpNone)
3183      continue;
3184
3185    std::string Proto = R->getValueAsString("Prototype");
3186    std::string name = R->getValueAsString("Name");
3187    std::string Rename = name + "@" + Proto;
3188
3189    // Functions with 'a' (the splat code) in the type prototype should not get
3190    // their own builtin as they use the non-splat variant.
3191    if (Proto.find('a') != std::string::npos)
3192      continue;
3193
3194    std::string Types = R->getValueAsString("Types");
3195    SmallVector<StringRef, 16> TypeVec;
3196    ParseTypes(R, Types, TypeVec);
3197
3198    if (R->getSuperClasses().size() < 2)
3199      PrintFatalError(R->getLoc(), "Builtin has no class kind");
3200
3201    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3202
3203    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3204      // Generate the declaration for this builtin, ensuring
3205      // that each unique BUILTIN() macro appears only once in the output
3206      // stream.
3207      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3208      if (EmittedMap.count(bd))
3209        continue;
3210
3211      EmittedMap[bd] = OpNone;
3212    }
3213  }
3214
3215  // Generate BuiltinsNEON.
3216  OS << "#ifdef GET_NEON_BUILTINS\n";
3217
3218  for (std::map<std::string, OpKind>::iterator I = EmittedMap.begin(),
3219                                               E = EmittedMap.end();
3220       I != E; ++I)
3221    OS << I->first << "\n";
3222
3223  OS << "#endif\n\n";
3224}
3225
3226/// runHeader - Emit a file with sections defining:
3227/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3228/// 2. the SemaChecking code for the type overload checking.
3229/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
3230void NeonEmitter::runHeader(raw_ostream &OS) {
3231  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3232
3233  // Generate shared BuiltinsXXX.def
3234  genBuiltinsDef(OS);
3235
3236  // Generate ARM overloaded type checking code for SemaChecking.cpp
3237  genOverloadTypeCheckCode(OS);
3238
3239  // Generate ARM range checking code for shift/lane immediates.
3240  genIntrinsicRangeCheckCode(OS);
3241}
3242
3243/// GenTest - Write out a test for the intrinsic specified by the name and
3244/// type strings, including the embedded patterns for FileCheck to match.
3245static std::string GenTest(const std::string &name,
3246                           const std::string &proto,
3247                           StringRef outTypeStr, StringRef inTypeStr,
3248                           bool isShift, bool isHiddenLOp,
3249                           ClassKind ck, const std::string &InstName,
3250                           bool isA64,
3251                           std::string & testFuncProto) {
3252  assert(!proto.empty() && "");
3253  std::string s;
3254
3255  // Function name with type suffix
3256  std::string mangledName = MangleName(name, outTypeStr, ClassS);
3257  if (outTypeStr != inTypeStr) {
3258    // If the input type is different (e.g., for vreinterpret), append a suffix
3259    // for the input type.  String off a "Q" (quad) prefix so that MangleName
3260    // does not insert another "q" in the name.
3261    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3262    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3263    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3264  }
3265
3266  // todo: GenerateChecksForIntrinsic does not generate CHECK
3267  // for aarch64 instructions yet
3268  std::vector<std::string> FileCheckPatterns;
3269  if (!isA64) {
3270    GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3271                               isHiddenLOp, FileCheckPatterns);
3272    s+= "// CHECK_ARM: test_" + mangledName + "\n";
3273  }
3274  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3275
3276  // Emit the FileCheck patterns.
3277  // If for any reason we do not want to emit a check, mangledInst
3278  // will be the empty string.
3279  if (FileCheckPatterns.size()) {
3280    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3281                                                  e = FileCheckPatterns.end();
3282         i != e;
3283         ++i) {
3284      s += "// CHECK_ARM: " + *i + "\n";
3285    }
3286  }
3287
3288  // Emit the start of the test function.
3289
3290  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3291  char arg = 'a';
3292  std::string comma;
3293  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3294    // Do not create arguments for values that must be immediate constants.
3295    if (proto[i] == 'i')
3296      continue;
3297    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3298    testFuncProto.push_back(arg);
3299    comma = ", ";
3300  }
3301  testFuncProto += ")";
3302
3303  s+= testFuncProto;
3304  s+= " {\n  ";
3305
3306  if (proto[0] != 'v')
3307    s += "return ";
3308  s += mangledName + "(";
3309  arg = 'a';
3310  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3311    if (proto[i] == 'i') {
3312      // For immediate operands, test the maximum value.
3313      if (isShift)
3314        s += "1"; // FIXME
3315      else
3316        // The immediate generally refers to a lane in the preceding argument.
3317        s += utostr(RangeFromType(proto[i-1], inTypeStr));
3318    } else {
3319      s.push_back(arg);
3320    }
3321    if ((i + 1) < e)
3322      s += ", ";
3323  }
3324  s += ");\n}\n\n";
3325  return s;
3326}
3327
3328/// Write out all intrinsic tests for the specified target, checking
3329/// for intrinsic test uniqueness.
3330void NeonEmitter::genTargetTest(raw_ostream &OS) {
3331  StringMap<OpKind> EmittedMap;
3332  std::string CurrentGuard = "";
3333  bool InGuard = false;
3334
3335  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3336  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3337    Record *R = RV[i];
3338    std::string name = R->getValueAsString("Name");
3339    std::string Proto = R->getValueAsString("Prototype");
3340    std::string Types = R->getValueAsString("Types");
3341    bool isShift = R->getValueAsBit("isShift");
3342    std::string InstName = R->getValueAsString("InstName");
3343    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3344
3345    std::string NewGuard = R->getValueAsString("ArchGuard");
3346    if (NewGuard != CurrentGuard) {
3347      if (InGuard)
3348        OS << "#endif\n\n";
3349      if (NewGuard.size())
3350        OS << "#if " << NewGuard << '\n';
3351
3352      CurrentGuard = NewGuard;
3353      InGuard = NewGuard.size() != 0;
3354    }
3355
3356    SmallVector<StringRef, 16> TypeVec;
3357    ParseTypes(R, Types, TypeVec);
3358
3359    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3360    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3361    if (kind == OpUnavailable)
3362      continue;
3363    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3364      if (kind == OpReinterpret) {
3365        bool outQuad = false;
3366        bool dummy = false;
3367        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3368        for (unsigned srcti = 0, srcte = TypeVec.size();
3369             srcti != srcte; ++srcti) {
3370          bool inQuad = false;
3371          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3372          if (srcti == ti || inQuad != outQuad)
3373            continue;
3374          std::string testFuncProto;
3375          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3376                                  isShift, isHiddenLOp, ck, InstName,
3377                                  CurrentGuard.size(), testFuncProto);
3378          if (EmittedMap.count(testFuncProto))
3379            continue;
3380          EmittedMap[testFuncProto] = kind;
3381          OS << s << "\n";
3382        }
3383      } else {
3384        std::string testFuncProto;
3385        std::string s =
3386            GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp,
3387                    ck, InstName, CurrentGuard.size(), testFuncProto);
3388        OS << s << "\n";
3389      }
3390    }
3391  }
3392
3393  if (InGuard)
3394    OS << "#endif\n";
3395}
3396/// runTests - Write out a complete set of tests for all of the Neon
3397/// intrinsics.
3398void NeonEmitter::runTests(raw_ostream &OS) {
3399  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3400        "apcs-gnu\\\n"
3401        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3402        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3403        "\n"
3404        "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3405        "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
3406        "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3407        "\n"
3408        "// REQUIRES: long_tests\n"
3409        "\n"
3410        "#include <arm_neon.h>\n"
3411        "\n";
3412
3413  genTargetTest(OS);
3414}
3415
3416namespace clang {
3417void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3418  NeonEmitter(Records).run(OS);
3419}
3420void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3421  NeonEmitter(Records).runHeader(OS);
3422}
3423void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3424  NeonEmitter(Records).runTests(OS);
3425}
3426} // End namespace clang
3427