NeonEmitter.cpp revision 8137a607eebe799d95fd05226fb91119a5b054b0
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
/// OpKind - Identifies the operation associated with an intrinsic.  The
/// NeonEmitter constructor fills OpMap so that string keys such as "OP_ADD"
/// or "OP_MLAL_LN" map onto these enumerators.  OpNone is the first
/// enumerator and therefore has the value 0.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ
};
139
/// ClassKind - Classifies an intrinsic by the kind of type suffix its name
/// receives.  The NeonEmitter constructor maps the "SInst", "IInst", "WInst",
/// "SOpInst", "IOpInst", "WOpInst", "LOpInst" and "NoTestOpInst" record
/// classes onto these values through ClassMap.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
152
/// NeonTypeFlags - Packs the element type of an overloaded Neon builtin
/// together with its "unsigned" and "quad" modifiers into one 32-bit word.
/// The layout must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  /// Element kinds; the enumerator value is stored directly in the flag word.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode \p ET and OR in the unsigned/quad bits when requested.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the raw encoded word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
189
190namespace {
191class NeonEmitter {
192  RecordKeeper &Records;
193  StringMap<OpKind> OpMap;
194  DenseMap<Record*, ClassKind> ClassMap;
195
196public:
197  NeonEmitter(RecordKeeper &R) : Records(R) {
198    OpMap["OP_NONE"]  = OpNone;
199    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
200    OpMap["OP_ADD"]   = OpAdd;
201    OpMap["OP_ADDL"]  = OpAddl;
202    OpMap["OP_ADDLHi"] = OpAddlHi;
203    OpMap["OP_ADDW"]  = OpAddw;
204    OpMap["OP_ADDWHi"] = OpAddwHi;
205    OpMap["OP_SUB"]   = OpSub;
206    OpMap["OP_SUBL"]  = OpSubl;
207    OpMap["OP_SUBLHi"] = OpSublHi;
208    OpMap["OP_SUBW"]  = OpSubw;
209    OpMap["OP_SUBWHi"] = OpSubwHi;
210    OpMap["OP_MUL"]   = OpMul;
211    OpMap["OP_MLA"]   = OpMla;
212    OpMap["OP_MLAL"]  = OpMlal;
213    OpMap["OP_MULLHi"]  = OpMullHi;
214    OpMap["OP_MLALHi"]  = OpMlalHi;
215    OpMap["OP_MLS"]   = OpMls;
216    OpMap["OP_MLSL"]  = OpMlsl;
217    OpMap["OP_MLSLHi"] = OpMlslHi;
218    OpMap["OP_MUL_N"] = OpMulN;
219    OpMap["OP_MLA_N"] = OpMlaN;
220    OpMap["OP_MLS_N"] = OpMlsN;
221    OpMap["OP_MLAL_N"] = OpMlalN;
222    OpMap["OP_MLSL_N"] = OpMlslN;
223    OpMap["OP_MUL_LN"]= OpMulLane;
224    OpMap["OP_MULX_LN"]= OpMulXLane;
225    OpMap["OP_MULL_LN"] = OpMullLane;
226    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
227    OpMap["OP_MLA_LN"]= OpMlaLane;
228    OpMap["OP_MLS_LN"]= OpMlsLane;
229    OpMap["OP_MLAL_LN"] = OpMlalLane;
230    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
231    OpMap["OP_MLSL_LN"] = OpMlslLane;
232    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
233    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
234    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
235    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
236    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
237    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
238    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
239    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
240    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
241    OpMap["OP_FMS_LN"] = OpFMSLane;
242    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
243    OpMap["OP_TRN1"]  = OpTrn1;
244    OpMap["OP_ZIP1"]  = OpZip1;
245    OpMap["OP_UZP1"]  = OpUzp1;
246    OpMap["OP_TRN2"]  = OpTrn2;
247    OpMap["OP_ZIP2"]  = OpZip2;
248    OpMap["OP_UZP2"]  = OpUzp2;
249    OpMap["OP_EQ"]    = OpEq;
250    OpMap["OP_GE"]    = OpGe;
251    OpMap["OP_LE"]    = OpLe;
252    OpMap["OP_GT"]    = OpGt;
253    OpMap["OP_LT"]    = OpLt;
254    OpMap["OP_NEG"]   = OpNeg;
255    OpMap["OP_NOT"]   = OpNot;
256    OpMap["OP_AND"]   = OpAnd;
257    OpMap["OP_OR"]    = OpOr;
258    OpMap["OP_XOR"]   = OpXor;
259    OpMap["OP_ANDN"]  = OpAndNot;
260    OpMap["OP_ORN"]   = OpOrNot;
261    OpMap["OP_CAST"]  = OpCast;
262    OpMap["OP_CONC"]  = OpConcat;
263    OpMap["OP_HI"]    = OpHi;
264    OpMap["OP_LO"]    = OpLo;
265    OpMap["OP_DUP"]   = OpDup;
266    OpMap["OP_DUP_LN"] = OpDupLane;
267    OpMap["OP_SEL"]   = OpSelect;
268    OpMap["OP_REV16"] = OpRev16;
269    OpMap["OP_REV32"] = OpRev32;
270    OpMap["OP_REV64"] = OpRev64;
271    OpMap["OP_XTN"] = OpXtnHi;
272    OpMap["OP_SQXTUN"] = OpSqxtunHi;
273    OpMap["OP_QXTN"] = OpQxtnHi;
274    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
275    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
276    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
277    OpMap["OP_REINT"] = OpReinterpret;
278    OpMap["OP_ADDHNHi"] = OpAddhnHi;
279    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
280    OpMap["OP_SUBHNHi"] = OpSubhnHi;
281    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
282    OpMap["OP_ABDL"]  = OpAbdl;
283    OpMap["OP_ABDLHi"] = OpAbdlHi;
284    OpMap["OP_ABA"]   = OpAba;
285    OpMap["OP_ABAL"]  = OpAbal;
286    OpMap["OP_ABALHi"] = OpAbalHi;
287    OpMap["OP_QDMULLHi"] = OpQDMullHi;
288    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
289    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
290    OpMap["OP_DIV"] = OpDiv;
291    OpMap["OP_LONG_HI"] = OpLongHi;
292    OpMap["OP_NARROW_HI"] = OpNarrowHi;
293    OpMap["OP_MOVL_HI"] = OpMovlHi;
294    OpMap["OP_COPY_LN"] = OpCopyLane;
295    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
296    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
297
298    Record *SI = R.getClass("SInst");
299    Record *II = R.getClass("IInst");
300    Record *WI = R.getClass("WInst");
301    Record *SOpI = R.getClass("SOpInst");
302    Record *IOpI = R.getClass("IOpInst");
303    Record *WOpI = R.getClass("WOpInst");
304    Record *LOpI = R.getClass("LOpInst");
305    Record *NoTestOpI = R.getClass("NoTestOpInst");
306
307    ClassMap[SI] = ClassS;
308    ClassMap[II] = ClassI;
309    ClassMap[WI] = ClassW;
310    ClassMap[SOpI] = ClassS;
311    ClassMap[IOpI] = ClassI;
312    ClassMap[WOpI] = ClassW;
313    ClassMap[LOpI] = ClassL;
314    ClassMap[NoTestOpI] = ClassNoTest;
315  }
316
317  // run - Emit arm_neon.h.inc
318  void run(raw_ostream &o);
319
320  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
321  void runHeader(raw_ostream &o);
322
323  // runTests - Emit tests for all the Neon intrinsics.
324  void runTests(raw_ostream &o);
325
326private:
327  void emitIntrinsic(raw_ostream &OS, Record *R,
328                     StringMap<ClassKind> &EmittedMap);
329  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
330                      bool isA64GenBuiltinDef);
331  void genOverloadTypeCheckCode(raw_ostream &OS,
332                                StringMap<ClassKind> &A64IntrinsicMap,
333                                bool isA64TypeCheck);
334  void genIntrinsicRangeCheckCode(raw_ostream &OS,
335                                  StringMap<ClassKind> &A64IntrinsicMap,
336                                  bool isA64RangeCheck);
337  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
338                     bool isA64TestGen);
339};
340} // end anonymous namespace
341
342/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
343/// which each StringRef representing a single type declared in the string.
344/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
345/// 2xfloat and 4xfloat respectively.
346static void ParseTypes(Record *r, std::string &s,
347                       SmallVectorImpl<StringRef> &TV) {
348  const char *data = s.data();
349  int len = 0;
350
351  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
352    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
353                         || data[len] == 'H' || data[len] == 'S')
354      continue;
355
356    switch (data[len]) {
357      case 'c':
358      case 's':
359      case 'i':
360      case 'l':
361      case 'h':
362      case 'f':
363      case 'd':
364        break;
365      default:
366        PrintFatalError(r->getLoc(),
367                      "Unexpected letter: " + std::string(data + len, 1));
368    }
369    TV.push_back(StringRef(data, len + 1));
370    data += len + 1;
371    len = -1;
372  }
373}
374
375/// Widen - Convert a type code into the next wider type.  char -> short,
376/// short -> int, etc.
377static char Widen(const char t) {
378  switch (t) {
379    case 'c':
380      return 's';
381    case 's':
382      return 'i';
383    case 'i':
384      return 'l';
385    case 'h':
386      return 'f';
387    case 'f':
388      return 'd';
389    default:
390      PrintFatalError("unhandled type in widen!");
391  }
392}
393
394/// Narrow - Convert a type code into the next smaller type.  short -> char,
395/// float -> half float, etc.
396static char Narrow(const char t) {
397  switch (t) {
398    case 's':
399      return 'c';
400    case 'i':
401      return 's';
402    case 'l':
403      return 'i';
404    case 'f':
405      return 'h';
406    case 'd':
407      return 'f';
408    default:
409      PrintFatalError("unhandled type in narrow!");
410  }
411}
412
413static std::string GetNarrowTypestr(StringRef ty)
414{
415  std::string s;
416  for (size_t i = 0, end = ty.size(); i < end; i++) {
417    switch (ty[i]) {
418      case 's':
419        s += 'c';
420        break;
421      case 'i':
422        s += 's';
423        break;
424      case 'l':
425        s += 'i';
426        break;
427      default:
428        s += ty[i];
429        break;
430    }
431  }
432
433  return s;
434}
435
436/// For a particular StringRef, return the base type code, and whether it has
437/// the quad-vector, polynomial, or unsigned modifiers set.
438static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
439  unsigned off = 0;
440  // ignore scalar.
441  if (ty[off] == 'S') {
442    ++off;
443  }
444  // remember quad.
445  if (ty[off] == 'Q' || ty[off] == 'H') {
446    quad = true;
447    ++off;
448  }
449
450  // remember poly.
451  if (ty[off] == 'P') {
452    poly = true;
453    ++off;
454  }
455
456  // remember unsigned.
457  if (ty[off] == 'U') {
458    usgn = true;
459    ++off;
460  }
461
462  // base type to get the type string for.
463  return ty[off];
464}
465
466/// ModType - Transform a type code and its modifiers based on a mod code. The
467/// mod code definitions may be found at the top of arm_neon.td.
468static char ModType(const char mod, char type, bool &quad, bool &poly,
469                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
470  switch (mod) {
471    case 't':
472      if (poly) {
473        poly = false;
474        usgn = true;
475      }
476      break;
477    case 'b':
478      scal = true;
479    case 'u':
480      usgn = true;
481      poly = false;
482      if (type == 'f')
483        type = 'i';
484      if (type == 'd')
485        type = 'l';
486      break;
487    case '$':
488      scal = true;
489    case 'x':
490      usgn = false;
491      poly = false;
492      if (type == 'f')
493        type = 'i';
494      if (type == 'd')
495        type = 'l';
496      break;
497    case 'o':
498      scal = true;
499      type = 'd';
500      usgn = false;
501      break;
502    case 'y':
503      scal = true;
504    case 'f':
505      if (type == 'h')
506        quad = true;
507      type = 'f';
508      usgn = false;
509      break;
510    case 'g':
511      quad = false;
512      break;
513    case 'B':
514    case 'C':
515    case 'D':
516    case 'j':
517      quad = true;
518      break;
519    case 'w':
520      type = Widen(type);
521      quad = true;
522      break;
523    case 'n':
524      type = Widen(type);
525      break;
526    case 'i':
527      type = 'i';
528      scal = true;
529      break;
530    case 'l':
531      type = 'l';
532      scal = true;
533      usgn = true;
534      break;
535    case 'z':
536      type = Narrow(type);
537      scal = true;
538      break;
539    case 'r':
540      type = Widen(type);
541      scal = true;
542      break;
543    case 's':
544    case 'a':
545      scal = true;
546      break;
547    case 'k':
548      quad = true;
549      break;
550    case 'c':
551      cnst = true;
552    case 'p':
553      pntr = true;
554      scal = true;
555      break;
556    case 'h':
557      type = Narrow(type);
558      if (type == 'h')
559        quad = false;
560      break;
561    case 'q':
562      type = Narrow(type);
563      quad = true;
564      break;
565    case 'e':
566      type = Narrow(type);
567      usgn = true;
568      break;
569    case 'm':
570      type = Narrow(type);
571      quad = false;
572      break;
573    default:
574      break;
575  }
576  return type;
577}
578
/// IsMultiVecProto - True when prototype code \p p is one of '2', '3', '4',
/// 'B', 'C' or 'D'.
static bool IsMultiVecProto(const char p) {
  const bool isDigitForm = '2' <= p && p <= '4';
  const bool isLetterForm = 'B' <= p && p <= 'D';
  return isDigitForm || isLetterForm;
}
582
583/// TypeString - for a modifier and type, generate the name of the typedef for
584/// that type.  QUc -> uint8x8_t.
585static std::string TypeString(const char mod, StringRef typestr) {
586  bool quad = false;
587  bool poly = false;
588  bool usgn = false;
589  bool scal = false;
590  bool cnst = false;
591  bool pntr = false;
592
593  if (mod == 'v')
594    return "void";
595  if (mod == 'i')
596    return "int";
597
598  // base type to get the type string for.
599  char type = ClassifyType(typestr, quad, poly, usgn);
600
601  // Based on the modifying character, change the type and width if necessary.
602  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
603
604  SmallString<128> s;
605
606  if (usgn)
607    s.push_back('u');
608
609  switch (type) {
610    case 'c':
611      s += poly ? "poly8" : "int8";
612      if (scal)
613        break;
614      s += quad ? "x16" : "x8";
615      break;
616    case 's':
617      s += poly ? "poly16" : "int16";
618      if (scal)
619        break;
620      s += quad ? "x8" : "x4";
621      break;
622    case 'i':
623      s += "int32";
624      if (scal)
625        break;
626      s += quad ? "x4" : "x2";
627      break;
628    case 'l':
629      s += "int64";
630      if (scal)
631        break;
632      s += quad ? "x2" : "x1";
633      break;
634    case 'h':
635      s += "float16";
636      if (scal)
637        break;
638      s += quad ? "x8" : "x4";
639      break;
640    case 'f':
641      s += "float32";
642      if (scal)
643        break;
644      s += quad ? "x4" : "x2";
645      break;
646    case 'd':
647      s += "float64";
648      if (scal)
649        break;
650      s += quad ? "x2" : "x1";
651      break;
652
653    default:
654      PrintFatalError("unhandled type!");
655  }
656
657  if (mod == '2' || mod == 'B')
658    s += "x2";
659  if (mod == '3' || mod == 'C')
660    s += "x3";
661  if (mod == '4' || mod == 'D')
662    s += "x4";
663
664  // Append _t, finishing the type string typedef type.
665  s += "_t";
666
667  if (cnst)
668    s += " const";
669
670  if (pntr)
671    s += " *";
672
673  return s.str();
674}
675
676/// BuiltinTypeString - for a modifier and type, generate the clang
677/// BuiltinsARM.def prototype code for the function.  See the top of clang's
678/// Builtins.def for a description of the type strings.
679static std::string BuiltinTypeString(const char mod, StringRef typestr,
680                                     ClassKind ck, bool ret) {
681  bool quad = false;
682  bool poly = false;
683  bool usgn = false;
684  bool scal = false;
685  bool cnst = false;
686  bool pntr = false;
687
688  if (mod == 'v')
689    return "v"; // void
690  if (mod == 'i')
691    return "i"; // int
692
693  // base type to get the type string for.
694  char type = ClassifyType(typestr, quad, poly, usgn);
695
696  // Based on the modifying character, change the type and width if necessary.
697  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
698
699  // All pointers are void* pointers.  Change type to 'v' now.
700  if (pntr) {
701    usgn = false;
702    poly = false;
703    type = 'v';
704  }
705  // Treat half-float ('h') types as unsigned short ('s') types.
706  if (type == 'h') {
707    type = 's';
708    usgn = true;
709  }
710  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
711                         scal && type != 'f' && type != 'd');
712
713  if (scal) {
714    SmallString<128> s;
715
716    if (usgn)
717      s.push_back('U');
718    else if (type == 'c')
719      s.push_back('S'); // make chars explicitly signed
720
721    if (type == 'l') // 64-bit long
722      s += "LLi";
723    else
724      s.push_back(type);
725
726    if (cnst)
727      s.push_back('C');
728    if (pntr)
729      s.push_back('*');
730    return s.str();
731  }
732
733  // Since the return value must be one type, return a vector type of the
734  // appropriate width which we will bitcast.  An exception is made for
735  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
736  // fashion, storing them to a pointer arg.
737  if (ret) {
738    if (IsMultiVecProto(mod))
739      return "vv*"; // void result with void* first argument
740    if (mod == 'f' || (ck != ClassB && type == 'f'))
741      return quad ? "V4f" : "V2f";
742    if (ck != ClassB && type == 'd')
743      return quad ? "V2d" : "V1d";
744    if (ck != ClassB && type == 's')
745      return quad ? "V8s" : "V4s";
746    if (ck != ClassB && type == 'i')
747      return quad ? "V4i" : "V2i";
748    if (ck != ClassB && type == 'l')
749      return quad ? "V2LLi" : "V1LLi";
750
751    return quad ? "V16Sc" : "V8Sc";
752  }
753
754  // Non-return array types are passed as individual vectors.
755  if (mod == '2' || mod == 'B')
756    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
757  if (mod == '3' || mod == 'C')
758    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
759  if (mod == '4' || mod == 'D')
760    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
761
762  if (mod == 'f' || (ck != ClassB && type == 'f'))
763    return quad ? "V4f" : "V2f";
764  if (ck != ClassB && type == 'd')
765    return quad ? "V2d" : "V1d";
766  if (ck != ClassB && type == 's')
767    return quad ? "V8s" : "V4s";
768  if (ck != ClassB && type == 'i')
769    return quad ? "V4i" : "V2i";
770  if (ck != ClassB && type == 'l')
771    return quad ? "V2LLi" : "V1LLi";
772
773  return quad ? "V16Sc" : "V8Sc";
774}
775
776/// InstructionTypeCode - Computes the ARM argument character code and
777/// quad status for a specific type string and ClassKind.
778static void InstructionTypeCode(const StringRef &typeStr,
779                                const ClassKind ck,
780                                bool &quad,
781                                std::string &typeCode) {
782  bool poly = false;
783  bool usgn = false;
784  char type = ClassifyType(typeStr, quad, poly, usgn);
785
786  switch (type) {
787  case 'c':
788    switch (ck) {
789    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
790    case ClassI: typeCode = "i8"; break;
791    case ClassW: typeCode = "8"; break;
792    default: break;
793    }
794    break;
795  case 's':
796    switch (ck) {
797    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
798    case ClassI: typeCode = "i16"; break;
799    case ClassW: typeCode = "16"; break;
800    default: break;
801    }
802    break;
803  case 'i':
804    switch (ck) {
805    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
806    case ClassI: typeCode = "i32"; break;
807    case ClassW: typeCode = "32"; break;
808    default: break;
809    }
810    break;
811  case 'l':
812    switch (ck) {
813    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
814    case ClassI: typeCode = "i64"; break;
815    case ClassW: typeCode = "64"; break;
816    default: break;
817    }
818    break;
819  case 'h':
820    switch (ck) {
821    case ClassS:
822    case ClassI: typeCode = "f16"; break;
823    case ClassW: typeCode = "16"; break;
824    default: break;
825    }
826    break;
827  case 'f':
828    switch (ck) {
829    case ClassS:
830    case ClassI: typeCode = "f32"; break;
831    case ClassW: typeCode = "32"; break;
832    default: break;
833    }
834    break;
835  case 'd':
836    switch (ck) {
837    case ClassS:
838    case ClassI:
839      typeCode += "f64";
840      break;
841    case ClassW:
842      PrintFatalError("unhandled type!");
843    default:
844      break;
845    }
846    break;
847  default:
848    PrintFatalError("unhandled type!");
849  }
850}
851
852static char Insert_BHSD_Suffix(StringRef typestr){
853  unsigned off = 0;
854  if(typestr[off++] == 'S'){
855    while(typestr[off] == 'Q' || typestr[off] == 'H'||
856          typestr[off] == 'P' || typestr[off] == 'U')
857      ++off;
858    switch (typestr[off]){
859    default  : break;
860    case 'c' : return 'b';
861    case 's' : return 'h';
862    case 'i' :
863    case 'f' : return 's';
864    case 'l' :
865    case 'd' : return 'd';
866    }
867  }
868  return 0;
869}
870
871/// MangleName - Append a type or width suffix to a base neon function name,
872/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
873/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
874/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
875static std::string MangleName(const std::string &name, StringRef typestr,
876                              ClassKind ck) {
877  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
878    return name;
879
880  bool quad = false;
881  std::string typeCode = "";
882
883  InstructionTypeCode(typestr, ck, quad, typeCode);
884
885  std::string s = name;
886
887  if (typeCode.size() > 0) {
888    s += "_" + typeCode;
889  }
890
891  if (ck == ClassB)
892    s += "_v";
893
894  // Insert a 'q' before the first '_' character so that it ends up before
895  // _lane or _n on vector-scalar operations.
896  if (typestr.find("Q") != StringRef::npos) {
897      size_t pos = s.find('_');
898      s = s.insert(pos, "q");
899  }
900  char ins = Insert_BHSD_Suffix(typestr);
901  if(ins){
902    size_t pos = s.find('_');
903    s = s.insert(pos, &ins, 1);
904  }
905
906  return s;
907}
908
909static void PreprocessInstruction(const StringRef &Name,
910                                  const std::string &InstName,
911                                  std::string &Prefix,
912                                  bool &HasNPostfix,
913                                  bool &HasLanePostfix,
914                                  bool &HasDupPostfix,
915                                  bool &IsSpecialVCvt,
916                                  size_t &TBNumber) {
917  // All of our instruction name fields from arm_neon.td are of the form
918  //   <instructionname>_...
919  // Thus we grab our instruction name via computation of said Prefix.
920  const size_t PrefixEnd = Name.find_first_of('_');
921  // If InstName is passed in, we use that instead of our name Prefix.
922  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
923
924  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
925
926  HasNPostfix = Postfix.count("_n");
927  HasLanePostfix = Postfix.count("_lane");
928  HasDupPostfix = Postfix.count("_dup");
929  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
930
931  if (InstName.compare("vtbl") == 0 ||
932      InstName.compare("vtbx") == 0) {
933    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
934    // encoding to get its true value.
935    TBNumber = Name[Name.size()-1] - 48;
936  }
937}
938
939/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
940/// extracted, generate a FileCheck pattern for a Load Or Store
941static void
942GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
943                                          const std::string& OutTypeCode,
944                                          const bool &IsQuad,
945                                          const bool &HasDupPostfix,
946                                          const bool &HasLanePostfix,
947                                          const size_t Count,
948                                          std::string &RegisterSuffix) {
949  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
950  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
951  // will output a series of v{ld,st}1s, so we have to handle it specially.
952  if ((Count == 3 || Count == 4) && IsQuad) {
953    RegisterSuffix += "{";
954    for (size_t i = 0; i < Count; i++) {
955      RegisterSuffix += "d{{[0-9]+}}";
956      if (HasDupPostfix) {
957        RegisterSuffix += "[]";
958      }
959      if (HasLanePostfix) {
960        RegisterSuffix += "[{{[0-9]+}}]";
961      }
962      if (i < Count-1) {
963        RegisterSuffix += ", ";
964      }
965    }
966    RegisterSuffix += "}";
967  } else {
968
969    // Handle normal loads and stores.
970    RegisterSuffix += "{";
971    for (size_t i = 0; i < Count; i++) {
972      RegisterSuffix += "d{{[0-9]+}}";
973      if (HasDupPostfix) {
974        RegisterSuffix += "[]";
975      }
976      if (HasLanePostfix) {
977        RegisterSuffix += "[{{[0-9]+}}]";
978      }
979      if (IsQuad && !HasLanePostfix) {
980        RegisterSuffix += ", d{{[0-9]+}}";
981        if (HasDupPostfix) {
982          RegisterSuffix += "[]";
983        }
984      }
985      if (i < Count-1) {
986        RegisterSuffix += ", ";
987      }
988    }
989    RegisterSuffix += "}, [r{{[0-9]+}}";
990
991    // We only include the alignment hint if we have a vld1.*64 or
992    // a dup/lane instruction.
993    if (IsLDSTOne) {
994      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
995        RegisterSuffix += ":" + OutTypeCode;
996      }
997    }
998
999    RegisterSuffix += "]";
1000  }
1001}
1002
1003static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1004                                     const bool &HasNPostfix) {
1005  return (NameRef.count("vmla") ||
1006          NameRef.count("vmlal") ||
1007          NameRef.count("vmlsl") ||
1008          NameRef.count("vmull") ||
1009          NameRef.count("vqdmlal") ||
1010          NameRef.count("vqdmlsl") ||
1011          NameRef.count("vqdmulh") ||
1012          NameRef.count("vqdmull") ||
1013          NameRef.count("vqrdmulh")) && HasNPostfix;
1014}
1015
1016static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1017                                         const bool &HasLanePostfix) {
1018  return (NameRef.count("vmla") ||
1019          NameRef.count("vmls") ||
1020          NameRef.count("vmlal") ||
1021          NameRef.count("vmlsl") ||
1022          (NameRef.count("vmul") && NameRef.size() == 3)||
1023          NameRef.count("vqdmlal") ||
1024          NameRef.count("vqdmlsl") ||
1025          NameRef.count("vqdmulh") ||
1026          NameRef.count("vqrdmulh")) && HasLanePostfix;
1027}
1028
1029static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1030                                  const bool &HasLanePostfix,
1031                                  const bool &IsQuad) {
1032  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1033                               && IsQuad;
1034  const bool IsVMull = NameRef.count("mull") && !IsQuad;
1035  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1036}
1037
1038static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1039                                                     const std::string &Proto,
1040                                                     const bool &HasNPostfix,
1041                                                     const bool &IsQuad,
1042                                                     const bool &HasLanePostfix,
1043                                                     const bool &HasDupPostfix,
1044                                                     std::string &NormedProto) {
1045  // Handle generic case.
1046  const StringRef NameRef(Name);
1047  for (size_t i = 0, end = Proto.size(); i < end; i++) {
1048    switch (Proto[i]) {
1049    case 'u':
1050    case 'f':
1051    case 'd':
1052    case 's':
1053    case 'x':
1054    case 't':
1055    case 'n':
1056      NormedProto += IsQuad? 'q' : 'd';
1057      break;
1058    case 'w':
1059    case 'k':
1060      NormedProto += 'q';
1061      break;
1062    case 'g':
1063    case 'j':
1064    case 'h':
1065    case 'e':
1066      NormedProto += 'd';
1067      break;
1068    case 'i':
1069      NormedProto += HasLanePostfix? 'a' : 'i';
1070      break;
1071    case 'a':
1072      if (HasLanePostfix) {
1073        NormedProto += 'a';
1074      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1075        NormedProto += IsQuad? 'q' : 'd';
1076      } else {
1077        NormedProto += 'i';
1078      }
1079      break;
1080    }
1081  }
1082
1083  // Handle Special Cases.
1084  const bool IsNotVExt = !NameRef.count("vext");
1085  const bool IsVPADAL = NameRef.count("vpadal");
1086  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1087                                                           HasLanePostfix);
1088  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1089                                                      IsQuad);
1090
1091  if (IsSpecialLaneMul) {
1092    // If
1093    NormedProto[2] = NormedProto[3];
1094    NormedProto.erase(3);
1095  } else if (NormedProto.size() == 4 &&
1096             NormedProto[0] == NormedProto[1] &&
1097             IsNotVExt) {
1098    // If NormedProto.size() == 4 and the first two proto characters are the
1099    // same, ignore the first.
1100    NormedProto = NormedProto.substr(1, 3);
1101  } else if (Is5OpLaneAccum) {
1102    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1103    std::string tmp = NormedProto.substr(1,2);
1104    tmp += NormedProto[4];
1105    NormedProto = tmp;
1106  } else if (IsVPADAL) {
1107    // If we have VPADAL, ignore the first character.
1108    NormedProto = NormedProto.substr(0, 2);
1109  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1110    // If our instruction is a dup instruction, keep only the first and
1111    // last characters.
1112    std::string tmp = "";
1113    tmp += NormedProto[0];
1114    tmp += NormedProto[NormedProto.size()-1];
1115    NormedProto = tmp;
1116  }
1117}
1118
1119/// GenerateRegisterCheckPatterns - Given a bunch of data we have
1120/// extracted, generate a FileCheck pattern to check that an
1121/// instruction's arguments are correct.
1122static void GenerateRegisterCheckPattern(const std::string &Name,
1123                                         const std::string &Proto,
1124                                         const std::string &OutTypeCode,
1125                                         const bool &HasNPostfix,
1126                                         const bool &IsQuad,
1127                                         const bool &HasLanePostfix,
1128                                         const bool &HasDupPostfix,
1129                                         const size_t &TBNumber,
1130                                         std::string &RegisterSuffix) {
1131
1132  RegisterSuffix = "";
1133
1134  const StringRef NameRef(Name);
1135  const StringRef ProtoRef(Proto);
1136
1137  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1138    return;
1139  }
1140
1141  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1142  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1143
1144  if (IsLoadStore) {
1145    // Grab N value from  v{ld,st}N using its ascii representation.
1146    const size_t Count = NameRef[3] - 48;
1147
1148    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1149                                              HasDupPostfix, HasLanePostfix,
1150                                              Count, RegisterSuffix);
1151  } else if (IsTBXOrTBL) {
1152    RegisterSuffix += "d{{[0-9]+}}, {";
1153    for (size_t i = 0; i < TBNumber-1; i++) {
1154      RegisterSuffix += "d{{[0-9]+}}, ";
1155    }
1156    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1157  } else {
1158    // Handle a normal instruction.
1159    if (NameRef.count("vget") || NameRef.count("vset"))
1160      return;
1161
1162    // We first normalize our proto, since we only need to emit 4
1163    // different types of checks, yet have more than 4 proto types
1164    // that map onto those 4 patterns.
1165    std::string NormalizedProto("");
1166    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1167                                             HasLanePostfix, HasDupPostfix,
1168                                             NormalizedProto);
1169
1170    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1171      const char &c = NormalizedProto[i];
1172      switch (c) {
1173      case 'q':
1174        RegisterSuffix += "q{{[0-9]+}}, ";
1175        break;
1176
1177      case 'd':
1178        RegisterSuffix += "d{{[0-9]+}}, ";
1179        break;
1180
1181      case 'i':
1182        RegisterSuffix += "#{{[0-9]+}}, ";
1183        break;
1184
1185      case 'a':
1186        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1187        break;
1188      }
1189    }
1190
1191    // Remove extra ", ".
1192    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1193  }
1194}
1195
1196/// GenerateChecksForIntrinsic - Given a specific instruction name +
1197/// typestr + class kind, generate the proper set of FileCheck
1198/// Patterns to check for. We could just return a string, but instead
1199/// use a vector since it provides us with the extra flexibility of
1200/// emitting multiple checks, which comes in handy for certain cases
1201/// like mla where we want to check for 2 different instructions.
1202static void GenerateChecksForIntrinsic(const std::string &Name,
1203                                       const std::string &Proto,
1204                                       StringRef &OutTypeStr,
1205                                       StringRef &InTypeStr,
1206                                       ClassKind Ck,
1207                                       const std::string &InstName,
1208                                       bool IsHiddenLOp,
1209                                       std::vector<std::string>& Result) {
1210
1211  // If Ck is a ClassNoTest instruction, just return so no test is
1212  // emitted.
1213  if(Ck == ClassNoTest)
1214    return;
1215
1216  if (Name == "vcvt_f32_f16") {
1217    Result.push_back("vcvt.f32.f16");
1218    return;
1219  }
1220
1221
1222  // Now we preprocess our instruction given the data we have to get the
1223  // data that we need.
1224  // Create a StringRef for String Manipulation of our Name.
1225  const StringRef NameRef(Name);
1226  // Instruction Prefix.
1227  std::string Prefix;
1228  // The type code for our out type string.
1229  std::string OutTypeCode;
1230  // To handle our different cases, we need to check for different postfixes.
1231  // Is our instruction a quad instruction.
1232  bool IsQuad = false;
1233  // Our instruction is of the form <instructionname>_n.
1234  bool HasNPostfix = false;
1235  // Our instruction is of the form <instructionname>_lane.
1236  bool HasLanePostfix = false;
1237  // Our instruction is of the form <instructionname>_dup.
1238  bool HasDupPostfix  = false;
1239  // Our instruction is a vcvt instruction which requires special handling.
1240  bool IsSpecialVCvt = false;
1241  // If we have a vtbxN or vtblN instruction, this is set to N.
1242  size_t TBNumber = -1;
1243  // Register Suffix
1244  std::string RegisterSuffix;
1245
1246  PreprocessInstruction(NameRef, InstName, Prefix,
1247                        HasNPostfix, HasLanePostfix, HasDupPostfix,
1248                        IsSpecialVCvt, TBNumber);
1249
1250  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1251  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1252                               HasLanePostfix, HasDupPostfix, TBNumber,
1253                               RegisterSuffix);
1254
1255  // In the following section, we handle a bunch of special cases. You can tell
1256  // a special case by the fact we are returning early.
1257
1258  // If our instruction is a logical instruction without postfix or a
1259  // hidden LOp just return the current Prefix.
1260  if (Ck == ClassL || IsHiddenLOp) {
1261    Result.push_back(Prefix + " " + RegisterSuffix);
1262    return;
1263  }
1264
1265  // If we have a vmov, due to the many different cases, some of which
1266  // vary within the different intrinsics generated for a single
1267  // instruction type, just output a vmov. (e.g. given an instruction
1268  // A, A.u32 might be vmov and A.u8 might be vmov.8).
1269  //
1270  // FIXME: Maybe something can be done about this. The two cases that we care
1271  // about are vmov as an LType and vmov as a WType.
1272  if (Prefix == "vmov") {
1273    Result.push_back(Prefix + " " + RegisterSuffix);
1274    return;
1275  }
1276
1277  // In the following section, we handle special cases.
1278
1279  if (OutTypeCode == "64") {
1280    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1281    // type, the intrinsic will be optimized away, so just return
1282    // nothing.  On the other hand if we are handling an uint64x2_t
1283    // (i.e. quad instruction), vdup/vmov instructions should be
1284    // emitted.
1285    if (Prefix == "vdup" || Prefix == "vext") {
1286      if (IsQuad) {
1287        Result.push_back("{{vmov|vdup}}");
1288      }
1289      return;
1290    }
1291
1292    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1293    // multiple register operands.
1294    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1295                            || Prefix == "vld4";
1296    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1297                            || Prefix == "vst4";
1298    if (MultiLoadPrefix || MultiStorePrefix) {
1299      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1300      return;
1301    }
1302
1303    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1304    // emitting said instructions. So return a check for
1305    // vldr/vstr/vmov/str instead.
1306    if (HasLanePostfix || HasDupPostfix) {
1307      if (Prefix == "vst1") {
1308        Result.push_back("{{str|vstr|vmov}}");
1309        return;
1310      } else if (Prefix == "vld1") {
1311        Result.push_back("{{ldr|vldr|vmov}}");
1312        return;
1313      }
1314    }
1315  }
1316
1317  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1318  // sometimes disassembled as vtrn.32. We use a regex to handle both
1319  // cases.
1320  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1321    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1322    return;
1323  }
1324
1325  // Currently on most ARM processors, we do not use vmla/vmls for
1326  // quad floating point operations. Instead we output vmul + vadd. So
1327  // check if we have one of those instructions and just output a
1328  // check for vmul.
1329  if (OutTypeCode == "f32") {
1330    if (Prefix == "vmls") {
1331      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1332      Result.push_back("vsub." + OutTypeCode);
1333      return;
1334    } else if (Prefix == "vmla") {
1335      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1336      Result.push_back("vadd." + OutTypeCode);
1337      return;
1338    }
1339  }
1340
1341  // If we have vcvt, get the input type from the instruction name
1342  // (which should be of the form instname_inputtype) and append it
1343  // before the output type.
1344  if (Prefix == "vcvt") {
1345    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1346    Prefix += "." + inTypeCode;
1347  }
1348
1349  // Append output type code to get our final mangled instruction.
1350  Prefix += "." + OutTypeCode;
1351
1352  Result.push_back(Prefix + " " + RegisterSuffix);
1353}
1354
/// UseMacro - Decide from the prototype string whether the intrinsic has to
/// be emitted as a preprocessor macro rather than an inline function.
///
/// A macro is required when the prototype contains:
///   'i'        - an immediate argument, so that SemaChecking can range check
///                the immediate passed by the user;
///   'p' or 'c' - a pointer argument, so that aligned attributes on the
///                pointer type are not hidden.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
1372
/// MacroArgUsedDirectly - For an intrinsic defined as a macro, return true if
/// argument i is to be accessed directly rather than first being assigned to
/// a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1380
1381// Generate the string "(argtype a, argtype b, ...)"
1382static std::string GenArgs(const std::string &proto, StringRef typestr,
1383                           const std::string &name) {
1384  bool define = UseMacro(proto);
1385  char arg = 'a';
1386
1387  std::string s;
1388  s += "(";
1389
1390  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1391    if (define) {
1392      // Some macro arguments are used directly instead of being assigned
1393      // to local temporaries; prepend an underscore prefix to make their
1394      // names consistent with the local temporaries.
1395      if (MacroArgUsedDirectly(proto, i))
1396        s += "__";
1397    } else {
1398      s += TypeString(proto[i], typestr) + " __";
1399    }
1400    s.push_back(arg);
1401    //To avoid argument being multiple defined, add extra number for renaming.
1402    if (name == "vcopy_lane" || name == "vcopy_laneq")
1403      s.push_back('1');
1404    if ((i + 1) < e)
1405      s += ", ";
1406  }
1407
1408  s += ")";
1409  return s;
1410}
1411
1412// Macro arguments are not type-checked like inline function arguments, so
1413// assign them to local temporaries to get the right type checking.
1414static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1415                                  const std::string &name ) {
1416  char arg = 'a';
1417  std::string s;
1418  bool generatedLocal = false;
1419
1420  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1421    // Do not create a temporary for an immediate argument.
1422    // That would defeat the whole point of using a macro!
1423    if (MacroArgUsedDirectly(proto, i))
1424      continue;
1425    generatedLocal = true;
1426    bool extranumber = false;
1427    if (name == "vcopy_lane" || name == "vcopy_laneq")
1428      extranumber = true;
1429
1430    s += TypeString(proto[i], typestr) + " __";
1431    s.push_back(arg);
1432    if(extranumber)
1433      s.push_back('1');
1434    s += " = (";
1435    s.push_back(arg);
1436    if(extranumber)
1437      s.push_back('1');
1438    s += "); ";
1439  }
1440
1441  if (generatedLocal)
1442    s += "\\\n  ";
1443  return s;
1444}
1445
1446// Use the vmovl builtin to sign-extend or zero-extend a vector.
1447static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1448  std::string s, high;
1449  high = h ? "_high" : "";
1450  s = MangleName("vmovl" + high, typestr, ClassS);
1451  s += "(" + a + ")";
1452  return s;
1453}
1454
1455// Get the high 64-bit part of a vector
1456static std::string GetHigh(const std::string &a, StringRef typestr) {
1457  std::string s;
1458  s = MangleName("vget_high", typestr, ClassS);
1459  s += "(" + a + ")";
1460  return s;
1461}
1462
1463// Gen operation with two operands and get high 64-bit for both of two operands.
1464static std::string Gen2OpWith2High(StringRef typestr,
1465                                   const std::string &op,
1466                                   const std::string &a,
1467                                   const std::string &b) {
1468  std::string s;
1469  std::string Op1 = GetHigh(a, typestr);
1470  std::string Op2 = GetHigh(b, typestr);
1471  s = MangleName(op, typestr, ClassS);
1472  s += "(" + Op1 + ", " + Op2 + ");";
1473  return s;
1474}
1475
1476// Gen operation with three operands and get high 64-bit of the latter
1477// two operands.
1478static std::string Gen3OpWith2High(StringRef typestr,
1479                                   const std::string &op,
1480                                   const std::string &a,
1481                                   const std::string &b,
1482                                   const std::string &c) {
1483  std::string s;
1484  std::string Op1 = GetHigh(b, typestr);
1485  std::string Op2 = GetHigh(c, typestr);
1486  s = MangleName(op, typestr, ClassS);
1487  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1488  return s;
1489}
1490
1491// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1492static std::string GenCombine(std::string typestr,
1493                              const std::string &a,
1494                              const std::string &b) {
1495  std::string s;
1496  s = MangleName("vcombine", typestr, ClassS);
1497  s += "(" + a + ", " + b + ")";
1498  return s;
1499}
1500
1501static std::string Duplicate(unsigned nElts, StringRef typestr,
1502                             const std::string &a) {
1503  std::string s;
1504
1505  s = "(" + TypeString('d', typestr) + "){ ";
1506  for (unsigned i = 0; i != nElts; ++i) {
1507    s += a;
1508    if ((i + 1) < nElts)
1509      s += ", ";
1510  }
1511  s += " }";
1512
1513  return s;
1514}
1515
// Generate a __builtin_shufflevector call that takes vec as both sources and
// lists lane as the index nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string Shuffle("__builtin_shufflevector(");
  Shuffle += vec;
  Shuffle += ", ";
  Shuffle += vec;

  for (unsigned Remaining = nElts; Remaining != 0; --Remaining) {
    Shuffle += ", ";
    Shuffle += lane;
  }

  Shuffle += ")";
  return Shuffle;
}
1524
1525static std::string RemoveHigh(const std::string &name) {
1526  std::string s = name;
1527  std::size_t found = s.find("_high_");
1528  if (found == std::string::npos)
1529    PrintFatalError("name should contain \"_high_\" for high intrinsics");
1530  s.replace(found, 5, "");
1531  return s;
1532}
1533
1534static unsigned GetNumElements(StringRef typestr, bool &quad) {
1535  quad = false;
1536  bool dummy = false;
1537  char type = ClassifyType(typestr, quad, dummy, dummy);
1538  unsigned nElts = 0;
1539  switch (type) {
1540  case 'c': nElts = 8; break;
1541  case 's': nElts = 4; break;
1542  case 'i': nElts = 2; break;
1543  case 'l': nElts = 1; break;
1544  case 'h': nElts = 4; break;
1545  case 'f': nElts = 2; break;
1546  case 'd':
1547    nElts = 1;
1548    break;
1549  default:
1550    PrintFatalError("unhandled type!");
1551  }
1552  if (quad) nElts <<= 1;
1553  return nElts;
1554}
1555
1556// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1557static std::string GenOpString(const std::string &name, OpKind op,
1558                               const std::string &proto, StringRef typestr) {
1559  bool quad;
1560  unsigned nElts = GetNumElements(typestr, quad);
1561  bool define = UseMacro(proto);
1562
1563  std::string ts = TypeString(proto[0], typestr);
1564  std::string s;
1565  if (!define) {
1566    s = "return ";
1567  }
1568
1569  switch(op) {
1570  case OpAdd:
1571    s += "__a + __b;";
1572    break;
1573  case OpAddl:
1574    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1575    break;
1576  case OpAddlHi:
1577    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1578    break;
1579  case OpAddw:
1580    s += "__a + " + Extend(typestr, "__b") + ";";
1581    break;
1582  case OpAddwHi:
1583    s += "__a + " + Extend(typestr, "__b", 1) + ";";
1584    break;
1585  case OpSub:
1586    s += "__a - __b;";
1587    break;
1588  case OpSubl:
1589    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1590    break;
1591  case OpSublHi:
1592    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1593    break;
1594  case OpSubw:
1595    s += "__a - " + Extend(typestr, "__b") + ";";
1596    break;
1597  case OpSubwHi:
1598    s += "__a - " + Extend(typestr, "__b", 1) + ";";
1599    break;
1600  case OpMulN:
1601    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1602    break;
1603  case OpMulLane:
1604    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1605    break;
1606  case OpMulXLane:
1607    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1608      SplatLane(nElts, "__b", "__c") + ");";
1609    break;
1610  case OpMul:
1611    s += "__a * __b;";
1612    break;
1613  case OpMullLane:
1614    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1615      SplatLane(nElts, "__b", "__c") + ");";
1616    break;
1617  case OpMullHiLane:
1618    s += MangleName("vmull", typestr, ClassS) + "(" +
1619      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1620    break;
1621  case OpMlaN:
1622    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1623    break;
1624  case OpMlaLane:
1625    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1626    break;
1627  case OpMla:
1628    s += "__a + (__b * __c);";
1629    break;
1630  case OpMlalN:
1631    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1632      Duplicate(nElts, typestr, "__c") + ");";
1633    break;
1634  case OpMlalLane:
1635    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1636      SplatLane(nElts, "__c", "__d") + ");";
1637    break;
1638  case OpMlalHiLane:
1639    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1640      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1641    break;
1642  case OpMlal:
1643    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1644    break;
1645  case OpMullHi:
1646    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1647    break;
1648  case OpMlalHi:
1649    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1650    break;
1651  case OpMlsN:
1652    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1653    break;
1654  case OpMlsLane:
1655    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1656    break;
1657  case OpFMSLane:
1658    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1659    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1660    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1661    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1662    break;
1663  case OpFMSLaneQ:
1664    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1665    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1666    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1667    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1668    break;
1669  case OpMls:
1670    s += "__a - (__b * __c);";
1671    break;
1672  case OpMlslN:
1673    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1674      Duplicate(nElts, typestr, "__c") + ");";
1675    break;
1676  case OpMlslLane:
1677    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1678      SplatLane(nElts, "__c", "__d") + ");";
1679    break;
1680  case OpMlslHiLane:
1681    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1682      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1683    break;
1684  case OpMlsl:
1685    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1686    break;
1687  case OpMlslHi:
1688    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1689    break;
1690  case OpQDMullLane:
1691    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1692      SplatLane(nElts, "__b", "__c") + ");";
1693    break;
1694  case OpQDMullHiLane:
1695    s += MangleName("vqdmull", typestr, ClassS) + "(" +
1696      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1697    break;
1698  case OpQDMlalLane:
1699    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1700      SplatLane(nElts, "__c", "__d") + ");";
1701    break;
1702  case OpQDMlalHiLane:
1703    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1704      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1705    break;
1706  case OpQDMlslLane:
1707    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1708      SplatLane(nElts, "__c", "__d") + ");";
1709    break;
1710  case OpQDMlslHiLane:
1711    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1712      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1713    break;
1714  case OpQDMulhLane:
1715    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1716      SplatLane(nElts, "__b", "__c") + ");";
1717    break;
1718  case OpQRDMulhLane:
1719    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1720      SplatLane(nElts, "__b", "__c") + ");";
1721    break;
1722  case OpEq:
1723    s += "(" + ts + ")(__a == __b);";
1724    break;
1725  case OpGe:
1726    s += "(" + ts + ")(__a >= __b);";
1727    break;
1728  case OpLe:
1729    s += "(" + ts + ")(__a <= __b);";
1730    break;
1731  case OpGt:
1732    s += "(" + ts + ")(__a > __b);";
1733    break;
1734  case OpLt:
1735    s += "(" + ts + ")(__a < __b);";
1736    break;
1737  case OpNeg:
1738    s += " -__a;";
1739    break;
1740  case OpNot:
1741    s += " ~__a;";
1742    break;
1743  case OpAnd:
1744    s += "__a & __b;";
1745    break;
1746  case OpOr:
1747    s += "__a | __b;";
1748    break;
1749  case OpXor:
1750    s += "__a ^ __b;";
1751    break;
1752  case OpAndNot:
1753    s += "__a & ~__b;";
1754    break;
1755  case OpOrNot:
1756    s += "__a | ~__b;";
1757    break;
1758  case OpCast:
1759    s += "(" + ts + ")__a;";
1760    break;
1761  case OpConcat:
1762    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1763    s += ", (int64x1_t)__b, 0, 1);";
1764    break;
1765  case OpHi:
1766    // nElts is for the result vector, so the source is twice that number.
1767    s += "__builtin_shufflevector(__a, __a";
1768    for (unsigned i = nElts; i < nElts * 2; ++i)
1769      s += ", " + utostr(i);
1770    s+= ");";
1771    break;
1772  case OpLo:
1773    s += "__builtin_shufflevector(__a, __a";
1774    for (unsigned i = 0; i < nElts; ++i)
1775      s += ", " + utostr(i);
1776    s+= ");";
1777    break;
1778  case OpDup:
1779    s += Duplicate(nElts, typestr, "__a") + ";";
1780    break;
1781  case OpDupLane:
1782    s += SplatLane(nElts, "__a", "__b") + ";";
1783    break;
1784  case OpSelect:
1785    // ((0 & 1) | (~0 & 2))
1786    s += "(" + ts + ")";
1787    ts = TypeString(proto[1], typestr);
1788    s += "((__a & (" + ts + ")__b) | ";
1789    s += "(~__a & (" + ts + ")__c));";
1790    break;
1791  case OpRev16:
1792    s += "__builtin_shufflevector(__a, __a";
1793    for (unsigned i = 2; i <= nElts; i += 2)
1794      for (unsigned j = 0; j != 2; ++j)
1795        s += ", " + utostr(i - j - 1);
1796    s += ");";
1797    break;
1798  case OpRev32: {
1799    unsigned WordElts = nElts >> (1 + (int)quad);
1800    s += "__builtin_shufflevector(__a, __a";
1801    for (unsigned i = WordElts; i <= nElts; i += WordElts)
1802      for (unsigned j = 0; j != WordElts; ++j)
1803        s += ", " + utostr(i - j - 1);
1804    s += ");";
1805    break;
1806  }
1807  case OpRev64: {
1808    unsigned DblWordElts = nElts >> (int)quad;
1809    s += "__builtin_shufflevector(__a, __a";
1810    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1811      for (unsigned j = 0; j != DblWordElts; ++j)
1812        s += ", " + utostr(i - j - 1);
1813    s += ");";
1814    break;
1815  }
1816  case OpXtnHi: {
1817    s = TypeString(proto[1], typestr) + " __a1 = " +
1818        MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
1819        "return __builtin_shufflevector(__a, __a1";
1820    for (unsigned i = 0; i < nElts * 4; ++i)
1821      s += ", " + utostr(i);
1822    s += ");";
1823    break;
1824  }
1825  case OpSqxtunHi: {
1826    s = TypeString(proto[1], typestr) + " __a1 = " +
1827        MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
1828        "return __builtin_shufflevector(__a, __a1";
1829    for (unsigned i = 0; i < nElts * 4; ++i)
1830      s += ", " + utostr(i);
1831    s += ");";
1832    break;
1833  }
1834  case OpQxtnHi: {
1835    s = TypeString(proto[1], typestr) + " __a1 = " +
1836        MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
1837        "return __builtin_shufflevector(__a, __a1";
1838    for (unsigned i = 0; i < nElts * 4; ++i)
1839      s += ", " + utostr(i);
1840    s += ");";
1841    break;
1842  }
1843  case OpFcvtnHi: {
1844    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1845    s = TypeString(proto[1], typestr) + " __a1 = " +
1846        MangleName(FName, typestr, ClassS) + "(__b);\n  " +
1847        "return __builtin_shufflevector(__a, __a1";
1848    for (unsigned i = 0; i < nElts * 4; ++i)
1849      s += ", " + utostr(i);
1850    s += ");";
1851    break;
1852  }
1853  case OpFcvtlHi: {
1854    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1855    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1856        ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1857    break;
1858  }
1859  case OpFcvtxnHi: {
1860    s = TypeString(proto[1], typestr) + " __a1 = " +
1861        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
1862        "return __builtin_shufflevector(__a, __a1";
1863    for (unsigned i = 0; i < nElts * 4; ++i)
1864      s += ", " + utostr(i);
1865    s += ");";
1866    break;
1867  }
1868  case OpUzp1:
1869    s += "__builtin_shufflevector(__a, __b";
1870    for (unsigned i = 0; i < nElts; i++)
1871      s += ", " + utostr(2*i);
1872    s += ");";
1873    break;
1874  case OpUzp2:
1875    s += "__builtin_shufflevector(__a, __b";
1876    for (unsigned i = 0; i < nElts; i++)
1877      s += ", " + utostr(2*i+1);
1878    s += ");";
1879    break;
1880  case OpZip1:
1881    s += "__builtin_shufflevector(__a, __b";
1882    for (unsigned i = 0; i < (nElts/2); i++)
1883       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1884    s += ");";
1885    break;
1886  case OpZip2:
1887    s += "__builtin_shufflevector(__a, __b";
1888    for (unsigned i = nElts/2; i < nElts; i++)
1889       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1890    s += ");";
1891    break;
1892  case OpTrn1:
1893    s += "__builtin_shufflevector(__a, __b";
1894    for (unsigned i = 0; i < (nElts/2); i++)
1895       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1896    s += ");";
1897    break;
1898  case OpTrn2:
1899    s += "__builtin_shufflevector(__a, __b";
1900    for (unsigned i = 0; i < (nElts/2); i++)
1901       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
1902    s += ");";
1903    break;
1904  case OpAbdl: {
1905    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1906    if (typestr[0] != 'U') {
1907      // vabd results are always unsigned and must be zero-extended.
1908      std::string utype = "U" + typestr.str();
1909      s += "(" + TypeString(proto[0], typestr) + ")";
1910      abd = "(" + TypeString('d', utype) + ")" + abd;
1911      s += Extend(utype, abd) + ";";
1912    } else {
1913      s += Extend(typestr, abd) + ";";
1914    }
1915    break;
1916  }
1917  case OpAbdlHi:
1918    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
1919    break;
1920  case OpAddhnHi: {
1921    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
1922    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
1923    s += ";";
1924    break;
1925  }
1926  case OpRAddhnHi: {
1927    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
1928    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
1929    s += ";";
1930    break;
1931  }
1932  case OpSubhnHi: {
1933    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
1934    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
1935    s += ";";
1936    break;
1937  }
1938  case OpRSubhnHi: {
1939    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
1940    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
1941    s += ";";
1942    break;
1943  }
1944  case OpAba:
1945    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
1946    break;
1947  case OpAbal:
1948    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
1949    break;
1950  case OpAbalHi:
1951    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
1952    break;
1953  case OpQDMullHi:
1954    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
1955    break;
1956  case OpQDMlalHi:
1957    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
1958    break;
1959  case OpQDMlslHi:
1960    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
1961    break;
1962  case OpDiv:
1963    s += "__a / __b;";
1964    break;
1965  case OpMovlHi: {
1966    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
1967        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
1968    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
1969    s += "(__a1, 0);";
1970    break;
1971  }
1972  case OpLongHi: {
1973    // Another local variable __a1 is needed for calling a Macro,
1974    // or using __a will have naming conflict when Macro expanding.
1975    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
1976         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
1977    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
1978         "(__a1, __b);";
1979    break;
1980  }
1981  case OpNarrowHi: {
1982    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
1983         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
1984    break;
1985  }
1986  case OpCopyLane: {
1987    s += TypeString('s', typestr) + " __c2 = " +
1988         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
1989         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
1990    break;
1991  }
1992  case OpCopyQLane: {
1993    std::string typeCode = "";
1994    InstructionTypeCode(typestr, ClassS, quad, typeCode);
1995    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
1996         "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
1997    break;
1998  }
1999  case OpCopyLaneQ: {
2000    std::string typeCode = "";
2001    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2002    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2003         "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
2004    break;
2005  }
2006  default:
2007    PrintFatalError("unknown OpKind!");
2008  }
2009  return s;
2010}
2011
2012static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2013  unsigned mod = proto[0];
2014
2015  if (mod == 'v' || mod == 'f')
2016    mod = proto[1];
2017
2018  bool quad = false;
2019  bool poly = false;
2020  bool usgn = false;
2021  bool scal = false;
2022  bool cnst = false;
2023  bool pntr = false;
2024
2025  // Base type to get the type string for.
2026  char type = ClassifyType(typestr, quad, poly, usgn);
2027
2028  // Based on the modifying character, change the type and width if necessary.
2029  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2030
2031  NeonTypeFlags::EltType ET;
2032  switch (type) {
2033    case 'c':
2034      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2035      break;
2036    case 's':
2037      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2038      break;
2039    case 'i':
2040      ET = NeonTypeFlags::Int32;
2041      break;
2042    case 'l':
2043      ET = NeonTypeFlags::Int64;
2044      break;
2045    case 'h':
2046      ET = NeonTypeFlags::Float16;
2047      break;
2048    case 'f':
2049      ET = NeonTypeFlags::Float32;
2050      break;
2051    case 'd':
2052      ET = NeonTypeFlags::Float64;
2053      break;
2054    default:
2055      PrintFatalError("unhandled type!");
2056  }
2057  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2058  return Flags.getFlags();
2059}
2060
/// Return true if the prototype string \p proto contains either of the
/// modifier characters 's' or 'r' anywhere (return type included).
///
/// Callers use this to decide whether a builtin can be overloaded by type:
/// prototypes without these modifiers are switched to ClassB.
///
/// The prototype is taken by const reference so that the frequent calls to
/// this predicate do not copy the string.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("sr") != std::string::npos;
}
2066
2067// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
2068static std::string GenBuiltin(const std::string &name, const std::string &proto,
2069                              StringRef typestr, ClassKind ck) {
2070  std::string s;
2071
2072  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2073  // sret-like argument.
2074  bool sret = IsMultiVecProto(proto[0]);
2075
2076  bool define = UseMacro(proto);
2077
2078  // Check if the prototype has a scalar operand with the type of the vector
2079  // elements.  If not, bitcasting the args will take care of arg checking.
2080  // The actual signedness etc. will be taken care of with special enums.
2081  if (!ProtoHasScalar(proto))
2082    ck = ClassB;
2083
2084  if (proto[0] != 'v') {
2085    std::string ts = TypeString(proto[0], typestr);
2086
2087    if (define) {
2088      if (sret)
2089        s += ts + " r; ";
2090      else
2091        s += "(" + ts + ")";
2092    } else if (sret) {
2093      s += ts + " r; ";
2094    } else {
2095      s += "return (" + ts + ")";
2096    }
2097  }
2098
2099  bool splat = proto.find('a') != std::string::npos;
2100
2101  s += "__builtin_neon_";
2102  if (splat) {
2103    // Call the non-splat builtin: chop off the "_n" suffix from the name.
2104    std::string vname(name, 0, name.size()-2);
2105    s += MangleName(vname, typestr, ck);
2106  } else {
2107    s += MangleName(name, typestr, ck);
2108  }
2109  s += "(";
2110
2111  // Pass the address of the return variable as the first argument to sret-like
2112  // builtins.
2113  if (sret)
2114    s += "&r, ";
2115
2116  char arg = 'a';
2117  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2118    std::string args = std::string(&arg, 1);
2119
2120    // Use the local temporaries instead of the macro arguments.
2121    args = "__" + args;
2122
2123    bool argQuad = false;
2124    bool argPoly = false;
2125    bool argUsgn = false;
2126    bool argScalar = false;
2127    bool dummy = false;
2128    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2129    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2130                      dummy, dummy);
2131
2132    // Handle multiple-vector values specially, emitting each subvector as an
2133    // argument to the __builtin.
2134    unsigned NumOfVec = 0;
2135    if (proto[i] >= '2' && proto[i] <= '4') {
2136      NumOfVec = proto[i] - '0';
2137    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2138      NumOfVec = proto[i] - 'A' + 1;
2139    }
2140
2141    if (NumOfVec > 0) {
2142      // Check if an explicit cast is needed.
2143      if (argType != 'c' || argPoly || argUsgn)
2144        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2145
2146      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2147        s += args + ".val[" + utostr(vi) + "]";
2148        if ((vi + 1) < ve)
2149          s += ", ";
2150      }
2151      if ((i + 1) < e)
2152        s += ", ";
2153
2154      continue;
2155    }
2156
2157    if (splat && (i + 1) == e)
2158      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2159
2160    // Check if an explicit cast is needed.
2161    if ((splat || !argScalar) &&
2162        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2163      std::string argTypeStr = "c";
2164      if (ck != ClassB)
2165        argTypeStr = argType;
2166      if (argQuad)
2167        argTypeStr = "Q" + argTypeStr;
2168      args = "(" + TypeString('d', argTypeStr) + ")" + args;
2169    }
2170
2171    s += args;
2172    if ((i + 1) < e)
2173      s += ", ";
2174  }
2175
2176  // Extra constant integer to hold type class enum for this function, e.g. s8
2177  if (ck == ClassB)
2178    s += ", " + utostr(GetNeonEnum(proto, typestr));
2179
2180  s += ");";
2181
2182  if (proto[0] != 'v' && sret) {
2183    if (define)
2184      s += " r;";
2185    else
2186      s += " return r;";
2187  }
2188  return s;
2189}
2190
2191static std::string GenBuiltinDef(const std::string &name,
2192                                 const std::string &proto,
2193                                 StringRef typestr, ClassKind ck) {
2194  std::string s("BUILTIN(__builtin_neon_");
2195
2196  // If all types are the same size, bitcasting the args will take care
2197  // of arg checking.  The actual signedness etc. will be taken care of with
2198  // special enums.
2199  if (!ProtoHasScalar(proto))
2200    ck = ClassB;
2201
2202  s += MangleName(name, typestr, ck);
2203  s += ", \"";
2204
2205  for (unsigned i = 0, e = proto.size(); i != e; ++i)
2206    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2207
2208  // Extra constant integer to hold type class enum for this function, e.g. s8
2209  if (ck == ClassB)
2210    s += "i";
2211
2212  s += "\", \"n\")";
2213  return s;
2214}
2215
2216static std::string GenIntrinsic(const std::string &name,
2217                                const std::string &proto,
2218                                StringRef outTypeStr, StringRef inTypeStr,
2219                                OpKind kind, ClassKind classKind) {
2220  assert(!proto.empty() && "");
2221  bool define = UseMacro(proto) && kind != OpUnavailable;
2222  std::string s;
2223
2224  // static always inline + return type
2225  if (define)
2226    s += "#define ";
2227  else
2228    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2229
2230  // Function name with type suffix
2231  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2232  if (outTypeStr != inTypeStr) {
2233    // If the input type is different (e.g., for vreinterpret), append a suffix
2234    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2235    // does not insert another "q" in the name.
2236    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2237    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2238    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2239  }
2240  s += mangledName;
2241
2242  // Function arguments
2243  s += GenArgs(proto, inTypeStr, name);
2244
2245  // Definition.
2246  if (define) {
2247    s += " __extension__ ({ \\\n  ";
2248    s += GenMacroLocals(proto, inTypeStr, name);
2249  } else if (kind == OpUnavailable) {
2250    s += " __attribute__((unavailable));\n";
2251    return s;
2252  } else
2253    s += " {\n  ";
2254
2255  if (kind != OpNone)
2256    s += GenOpString(name, kind, proto, outTypeStr);
2257  else
2258    s += GenBuiltin(name, proto, outTypeStr, classKind);
2259  if (define)
2260    s += " })";
2261  else
2262    s += " }";
2263  s += "\n";
2264  return s;
2265}
2266
2267/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2268/// is comprised of type definitions and function declarations.
2269void NeonEmitter::run(raw_ostream &OS) {
2270  OS <<
2271    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2272    "---===\n"
2273    " *\n"
2274    " * Permission is hereby granted, free of charge, to any person obtaining "
2275    "a copy\n"
2276    " * of this software and associated documentation files (the \"Software\"),"
2277    " to deal\n"
2278    " * in the Software without restriction, including without limitation the "
2279    "rights\n"
2280    " * to use, copy, modify, merge, publish, distribute, sublicense, "
2281    "and/or sell\n"
2282    " * copies of the Software, and to permit persons to whom the Software is\n"
2283    " * furnished to do so, subject to the following conditions:\n"
2284    " *\n"
2285    " * The above copyright notice and this permission notice shall be "
2286    "included in\n"
2287    " * all copies or substantial portions of the Software.\n"
2288    " *\n"
2289    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2290    "EXPRESS OR\n"
2291    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2292    "MERCHANTABILITY,\n"
2293    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2294    "SHALL THE\n"
2295    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2296    "OTHER\n"
2297    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2298    "ARISING FROM,\n"
2299    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2300    "DEALINGS IN\n"
2301    " * THE SOFTWARE.\n"
2302    " *\n"
2303    " *===--------------------------------------------------------------------"
2304    "---===\n"
2305    " */\n\n";
2306
2307  OS << "#ifndef __ARM_NEON_H\n";
2308  OS << "#define __ARM_NEON_H\n\n";
2309
2310  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
2311  OS << "#error \"NEON support not enabled\"\n";
2312  OS << "#endif\n\n";
2313
2314  OS << "#include <stdint.h>\n\n";
2315
2316  // Emit NEON-specific scalar typedefs.
2317  OS << "typedef float float32_t;\n";
2318  OS << "typedef __fp16 float16_t;\n";
2319
2320  OS << "#ifdef __aarch64__\n";
2321  OS << "typedef double float64_t;\n";
2322  OS << "#endif\n\n";
2323
2324  // For now, signedness of polynomial types depends on target
2325  OS << "#ifdef __aarch64__\n";
2326  OS << "typedef uint8_t poly8_t;\n";
2327  OS << "typedef uint16_t poly16_t;\n";
2328  OS << "#else\n";
2329  OS << "typedef int8_t poly8_t;\n";
2330  OS << "typedef int16_t poly16_t;\n";
2331  OS << "#endif\n";
2332
2333  // Emit Neon vector typedefs.
2334  std::string TypedefTypes(
2335      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
2336  SmallVector<StringRef, 24> TDTypeVec;
2337  ParseTypes(0, TypedefTypes, TDTypeVec);
2338
2339  // Emit vector typedefs.
2340  bool isA64 = false;
2341  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2342    bool dummy, quad = false, poly = false;
2343    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2344    bool preinsert = false;
2345    bool postinsert = false;
2346
2347    if (type == 'd') {
2348      preinsert = isA64? false: true;
2349      isA64 = true;
2350    } else {
2351      postinsert = isA64? true: false;
2352      isA64 = false;
2353    }
2354    if (postinsert)
2355      OS << "#endif\n";
2356    if (preinsert)
2357      OS << "#ifdef __aarch64__\n";
2358
2359    if (poly)
2360      OS << "typedef __attribute__((neon_polyvector_type(";
2361    else
2362      OS << "typedef __attribute__((neon_vector_type(";
2363
2364    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2365    OS << utostr(nElts) << "))) ";
2366    if (nElts < 10)
2367      OS << " ";
2368
2369    OS << TypeString('s', TDTypeVec[i]);
2370    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2371
2372  }
2373  OS << "\n";
2374
2375  // Emit struct typedefs.
2376  isA64 = false;
2377  for (unsigned vi = 2; vi != 5; ++vi) {
2378    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2379      bool dummy, quad = false, poly = false;
2380      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2381      bool preinsert = false;
2382      bool postinsert = false;
2383
2384      if (type == 'd') {
2385        preinsert = isA64? false: true;
2386        isA64 = true;
2387      } else {
2388        postinsert = isA64? true: false;
2389        isA64 = false;
2390      }
2391      if (postinsert)
2392        OS << "#endif\n";
2393      if (preinsert)
2394        OS << "#ifdef __aarch64__\n";
2395
2396      std::string ts = TypeString('d', TDTypeVec[i]);
2397      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2398      OS << "typedef struct " << vs << " {\n";
2399      OS << "  " << ts << " val";
2400      OS << "[" << utostr(vi) << "]";
2401      OS << ";\n} ";
2402      OS << vs << ";\n";
2403      OS << "\n";
2404    }
2405  }
2406
2407  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2408
2409  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2410
2411  StringMap<ClassKind> EmittedMap;
2412
2413  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
2414  // intrinsics.  (Some of the saturating multiply instructions are also
2415  // used to implement the corresponding "_lane" variants, but tablegen
2416  // sorts the records into alphabetical order so that the "_lane" variants
2417  // come after the intrinsics they use.)
2418  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
2419  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
2420  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
2421  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
2422
2423  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
2424  // common intrinsics appear only once in the output stream.
2425  // The check for uniquiness is done in emitIntrinsic.
2426  // Emit ARM intrinsics.
2427  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2428    Record *R = RV[i];
2429
2430    // Skip AArch64 intrinsics; they will be emitted at the end.
2431    bool isA64 = R->getValueAsBit("isA64");
2432    if (isA64)
2433      continue;
2434
2435    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
2436        R->getName() != "VABD")
2437      emitIntrinsic(OS, R, EmittedMap);
2438  }
2439
2440  // Emit AArch64-specific intrinsics.
2441  OS << "#ifdef __aarch64__\n";
2442
2443  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
2444  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
2445  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
2446
2447  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2448    Record *R = RV[i];
2449
2450    // Skip ARM intrinsics already included above.
2451    bool isA64 = R->getValueAsBit("isA64");
2452    if (!isA64)
2453      continue;
2454
2455    emitIntrinsic(OS, R, EmittedMap);
2456  }
2457
2458  OS << "#endif\n\n";
2459
2460  OS << "#undef __ai\n\n";
2461  OS << "#endif /* __ARM_NEON_H */\n";
2462}
2463
2464/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2465/// intrinsics specified by record R checking for intrinsic uniqueness.
2466void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2467                                StringMap<ClassKind> &EmittedMap) {
2468  std::string name = R->getValueAsString("Name");
2469  std::string Proto = R->getValueAsString("Prototype");
2470  std::string Types = R->getValueAsString("Types");
2471
2472  SmallVector<StringRef, 16> TypeVec;
2473  ParseTypes(R, Types, TypeVec);
2474
2475  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2476
2477  ClassKind classKind = ClassNone;
2478  if (R->getSuperClasses().size() >= 2)
2479    classKind = ClassMap[R->getSuperClasses()[1]];
2480  if (classKind == ClassNone && kind == OpNone)
2481    PrintFatalError(R->getLoc(), "Builtin has no class kind");
2482
2483  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2484    if (kind == OpReinterpret) {
2485      bool outQuad = false;
2486      bool dummy = false;
2487      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2488      for (unsigned srcti = 0, srcte = TypeVec.size();
2489           srcti != srcte; ++srcti) {
2490        bool inQuad = false;
2491        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2492        if (srcti == ti || inQuad != outQuad)
2493          continue;
2494        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2495                                     OpCast, ClassS);
2496        if (EmittedMap.count(s))
2497          continue;
2498        EmittedMap[s] = ClassS;
2499        OS << s;
2500      }
2501    } else {
2502      std::string s =
2503          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2504      if (EmittedMap.count(s))
2505        continue;
2506      EmittedMap[s] = classKind;
2507      OS << s;
2508    }
2509  }
2510  OS << "\n";
2511}
2512
2513static unsigned RangeFromType(const char mod, StringRef typestr) {
2514  // base type to get the type string for.
2515  bool quad = false, dummy = false;
2516  char type = ClassifyType(typestr, quad, dummy, dummy);
2517  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2518
2519  switch (type) {
2520    case 'c':
2521      return (8 << (int)quad) - 1;
2522    case 'h':
2523    case 's':
2524      return (4 << (int)quad) - 1;
2525    case 'f':
2526    case 'i':
2527      return (2 << (int)quad) - 1;
2528    case 'd':
2529    case 'l':
2530      return (1 << (int)quad) - 1;
2531    default:
2532      PrintFatalError("unhandled type!");
2533  }
2534}
2535
2536static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2537  // base type to get the type string for.
2538  bool dummy = false;
2539  char type = ClassifyType(typestr, dummy, dummy, dummy);
2540  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2541
2542  switch (type) {
2543    case 'c':
2544      return 7;
2545    case 'h':
2546    case 's':
2547      return 15;
2548    case 'f':
2549    case 'i':
2550      return 31;
2551    case 'd':
2552    case 'l':
2553      return 63;
2554    default:
2555      PrintFatalError("unhandled type!");
2556  }
2557}
2558
2559/// Generate the ARM and AArch64 intrinsic range checking code for
2560/// shift/lane immediates, checking for unique declarations.
2561void
2562NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2563                                        StringMap<ClassKind> &A64IntrinsicMap,
2564                                        bool isA64RangeCheck) {
2565  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2566  StringMap<OpKind> EmittedMap;
2567
2568  // Generate the intrinsic range checking code for shift/lane immediates.
2569  if (isA64RangeCheck)
2570    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2571  else
2572    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2573
2574  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2575    Record *R = RV[i];
2576
2577    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2578    if (k != OpNone)
2579      continue;
2580
2581    std::string name = R->getValueAsString("Name");
2582    std::string Proto = R->getValueAsString("Prototype");
2583    std::string Types = R->getValueAsString("Types");
2584    std::string Rename = name + "@" + Proto;
2585
2586    // Functions with 'a' (the splat code) in the type prototype should not get
2587    // their own builtin as they use the non-splat variant.
2588    if (Proto.find('a') != std::string::npos)
2589      continue;
2590
2591    // Functions which do not have an immediate do not need to have range
2592    // checking code emitted.
2593    size_t immPos = Proto.find('i');
2594    if (immPos == std::string::npos)
2595      continue;
2596
2597    SmallVector<StringRef, 16> TypeVec;
2598    ParseTypes(R, Types, TypeVec);
2599
2600    if (R->getSuperClasses().size() < 2)
2601      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2602
2603    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2604
2605    // Do not include AArch64 range checks if not generating code for AArch64.
2606    bool isA64 = R->getValueAsBit("isA64");
2607    if (!isA64RangeCheck && isA64)
2608      continue;
2609
2610    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2611    // redefined by AArch64 to handle new types.
2612    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2613      ClassKind &A64CK = A64IntrinsicMap[Rename];
2614      if (A64CK == ck && ck != ClassNone)
2615        continue;
2616    }
2617
2618    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2619      std::string namestr, shiftstr, rangestr;
2620
2621      if (R->getValueAsBit("isVCVT_N")) {
2622        // VCVT between floating- and fixed-point values takes an immediate
2623        // in the range [1, 32] for f32, or [1, 64] for f64.
2624        ck = ClassB;
2625        if (name.find("32") != std::string::npos)
2626          rangestr = "l = 1; u = 31"; // upper bound = l + u
2627        else if (name.find("64") != std::string::npos)
2628          rangestr = "l = 1; u = 63";
2629        else
2630          PrintFatalError(R->getLoc(),
2631              "Fixed point convert name should contains \"32\" or \"64\"");
2632
2633      } else if (R->getValueAsBit("isScalarShift")) {
2634        // Right shifts have an 'r' in the name, left shifts do not.  Convert
2635        // instructions have the same bounds and right shifts.
2636        if (name.find('r') != std::string::npos ||
2637            name.find("cvt") != std::string::npos)
2638          rangestr = "l = 1; ";
2639
2640        rangestr += "u = " +
2641          utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
2642      } else if (!ProtoHasScalar(Proto)) {
2643        // Builtins which are overloaded by type will need to have their upper
2644        // bound computed at Sema time based on the type constant.
2645        ck = ClassB;
2646        if (R->getValueAsBit("isShift")) {
2647          shiftstr = ", true";
2648
2649          // Right shifts have an 'r' in the name, left shifts do not.
2650          if (name.find('r') != std::string::npos)
2651            rangestr = "l = 1; ";
2652        }
2653        rangestr += "u = RFT(TV" + shiftstr + ")";
2654      } else {
2655        // The immediate generally refers to a lane in the preceding argument.
2656        assert(immPos > 0 && "unexpected immediate operand");
2657        rangestr =
2658            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2659      }
2660      // Make sure cases appear only once by uniquing them in a string map.
2661      namestr = MangleName(name, TypeVec[ti], ck);
2662      if (EmittedMap.count(namestr))
2663        continue;
2664      EmittedMap[namestr] = OpNone;
2665
2666      // Calculate the index of the immediate that should be range checked.
2667      unsigned immidx = 0;
2668
2669      // Builtins that return a struct of multiple vectors have an extra
2670      // leading arg for the struct return.
2671      if (IsMultiVecProto(Proto[0]))
2672        ++immidx;
2673
2674      // Add one to the index for each argument until we reach the immediate
2675      // to be checked.  Structs of vectors are passed as multiple arguments.
2676      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2677        switch (Proto[ii]) {
2678        default:
2679          immidx += 1;
2680          break;
2681        case '2':
2682        case 'B':
2683          immidx += 2;
2684          break;
2685        case '3':
2686        case 'C':
2687          immidx += 3;
2688          break;
2689        case '4':
2690        case 'D':
2691          immidx += 4;
2692          break;
2693        case 'i':
2694          ie = ii + 1;
2695          break;
2696        }
2697      }
2698      if (isA64RangeCheck)
2699        OS << "case AArch64::BI__builtin_neon_";
2700      else
2701        OS << "case ARM::BI__builtin_neon_";
2702      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2703         << rangestr << "; break;\n";
2704    }
2705  }
2706  OS << "#endif\n\n";
2707}
2708
2709/// Generate the ARM and AArch64 overloaded type checking code for
2710/// SemaChecking.cpp, checking for unique builtin declarations.
2711void
2712NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2713                                      StringMap<ClassKind> &A64IntrinsicMap,
2714                                      bool isA64TypeCheck) {
2715  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2716  StringMap<OpKind> EmittedMap;
2717
2718  // Generate the overloaded type checking code for SemaChecking.cpp
2719  if (isA64TypeCheck)
2720    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2721  else
2722    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2723
2724  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2725    Record *R = RV[i];
2726    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2727    if (k != OpNone)
2728      continue;
2729
2730    std::string Proto = R->getValueAsString("Prototype");
2731    std::string Types = R->getValueAsString("Types");
2732    std::string name = R->getValueAsString("Name");
2733    std::string Rename = name + "@" + Proto;
2734
2735    // Functions with 'a' (the splat code) in the type prototype should not get
2736    // their own builtin as they use the non-splat variant.
2737    if (Proto.find('a') != std::string::npos)
2738      continue;
2739
2740    // Functions which have a scalar argument cannot be overloaded, no need to
2741    // check them if we are emitting the type checking code.
2742    if (ProtoHasScalar(Proto))
2743      continue;
2744
2745    SmallVector<StringRef, 16> TypeVec;
2746    ParseTypes(R, Types, TypeVec);
2747
2748    if (R->getSuperClasses().size() < 2)
2749      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2750
2751    // Do not include AArch64 type checks if not generating code for AArch64.
2752    bool isA64 = R->getValueAsBit("isA64");
2753    if (!isA64TypeCheck && isA64)
2754      continue;
2755
2756    // Include ARM  type check in AArch64 but only if ARM intrinsics
2757    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2758    // redefined in AArch64 to handle an additional 2 x f64 type.
2759    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2760    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2761      ClassKind &A64CK = A64IntrinsicMap[Rename];
2762      if (A64CK == ck && ck != ClassNone)
2763        continue;
2764    }
2765
2766    int si = -1, qi = -1;
2767    uint64_t mask = 0, qmask = 0;
2768    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2769      // Generate the switch case(s) for this builtin for the type validation.
2770      bool quad = false, poly = false, usgn = false;
2771      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
2772
2773      if (quad) {
2774        qi = ti;
2775        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2776      } else {
2777        si = ti;
2778        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2779      }
2780    }
2781
2782    // Check if the builtin function has a pointer or const pointer argument.
2783    int PtrArgNum = -1;
2784    bool HasConstPtr = false;
2785    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2786      char ArgType = Proto[arg];
2787      if (ArgType == 'c') {
2788        HasConstPtr = true;
2789        PtrArgNum = arg - 1;
2790        break;
2791      }
2792      if (ArgType == 'p') {
2793        PtrArgNum = arg - 1;
2794        break;
2795      }
2796    }
2797    // For sret builtins, adjust the pointer argument index.
2798    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
2799      PtrArgNum += 1;
2800
2801    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2802    // and vst1_lane intrinsics.  Using a pointer to the vector element
2803    // type with one of those operations causes codegen to select an aligned
2804    // load/store instruction.  If you want an unaligned operation,
2805    // the pointer argument needs to have less alignment than element type,
2806    // so just accept any pointer type.
2807    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2808      PtrArgNum = -1;
2809      HasConstPtr = false;
2810    }
2811
2812    if (mask) {
2813      if (isA64TypeCheck)
2814        OS << "case AArch64::BI__builtin_neon_";
2815      else
2816        OS << "case ARM::BI__builtin_neon_";
2817      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
2818         << "0x" << utohexstr(mask) << "ULL";
2819      if (PtrArgNum >= 0)
2820        OS << "; PtrArgNum = " << PtrArgNum;
2821      if (HasConstPtr)
2822        OS << "; HasConstPtr = true";
2823      OS << "; break;\n";
2824    }
2825    if (qmask) {
2826      if (isA64TypeCheck)
2827        OS << "case AArch64::BI__builtin_neon_";
2828      else
2829        OS << "case ARM::BI__builtin_neon_";
2830      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
2831         << "0x" << utohexstr(qmask) << "ULL";
2832      if (PtrArgNum >= 0)
2833        OS << "; PtrArgNum = " << PtrArgNum;
2834      if (HasConstPtr)
2835        OS << "; HasConstPtr = true";
2836      OS << "; break;\n";
2837    }
2838  }
2839  OS << "#endif\n\n";
2840}
2841
2842/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2843/// declaration of builtins, checking for unique builtin declarations.
2844void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2845                                 StringMap<ClassKind> &A64IntrinsicMap,
2846                                 bool isA64GenBuiltinDef) {
2847  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2848  StringMap<OpKind> EmittedMap;
2849
2850  // Generate BuiltinsARM.def and BuiltinsAArch64.def
2851  if (isA64GenBuiltinDef)
2852    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2853  else
2854    OS << "#ifdef GET_NEON_BUILTINS\n";
2855
2856  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2857    Record *R = RV[i];
2858    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2859    if (k != OpNone)
2860      continue;
2861
2862    std::string Proto = R->getValueAsString("Prototype");
2863    std::string name = R->getValueAsString("Name");
2864    std::string Rename = name + "@" + Proto;
2865
2866    // Functions with 'a' (the splat code) in the type prototype should not get
2867    // their own builtin as they use the non-splat variant.
2868    if (Proto.find('a') != std::string::npos)
2869      continue;
2870
2871    std::string Types = R->getValueAsString("Types");
2872    SmallVector<StringRef, 16> TypeVec;
2873    ParseTypes(R, Types, TypeVec);
2874
2875    if (R->getSuperClasses().size() < 2)
2876      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2877
2878    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2879
2880    // Do not include AArch64 BUILTIN() macros if not generating
2881    // code for AArch64
2882    bool isA64 = R->getValueAsBit("isA64");
2883    if (!isA64GenBuiltinDef && isA64)
2884      continue;
2885
2886    // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
2887    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2888    // redefined in AArch64 to handle an additional 2 x f64 type.
2889    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
2890      ClassKind &A64CK = A64IntrinsicMap[Rename];
2891      if (A64CK == ck && ck != ClassNone)
2892        continue;
2893    }
2894
2895    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2896      // Generate the declaration for this builtin, ensuring
2897      // that each unique BUILTIN() macro appears only once in the output
2898      // stream.
2899      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
2900      if (EmittedMap.count(bd))
2901        continue;
2902
2903      EmittedMap[bd] = OpNone;
2904      OS << bd << "\n";
2905    }
2906  }
2907  OS << "#endif\n\n";
2908}
2909
2910/// runHeader - Emit a file with sections defining:
2911/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2912/// 2. the SemaChecking code for the type overload checking.
2913/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2914void NeonEmitter::runHeader(raw_ostream &OS) {
2915  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2916
2917  // build a map of AArch64 intriniscs to be used in uniqueness checks.
2918  StringMap<ClassKind> A64IntrinsicMap;
2919  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2920    Record *R = RV[i];
2921
2922    bool isA64 = R->getValueAsBit("isA64");
2923    if (!isA64)
2924      continue;
2925
2926    ClassKind CK = ClassNone;
2927    if (R->getSuperClasses().size() >= 2)
2928      CK = ClassMap[R->getSuperClasses()[1]];
2929
2930    std::string Name = R->getValueAsString("Name");
2931    std::string Proto = R->getValueAsString("Prototype");
2932    std::string Rename = Name + "@" + Proto;
2933    if (A64IntrinsicMap.count(Rename))
2934      continue;
2935    A64IntrinsicMap[Rename] = CK;
2936  }
2937
2938  // Generate BuiltinsARM.def for ARM
2939  genBuiltinsDef(OS, A64IntrinsicMap, false);
2940
2941  // Generate BuiltinsAArch64.def for AArch64
2942  genBuiltinsDef(OS, A64IntrinsicMap, true);
2943
2944  // Generate ARM overloaded type checking code for SemaChecking.cpp
2945  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2946
2947  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2948  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2949
2950  // Generate ARM range checking code for shift/lane immediates.
2951  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2952
2953  // Generate the AArch64 range checking code for shift/lane immediates.
2954  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2955}
2956
2957/// GenTest - Write out a test for the intrinsic specified by the name and
2958/// type strings, including the embedded patterns for FileCheck to match.
2959static std::string GenTest(const std::string &name,
2960                           const std::string &proto,
2961                           StringRef outTypeStr, StringRef inTypeStr,
2962                           bool isShift, bool isHiddenLOp,
2963                           ClassKind ck, const std::string &InstName,
2964						   bool isA64,
2965						   std::string & testFuncProto) {
2966  assert(!proto.empty() && "");
2967  std::string s;
2968
2969  // Function name with type suffix
2970  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2971  if (outTypeStr != inTypeStr) {
2972    // If the input type is different (e.g., for vreinterpret), append a suffix
2973    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2974    // does not insert another "q" in the name.
2975    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2976    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2977    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2978  }
2979
2980  // todo: GenerateChecksForIntrinsic does not generate CHECK
2981  // for aarch64 instructions yet
2982  std::vector<std::string> FileCheckPatterns;
2983  if (!isA64) {
2984	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2985							   isHiddenLOp, FileCheckPatterns);
2986	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2987  }
2988  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2989
2990  // Emit the FileCheck patterns.
2991  // If for any reason we do not want to emit a check, mangledInst
2992  // will be the empty string.
2993  if (FileCheckPatterns.size()) {
2994    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2995                                                  e = FileCheckPatterns.end();
2996         i != e;
2997         ++i) {
2998      s += "// CHECK_ARM: " + *i + "\n";
2999    }
3000  }
3001
3002  // Emit the start of the test function.
3003
3004  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3005  char arg = 'a';
3006  std::string comma;
3007  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3008    // Do not create arguments for values that must be immediate constants.
3009    if (proto[i] == 'i')
3010      continue;
3011    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3012    testFuncProto.push_back(arg);
3013    comma = ", ";
3014  }
3015  testFuncProto += ")";
3016
3017  s+= testFuncProto;
3018  s+= " {\n  ";
3019
3020  if (proto[0] != 'v')
3021    s += "return ";
3022  s += mangledName + "(";
3023  arg = 'a';
3024  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3025    if (proto[i] == 'i') {
3026      // For immediate operands, test the maximum value.
3027      if (isShift)
3028        s += "1"; // FIXME
3029      else
3030        // The immediate generally refers to a lane in the preceding argument.
3031        s += utostr(RangeFromType(proto[i-1], inTypeStr));
3032    } else {
3033      s.push_back(arg);
3034    }
3035    if ((i + 1) < e)
3036      s += ", ";
3037  }
3038  s += ");\n}\n\n";
3039  return s;
3040}
3041
3042/// Write out all intrinsic tests for the specified target, checking
3043/// for intrinsic test uniqueness.
3044void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
3045                                bool isA64GenTest) {
3046  if (isA64GenTest)
3047	OS << "#ifdef __aarch64__\n";
3048
3049  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3050  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3051    Record *R = RV[i];
3052    std::string name = R->getValueAsString("Name");
3053    std::string Proto = R->getValueAsString("Prototype");
3054    std::string Types = R->getValueAsString("Types");
3055    bool isShift = R->getValueAsBit("isShift");
3056    std::string InstName = R->getValueAsString("InstName");
3057    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3058    bool isA64 = R->getValueAsBit("isA64");
3059
3060    // do not include AArch64 intrinsic test if not generating
3061    // code for AArch64
3062    if (!isA64GenTest && isA64)
3063      continue;
3064
3065    SmallVector<StringRef, 16> TypeVec;
3066    ParseTypes(R, Types, TypeVec);
3067
3068    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3069    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3070    if (kind == OpUnavailable)
3071      continue;
3072    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3073      if (kind == OpReinterpret) {
3074        bool outQuad = false;
3075        bool dummy = false;
3076        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3077        for (unsigned srcti = 0, srcte = TypeVec.size();
3078             srcti != srcte; ++srcti) {
3079          bool inQuad = false;
3080          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3081          if (srcti == ti || inQuad != outQuad)
3082            continue;
3083		  std::string testFuncProto;
3084          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3085                                  isShift, isHiddenLOp, ck, InstName, isA64,
3086								  testFuncProto);
3087          if (EmittedMap.count(testFuncProto))
3088            continue;
3089          EmittedMap[testFuncProto] = kind;
3090          OS << s << "\n";
3091        }
3092      } else {
3093		std::string testFuncProto;
3094        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3095                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
3096        if (EmittedMap.count(testFuncProto))
3097          continue;
3098        EmittedMap[testFuncProto] = kind;
3099        OS << s << "\n";
3100      }
3101    }
3102  }
3103
3104  if (isA64GenTest)
3105	OS << "#endif\n";
3106}
3107/// runTests - Write out a complete set of tests for all of the Neon
3108/// intrinsics.
3109void NeonEmitter::runTests(raw_ostream &OS) {
3110  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3111        "apcs-gnu\\\n"
3112        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3113        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3114		"\n"
3115	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3116	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
3117	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3118        "\n"
3119        "// REQUIRES: long_tests\n"
3120        "\n"
3121        "#include <arm_neon.h>\n"
3122        "\n";
3123
3124  // ARM tests must be emitted before AArch64 tests to ensure
3125  // tests for intrinsics that are common to ARM and AArch64
3126  // appear only once in the output stream.
3127  // The check for uniqueness is done in genTargetTest.
3128  StringMap<OpKind> EmittedMap;
3129
3130  genTargetTest(OS, EmittedMap, false);
3131
3132  genTargetTest(OS, EmittedMap, true);
3133}
3134
3135namespace clang {
3136void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3137  NeonEmitter(Records).run(OS);
3138}
3139void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3140  NeonEmitter(Records).runHeader(OS);
3141}
3142void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3143  NeonEmitter(Records).runTests(OS);
3144}
3145} // End namespace clang
3146