NeonEmitter.cpp revision dd12780e86575795fa912529a911b01e2abc4677
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
/// OpKind - Identifies the operation attached to an intrinsic definition.
/// Each enumerator is bound to an "OP_*" string key in the NeonEmitter
/// constructor.  The enumerators are listed in their original order, so the
/// implicit numeric values are unchanged; the family headings below are only
/// a reading aid derived from the enumerator names.
enum OpKind {
  // No operation / unavailable marker.
  OpNone, OpUnavailable,

  // Add and subtract family.
  OpAdd, OpAddl, OpAddlHi, OpAddw, OpAddwHi,
  OpSub, OpSubl, OpSublHi, OpSubw, OpSubwHi,

  // Multiply and multiply-accumulate family.
  OpMul, OpMla, OpMlal, OpMullHi, OpMlalHi,
  OpMls, OpMlsl, OpMlslHi,

  // "_N" variants.
  OpMulN, OpMlaN, OpMlsN, OpMlalN, OpMlslN,

  // "Lane" variants.
  OpMulLane, OpMulXLane, OpMullLane, OpMullHiLane,
  OpMlaLane, OpMlsLane,
  OpMlalLane, OpMlalHiLane, OpMlslLane, OpMlslHiLane,
  OpQDMullLane, OpQDMullHiLane,
  OpQDMlalLane, OpQDMlalHiLane,
  OpQDMlslLane, OpQDMlslHiLane,
  OpQDMulhLane, OpQRDMulhLane,
  OpFMSLane, OpFMSLaneQ,

  // Trn / Zip / Uzp.
  OpTrn1, OpZip1, OpUzp1,
  OpTrn2, OpZip2, OpUzp2,

  // Comparisons.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Negate and bitwise family.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Cast, concat, dup, hi/lo, select, rev.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo, OpSelect,
  OpRev16, OpRev32, OpRev64,

  // "Hi" narrowing / conversion family.
  OpXtnHi, OpSqxtunHi, OpQxtnHi,
  OpFcvtnHi, OpFcvtlHi, OpFcvtxnHi,

  OpReinterpret,

  // Addhn / Subhn "Hi" family.
  OpAddhnHi, OpRAddhnHi, OpSubhnHi, OpRSubhnHi,

  // Abd / Aba family.
  OpAbdl, OpAbdlHi, OpAba, OpAbal, OpAbalHi,

  // QDM* "Hi" family.
  OpQDMullHi, OpQDMlalHi, OpQDMlslHi,

  OpDiv,
  OpLongHi, OpNarrowHi, OpMovlHi,

  // Copy-lane family.
  OpCopyLane, OpCopyQLane, OpCopyLaneQ,

  // Scalar multiply-by-lane family.
  OpScalarMulLane, OpScalarMulLaneQ,
  OpScalarMulXLane, OpScalarMulXLaneQ,
  OpScalarVMulXLane, OpScalarVMulXLaneQ
};
145
/// ClassKind - Category of an intrinsic; it controls which type suffix (if
/// any) is attached to the generated name.
enum ClassKind {
  ClassNone,

  // Generic integer instruction, e.g., "i8" suffix.
  ClassI,

  // Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,

  // Width-specific instruction, e.g., "8" suffix.
  ClassW,

  // Bitcast arguments with enum argument to specify type.
  ClassB,

  // Logical instructions which are op instructions, but for which no suffix
  // must be emitted in the tests.
  ClassL,

  // Instructions which are not tested since they are not true instructions.
  ClassNoTest
};
158
/// NeonTypeFlags - Packed description of the type of an overloaded Neon
/// builtin: the element type lives in the low four bits, followed by an
/// "unsigned" bit and a "quad" bit.  These must be kept in sync with the
/// flags in include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  /// Element types; the enumerator value is stored directly in the low bits.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode an element type together with its unsigned and quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the raw encoded flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
196
197namespace {
198class NeonEmitter {
199  RecordKeeper &Records;
200  StringMap<OpKind> OpMap;
201  DenseMap<Record*, ClassKind> ClassMap;
202
203public:
204  NeonEmitter(RecordKeeper &R) : Records(R) {
205    OpMap["OP_NONE"]  = OpNone;
206    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
207    OpMap["OP_ADD"]   = OpAdd;
208    OpMap["OP_ADDL"]  = OpAddl;
209    OpMap["OP_ADDLHi"] = OpAddlHi;
210    OpMap["OP_ADDW"]  = OpAddw;
211    OpMap["OP_ADDWHi"] = OpAddwHi;
212    OpMap["OP_SUB"]   = OpSub;
213    OpMap["OP_SUBL"]  = OpSubl;
214    OpMap["OP_SUBLHi"] = OpSublHi;
215    OpMap["OP_SUBW"]  = OpSubw;
216    OpMap["OP_SUBWHi"] = OpSubwHi;
217    OpMap["OP_MUL"]   = OpMul;
218    OpMap["OP_MLA"]   = OpMla;
219    OpMap["OP_MLAL"]  = OpMlal;
220    OpMap["OP_MULLHi"]  = OpMullHi;
221    OpMap["OP_MLALHi"]  = OpMlalHi;
222    OpMap["OP_MLS"]   = OpMls;
223    OpMap["OP_MLSL"]  = OpMlsl;
224    OpMap["OP_MLSLHi"] = OpMlslHi;
225    OpMap["OP_MUL_N"] = OpMulN;
226    OpMap["OP_MLA_N"] = OpMlaN;
227    OpMap["OP_MLS_N"] = OpMlsN;
228    OpMap["OP_MLAL_N"] = OpMlalN;
229    OpMap["OP_MLSL_N"] = OpMlslN;
230    OpMap["OP_MUL_LN"]= OpMulLane;
231    OpMap["OP_MULX_LN"]= OpMulXLane;
232    OpMap["OP_MULL_LN"] = OpMullLane;
233    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
234    OpMap["OP_MLA_LN"]= OpMlaLane;
235    OpMap["OP_MLS_LN"]= OpMlsLane;
236    OpMap["OP_MLAL_LN"] = OpMlalLane;
237    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
238    OpMap["OP_MLSL_LN"] = OpMlslLane;
239    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
240    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
241    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
242    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
243    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
244    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
245    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
246    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
247    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
248    OpMap["OP_FMS_LN"] = OpFMSLane;
249    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
250    OpMap["OP_TRN1"]  = OpTrn1;
251    OpMap["OP_ZIP1"]  = OpZip1;
252    OpMap["OP_UZP1"]  = OpUzp1;
253    OpMap["OP_TRN2"]  = OpTrn2;
254    OpMap["OP_ZIP2"]  = OpZip2;
255    OpMap["OP_UZP2"]  = OpUzp2;
256    OpMap["OP_EQ"]    = OpEq;
257    OpMap["OP_GE"]    = OpGe;
258    OpMap["OP_LE"]    = OpLe;
259    OpMap["OP_GT"]    = OpGt;
260    OpMap["OP_LT"]    = OpLt;
261    OpMap["OP_NEG"]   = OpNeg;
262    OpMap["OP_NOT"]   = OpNot;
263    OpMap["OP_AND"]   = OpAnd;
264    OpMap["OP_OR"]    = OpOr;
265    OpMap["OP_XOR"]   = OpXor;
266    OpMap["OP_ANDN"]  = OpAndNot;
267    OpMap["OP_ORN"]   = OpOrNot;
268    OpMap["OP_CAST"]  = OpCast;
269    OpMap["OP_CONC"]  = OpConcat;
270    OpMap["OP_HI"]    = OpHi;
271    OpMap["OP_LO"]    = OpLo;
272    OpMap["OP_DUP"]   = OpDup;
273    OpMap["OP_DUP_LN"] = OpDupLane;
274    OpMap["OP_SEL"]   = OpSelect;
275    OpMap["OP_REV16"] = OpRev16;
276    OpMap["OP_REV32"] = OpRev32;
277    OpMap["OP_REV64"] = OpRev64;
278    OpMap["OP_XTN"] = OpXtnHi;
279    OpMap["OP_SQXTUN"] = OpSqxtunHi;
280    OpMap["OP_QXTN"] = OpQxtnHi;
281    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
282    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
283    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
284    OpMap["OP_REINT"] = OpReinterpret;
285    OpMap["OP_ADDHNHi"] = OpAddhnHi;
286    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
287    OpMap["OP_SUBHNHi"] = OpSubhnHi;
288    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
289    OpMap["OP_ABDL"]  = OpAbdl;
290    OpMap["OP_ABDLHi"] = OpAbdlHi;
291    OpMap["OP_ABA"]   = OpAba;
292    OpMap["OP_ABAL"]  = OpAbal;
293    OpMap["OP_ABALHi"] = OpAbalHi;
294    OpMap["OP_QDMULLHi"] = OpQDMullHi;
295    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
296    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
297    OpMap["OP_DIV"] = OpDiv;
298    OpMap["OP_LONG_HI"] = OpLongHi;
299    OpMap["OP_NARROW_HI"] = OpNarrowHi;
300    OpMap["OP_MOVL_HI"] = OpMovlHi;
301    OpMap["OP_COPY_LN"] = OpCopyLane;
302    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
303    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
304    OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
305    OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
306    OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
307    OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
308    OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
309    OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
310
311    Record *SI = R.getClass("SInst");
312    Record *II = R.getClass("IInst");
313    Record *WI = R.getClass("WInst");
314    Record *SOpI = R.getClass("SOpInst");
315    Record *IOpI = R.getClass("IOpInst");
316    Record *WOpI = R.getClass("WOpInst");
317    Record *LOpI = R.getClass("LOpInst");
318    Record *NoTestOpI = R.getClass("NoTestOpInst");
319
320    ClassMap[SI] = ClassS;
321    ClassMap[II] = ClassI;
322    ClassMap[WI] = ClassW;
323    ClassMap[SOpI] = ClassS;
324    ClassMap[IOpI] = ClassI;
325    ClassMap[WOpI] = ClassW;
326    ClassMap[LOpI] = ClassL;
327    ClassMap[NoTestOpI] = ClassNoTest;
328  }
329
330  // run - Emit arm_neon.h.inc
331  void run(raw_ostream &o);
332
333  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
334  void runHeader(raw_ostream &o);
335
336  // runTests - Emit tests for all the Neon intrinsics.
337  void runTests(raw_ostream &o);
338
339private:
340  void emitIntrinsic(raw_ostream &OS, Record *R,
341                     StringMap<ClassKind> &EmittedMap);
342  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
343                      bool isA64GenBuiltinDef);
344  void genOverloadTypeCheckCode(raw_ostream &OS,
345                                StringMap<ClassKind> &A64IntrinsicMap,
346                                bool isA64TypeCheck);
347  void genIntrinsicRangeCheckCode(raw_ostream &OS,
348                                  StringMap<ClassKind> &A64IntrinsicMap,
349                                  bool isA64RangeCheck);
350  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
351                     bool isA64TestGen);
352};
353} // end anonymous namespace
354
355/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
356/// which each StringRef representing a single type declared in the string.
357/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
358/// 2xfloat and 4xfloat respectively.
359static void ParseTypes(Record *r, std::string &s,
360                       SmallVectorImpl<StringRef> &TV) {
361  const char *data = s.data();
362  int len = 0;
363
364  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
365    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
366                         || data[len] == 'H' || data[len] == 'S')
367      continue;
368
369    switch (data[len]) {
370      case 'c':
371      case 's':
372      case 'i':
373      case 'l':
374      case 'h':
375      case 'f':
376      case 'd':
377        break;
378      default:
379        PrintFatalError(r->getLoc(),
380                      "Unexpected letter: " + std::string(data + len, 1));
381    }
382    TV.push_back(StringRef(data, len + 1));
383    data += len + 1;
384    len = -1;
385  }
386}
387
388/// Widen - Convert a type code into the next wider type.  char -> short,
389/// short -> int, etc.
390static char Widen(const char t) {
391  switch (t) {
392    case 'c':
393      return 's';
394    case 's':
395      return 'i';
396    case 'i':
397      return 'l';
398    case 'h':
399      return 'f';
400    case 'f':
401      return 'd';
402    default:
403      PrintFatalError("unhandled type in widen!");
404  }
405}
406
407/// Narrow - Convert a type code into the next smaller type.  short -> char,
408/// float -> half float, etc.
409static char Narrow(const char t) {
410  switch (t) {
411    case 's':
412      return 'c';
413    case 'i':
414      return 's';
415    case 'l':
416      return 'i';
417    case 'f':
418      return 'h';
419    case 'd':
420      return 'f';
421    default:
422      PrintFatalError("unhandled type in narrow!");
423  }
424}
425
426static std::string GetNarrowTypestr(StringRef ty)
427{
428  std::string s;
429  for (size_t i = 0, end = ty.size(); i < end; i++) {
430    switch (ty[i]) {
431      case 's':
432        s += 'c';
433        break;
434      case 'i':
435        s += 's';
436        break;
437      case 'l':
438        s += 'i';
439        break;
440      default:
441        s += ty[i];
442        break;
443    }
444  }
445
446  return s;
447}
448
449/// For a particular StringRef, return the base type code, and whether it has
450/// the quad-vector, polynomial, or unsigned modifiers set.
451static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
452  unsigned off = 0;
453  // ignore scalar.
454  if (ty[off] == 'S') {
455    ++off;
456  }
457  // remember quad.
458  if (ty[off] == 'Q' || ty[off] == 'H') {
459    quad = true;
460    ++off;
461  }
462
463  // remember poly.
464  if (ty[off] == 'P') {
465    poly = true;
466    ++off;
467  }
468
469  // remember unsigned.
470  if (ty[off] == 'U') {
471    usgn = true;
472    ++off;
473  }
474
475  // base type to get the type string for.
476  return ty[off];
477}
478
479/// ModType - Transform a type code and its modifiers based on a mod code. The
480/// mod code definitions may be found at the top of arm_neon.td.
481static char ModType(const char mod, char type, bool &quad, bool &poly,
482                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
483  switch (mod) {
484    case 't':
485      if (poly) {
486        poly = false;
487        usgn = true;
488      }
489      break;
490    case 'b':
491      scal = true;
492    case 'u':
493      usgn = true;
494      poly = false;
495      if (type == 'f')
496        type = 'i';
497      if (type == 'd')
498        type = 'l';
499      break;
500    case '$':
501      scal = true;
502    case 'x':
503      usgn = false;
504      poly = false;
505      if (type == 'f')
506        type = 'i';
507      if (type == 'd')
508        type = 'l';
509      break;
510    case 'o':
511      scal = true;
512      type = 'd';
513      usgn = false;
514      break;
515    case 'y':
516      scal = true;
517    case 'f':
518      if (type == 'h')
519        quad = true;
520      type = 'f';
521      usgn = false;
522      break;
523    case 'g':
524      quad = false;
525      break;
526    case 'B':
527    case 'C':
528    case 'D':
529    case 'j':
530      quad = true;
531      break;
532    case 'w':
533      type = Widen(type);
534      quad = true;
535      break;
536    case 'n':
537      type = Widen(type);
538      break;
539    case 'i':
540      type = 'i';
541      scal = true;
542      break;
543    case 'l':
544      type = 'l';
545      scal = true;
546      usgn = true;
547      break;
548    case 'z':
549      type = Narrow(type);
550      scal = true;
551      break;
552    case 'r':
553      type = Widen(type);
554      scal = true;
555      break;
556    case 's':
557    case 'a':
558      scal = true;
559      break;
560    case 'k':
561      quad = true;
562      break;
563    case 'c':
564      cnst = true;
565    case 'p':
566      pntr = true;
567      scal = true;
568      break;
569    case 'h':
570      type = Narrow(type);
571      if (type == 'h')
572        quad = false;
573      break;
574    case 'q':
575      type = Narrow(type);
576      quad = true;
577      break;
578    case 'e':
579      type = Narrow(type);
580      usgn = true;
581      break;
582    case 'm':
583      type = Narrow(type);
584      quad = false;
585      break;
586    default:
587      break;
588  }
589  return type;
590}
591
/// IsMultiVecProto - True for the prototype codes '2', '3', '4', 'B', 'C'
/// and 'D'.
static bool IsMultiVecProto(const char p) {
  switch (p) {
  case '2':
  case '3':
  case '4':
  case 'B':
  case 'C':
  case 'D':
    return true;
  default:
    return false;
  }
}
595
596/// TypeString - for a modifier and type, generate the name of the typedef for
597/// that type.  QUc -> uint8x8_t.
598static std::string TypeString(const char mod, StringRef typestr) {
599  bool quad = false;
600  bool poly = false;
601  bool usgn = false;
602  bool scal = false;
603  bool cnst = false;
604  bool pntr = false;
605
606  if (mod == 'v')
607    return "void";
608  if (mod == 'i')
609    return "int";
610
611  // base type to get the type string for.
612  char type = ClassifyType(typestr, quad, poly, usgn);
613
614  // Based on the modifying character, change the type and width if necessary.
615  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
616
617  SmallString<128> s;
618
619  if (usgn)
620    s.push_back('u');
621
622  switch (type) {
623    case 'c':
624      s += poly ? "poly8" : "int8";
625      if (scal)
626        break;
627      s += quad ? "x16" : "x8";
628      break;
629    case 's':
630      s += poly ? "poly16" : "int16";
631      if (scal)
632        break;
633      s += quad ? "x8" : "x4";
634      break;
635    case 'i':
636      s += "int32";
637      if (scal)
638        break;
639      s += quad ? "x4" : "x2";
640      break;
641    case 'l':
642      s += (poly && !usgn)? "poly64" : "int64";
643      if (scal)
644        break;
645      s += quad ? "x2" : "x1";
646      break;
647    case 'h':
648      s += "float16";
649      if (scal)
650        break;
651      s += quad ? "x8" : "x4";
652      break;
653    case 'f':
654      s += "float32";
655      if (scal)
656        break;
657      s += quad ? "x4" : "x2";
658      break;
659    case 'd':
660      s += "float64";
661      if (scal)
662        break;
663      s += quad ? "x2" : "x1";
664      break;
665
666    default:
667      PrintFatalError("unhandled type!");
668  }
669
670  if (mod == '2' || mod == 'B')
671    s += "x2";
672  if (mod == '3' || mod == 'C')
673    s += "x3";
674  if (mod == '4' || mod == 'D')
675    s += "x4";
676
677  // Append _t, finishing the type string typedef type.
678  s += "_t";
679
680  if (cnst)
681    s += " const";
682
683  if (pntr)
684    s += " *";
685
686  return s.str();
687}
688
689/// BuiltinTypeString - for a modifier and type, generate the clang
690/// BuiltinsARM.def prototype code for the function.  See the top of clang's
691/// Builtins.def for a description of the type strings.
692static std::string BuiltinTypeString(const char mod, StringRef typestr,
693                                     ClassKind ck, bool ret) {
694  bool quad = false;
695  bool poly = false;
696  bool usgn = false;
697  bool scal = false;
698  bool cnst = false;
699  bool pntr = false;
700
701  if (mod == 'v')
702    return "v"; // void
703  if (mod == 'i')
704    return "i"; // int
705
706  // base type to get the type string for.
707  char type = ClassifyType(typestr, quad, poly, usgn);
708
709  // Based on the modifying character, change the type and width if necessary.
710  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
711
712  // All pointers are void* pointers.  Change type to 'v' now.
713  if (pntr) {
714    usgn = false;
715    poly = false;
716    type = 'v';
717  }
718  // Treat half-float ('h') types as unsigned short ('s') types.
719  if (type == 'h') {
720    type = 's';
721    usgn = true;
722  }
723  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
724                         scal && type != 'f' && type != 'd');
725
726  if (scal) {
727    SmallString<128> s;
728
729    if (usgn)
730      s.push_back('U');
731    else if (type == 'c')
732      s.push_back('S'); // make chars explicitly signed
733
734    if (type == 'l') // 64-bit long
735      s += "LLi";
736    else
737      s.push_back(type);
738
739    if (cnst)
740      s.push_back('C');
741    if (pntr)
742      s.push_back('*');
743    return s.str();
744  }
745
746  // Since the return value must be one type, return a vector type of the
747  // appropriate width which we will bitcast.  An exception is made for
748  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
749  // fashion, storing them to a pointer arg.
750  if (ret) {
751    if (IsMultiVecProto(mod))
752      return "vv*"; // void result with void* first argument
753    if (mod == 'f' || (ck != ClassB && type == 'f'))
754      return quad ? "V4f" : "V2f";
755    if (ck != ClassB && type == 'd')
756      return quad ? "V2d" : "V1d";
757    if (ck != ClassB && type == 's')
758      return quad ? "V8s" : "V4s";
759    if (ck != ClassB && type == 'i')
760      return quad ? "V4i" : "V2i";
761    if (ck != ClassB && type == 'l')
762      return quad ? "V2LLi" : "V1LLi";
763
764    return quad ? "V16Sc" : "V8Sc";
765  }
766
767  // Non-return array types are passed as individual vectors.
768  if (mod == '2' || mod == 'B')
769    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
770  if (mod == '3' || mod == 'C')
771    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
772  if (mod == '4' || mod == 'D')
773    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
774
775  if (mod == 'f' || (ck != ClassB && type == 'f'))
776    return quad ? "V4f" : "V2f";
777  if (ck != ClassB && type == 'd')
778    return quad ? "V2d" : "V1d";
779  if (ck != ClassB && type == 's')
780    return quad ? "V8s" : "V4s";
781  if (ck != ClassB && type == 'i')
782    return quad ? "V4i" : "V2i";
783  if (ck != ClassB && type == 'l')
784    return quad ? "V2LLi" : "V1LLi";
785
786  return quad ? "V16Sc" : "V8Sc";
787}
788
789/// InstructionTypeCode - Computes the ARM argument character code and
790/// quad status for a specific type string and ClassKind.
791static void InstructionTypeCode(const StringRef &typeStr,
792                                const ClassKind ck,
793                                bool &quad,
794                                std::string &typeCode) {
795  bool poly = false;
796  bool usgn = false;
797  char type = ClassifyType(typeStr, quad, poly, usgn);
798
799  switch (type) {
800  case 'c':
801    switch (ck) {
802    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
803    case ClassI: typeCode = "i8"; break;
804    case ClassW: typeCode = "8"; break;
805    default: break;
806    }
807    break;
808  case 's':
809    switch (ck) {
810    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
811    case ClassI: typeCode = "i16"; break;
812    case ClassW: typeCode = "16"; break;
813    default: break;
814    }
815    break;
816  case 'i':
817    switch (ck) {
818    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
819    case ClassI: typeCode = "i32"; break;
820    case ClassW: typeCode = "32"; break;
821    default: break;
822    }
823    break;
824  case 'l':
825    switch (ck) {
826    case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
827    case ClassI: typeCode = "i64"; break;
828    case ClassW: typeCode = "64"; break;
829    default: break;
830    }
831    break;
832  case 'h':
833    switch (ck) {
834    case ClassS:
835    case ClassI: typeCode = "f16"; break;
836    case ClassW: typeCode = "16"; break;
837    default: break;
838    }
839    break;
840  case 'f':
841    switch (ck) {
842    case ClassS:
843    case ClassI: typeCode = "f32"; break;
844    case ClassW: typeCode = "32"; break;
845    default: break;
846    }
847    break;
848  case 'd':
849    switch (ck) {
850    case ClassS:
851    case ClassI:
852      typeCode += "f64";
853      break;
854    case ClassW:
855      PrintFatalError("unhandled type!");
856    default:
857      break;
858    }
859    break;
860  default:
861    PrintFatalError("unhandled type!");
862  }
863}
864
865static char Insert_BHSD_Suffix(StringRef typestr){
866  unsigned off = 0;
867  if(typestr[off++] == 'S'){
868    while(typestr[off] == 'Q' || typestr[off] == 'H'||
869          typestr[off] == 'P' || typestr[off] == 'U')
870      ++off;
871    switch (typestr[off]){
872    default  : break;
873    case 'c' : return 'b';
874    case 's' : return 'h';
875    case 'i' :
876    case 'f' : return 's';
877    case 'l' :
878    case 'd' : return 'd';
879    }
880  }
881  return 0;
882}
883
/// endsWith_xN - True when \p name is longer than three characters and ends
/// in "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  const std::string::size_type Len = name.length();
  if (Len <= 3)
    return false;

  const char Digit = name[Len - 1];
  return name[Len - 3] == '_' && name[Len - 2] == 'x' &&
         (Digit == '2' || Digit == '3' || Digit == '4');
}
893
894/// MangleName - Append a type or width suffix to a base neon function name,
895/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
896/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
897/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
898static std::string MangleName(const std::string &name, StringRef typestr,
899                              ClassKind ck) {
900  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
901    return name;
902
903  bool quad = false;
904  std::string typeCode = "";
905
906  InstructionTypeCode(typestr, ck, quad, typeCode);
907
908  std::string s = name;
909
910  if (typeCode.size() > 0) {
911    // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
912    if (endsWith_xN(s))
913      s.insert(s.length() - 3, "_" + typeCode);
914    else
915      s += "_" + typeCode;
916  }
917
918  if (ck == ClassB)
919    s += "_v";
920
921  // Insert a 'q' before the first '_' character so that it ends up before
922  // _lane or _n on vector-scalar operations.
923  if (typestr.find("Q") != StringRef::npos) {
924      size_t pos = s.find('_');
925      s = s.insert(pos, "q");
926  }
927  char ins = Insert_BHSD_Suffix(typestr);
928  if(ins){
929    size_t pos = s.find('_');
930    s = s.insert(pos, &ins, 1);
931  }
932
933  return s;
934}
935
936static void PreprocessInstruction(const StringRef &Name,
937                                  const std::string &InstName,
938                                  std::string &Prefix,
939                                  bool &HasNPostfix,
940                                  bool &HasLanePostfix,
941                                  bool &HasDupPostfix,
942                                  bool &IsSpecialVCvt,
943                                  size_t &TBNumber) {
944  // All of our instruction name fields from arm_neon.td are of the form
945  //   <instructionname>_...
946  // Thus we grab our instruction name via computation of said Prefix.
947  const size_t PrefixEnd = Name.find_first_of('_');
948  // If InstName is passed in, we use that instead of our name Prefix.
949  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
950
951  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
952
953  HasNPostfix = Postfix.count("_n");
954  HasLanePostfix = Postfix.count("_lane");
955  HasDupPostfix = Postfix.count("_dup");
956  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
957
958  if (InstName.compare("vtbl") == 0 ||
959      InstName.compare("vtbx") == 0) {
960    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
961    // encoding to get its true value.
962    TBNumber = Name[Name.size()-1] - 48;
963  }
964}
965
966/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
967/// extracted, generate a FileCheck pattern for a Load Or Store
968static void
969GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
970                                          const std::string& OutTypeCode,
971                                          const bool &IsQuad,
972                                          const bool &HasDupPostfix,
973                                          const bool &HasLanePostfix,
974                                          const size_t Count,
975                                          std::string &RegisterSuffix) {
976  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
977  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
978  // will output a series of v{ld,st}1s, so we have to handle it specially.
979  if ((Count == 3 || Count == 4) && IsQuad) {
980    RegisterSuffix += "{";
981    for (size_t i = 0; i < Count; i++) {
982      RegisterSuffix += "d{{[0-9]+}}";
983      if (HasDupPostfix) {
984        RegisterSuffix += "[]";
985      }
986      if (HasLanePostfix) {
987        RegisterSuffix += "[{{[0-9]+}}]";
988      }
989      if (i < Count-1) {
990        RegisterSuffix += ", ";
991      }
992    }
993    RegisterSuffix += "}";
994  } else {
995
996    // Handle normal loads and stores.
997    RegisterSuffix += "{";
998    for (size_t i = 0; i < Count; i++) {
999      RegisterSuffix += "d{{[0-9]+}}";
1000      if (HasDupPostfix) {
1001        RegisterSuffix += "[]";
1002      }
1003      if (HasLanePostfix) {
1004        RegisterSuffix += "[{{[0-9]+}}]";
1005      }
1006      if (IsQuad && !HasLanePostfix) {
1007        RegisterSuffix += ", d{{[0-9]+}}";
1008        if (HasDupPostfix) {
1009          RegisterSuffix += "[]";
1010        }
1011      }
1012      if (i < Count-1) {
1013        RegisterSuffix += ", ";
1014      }
1015    }
1016    RegisterSuffix += "}, [r{{[0-9]+}}";
1017
1018    // We only include the alignment hint if we have a vld1.*64 or
1019    // a dup/lane instruction.
1020    if (IsLDSTOne) {
1021      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1022        RegisterSuffix += ":" + OutTypeCode;
1023      }
1024    }
1025
1026    RegisterSuffix += "]";
1027  }
1028}
1029
1030static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1031                                     const bool &HasNPostfix) {
1032  return (NameRef.count("vmla") ||
1033          NameRef.count("vmlal") ||
1034          NameRef.count("vmlsl") ||
1035          NameRef.count("vmull") ||
1036          NameRef.count("vqdmlal") ||
1037          NameRef.count("vqdmlsl") ||
1038          NameRef.count("vqdmulh") ||
1039          NameRef.count("vqdmull") ||
1040          NameRef.count("vqrdmulh")) && HasNPostfix;
1041}
1042
1043static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1044                                         const bool &HasLanePostfix) {
1045  return (NameRef.count("vmla") ||
1046          NameRef.count("vmls") ||
1047          NameRef.count("vmlal") ||
1048          NameRef.count("vmlsl") ||
1049          (NameRef.count("vmul") && NameRef.size() == 3)||
1050          NameRef.count("vqdmlal") ||
1051          NameRef.count("vqdmlsl") ||
1052          NameRef.count("vqdmulh") ||
1053          NameRef.count("vqrdmulh")) && HasLanePostfix;
1054}
1055
1056static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1057                                  const bool &HasLanePostfix,
1058                                  const bool &IsQuad) {
1059  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1060                               && IsQuad;
1061  const bool IsVMull = NameRef.count("mull") && !IsQuad;
1062  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1063}
1064
1065static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1066                                                     const std::string &Proto,
1067                                                     const bool &HasNPostfix,
1068                                                     const bool &IsQuad,
1069                                                     const bool &HasLanePostfix,
1070                                                     const bool &HasDupPostfix,
1071                                                     std::string &NormedProto) {
1072  // Handle generic case.
1073  const StringRef NameRef(Name);
1074  for (size_t i = 0, end = Proto.size(); i < end; i++) {
1075    switch (Proto[i]) {
1076    case 'u':
1077    case 'f':
1078    case 'd':
1079    case 's':
1080    case 'x':
1081    case 't':
1082    case 'n':
1083      NormedProto += IsQuad? 'q' : 'd';
1084      break;
1085    case 'w':
1086    case 'k':
1087      NormedProto += 'q';
1088      break;
1089    case 'g':
1090    case 'j':
1091    case 'h':
1092    case 'e':
1093      NormedProto += 'd';
1094      break;
1095    case 'i':
1096      NormedProto += HasLanePostfix? 'a' : 'i';
1097      break;
1098    case 'a':
1099      if (HasLanePostfix) {
1100        NormedProto += 'a';
1101      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1102        NormedProto += IsQuad? 'q' : 'd';
1103      } else {
1104        NormedProto += 'i';
1105      }
1106      break;
1107    }
1108  }
1109
1110  // Handle Special Cases.
1111  const bool IsNotVExt = !NameRef.count("vext");
1112  const bool IsVPADAL = NameRef.count("vpadal");
1113  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1114                                                           HasLanePostfix);
1115  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1116                                                      IsQuad);
1117
1118  if (IsSpecialLaneMul) {
1119    // If
1120    NormedProto[2] = NormedProto[3];
1121    NormedProto.erase(3);
1122  } else if (NormedProto.size() == 4 &&
1123             NormedProto[0] == NormedProto[1] &&
1124             IsNotVExt) {
1125    // If NormedProto.size() == 4 and the first two proto characters are the
1126    // same, ignore the first.
1127    NormedProto = NormedProto.substr(1, 3);
1128  } else if (Is5OpLaneAccum) {
1129    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1130    std::string tmp = NormedProto.substr(1,2);
1131    tmp += NormedProto[4];
1132    NormedProto = tmp;
1133  } else if (IsVPADAL) {
1134    // If we have VPADAL, ignore the first character.
1135    NormedProto = NormedProto.substr(0, 2);
1136  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1137    // If our instruction is a dup instruction, keep only the first and
1138    // last characters.
1139    std::string tmp = "";
1140    tmp += NormedProto[0];
1141    tmp += NormedProto[NormedProto.size()-1];
1142    NormedProto = tmp;
1143  }
1144}
1145
1146/// GenerateRegisterCheckPatterns - Given a bunch of data we have
1147/// extracted, generate a FileCheck pattern to check that an
1148/// instruction's arguments are correct.
1149static void GenerateRegisterCheckPattern(const std::string &Name,
1150                                         const std::string &Proto,
1151                                         const std::string &OutTypeCode,
1152                                         const bool &HasNPostfix,
1153                                         const bool &IsQuad,
1154                                         const bool &HasLanePostfix,
1155                                         const bool &HasDupPostfix,
1156                                         const size_t &TBNumber,
1157                                         std::string &RegisterSuffix) {
1158
1159  RegisterSuffix = "";
1160
1161  const StringRef NameRef(Name);
1162  const StringRef ProtoRef(Proto);
1163
1164  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1165    return;
1166  }
1167
1168  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1169  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1170
1171  if (IsLoadStore) {
1172    // Grab N value from  v{ld,st}N using its ascii representation.
1173    const size_t Count = NameRef[3] - 48;
1174
1175    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1176                                              HasDupPostfix, HasLanePostfix,
1177                                              Count, RegisterSuffix);
1178  } else if (IsTBXOrTBL) {
1179    RegisterSuffix += "d{{[0-9]+}}, {";
1180    for (size_t i = 0; i < TBNumber-1; i++) {
1181      RegisterSuffix += "d{{[0-9]+}}, ";
1182    }
1183    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1184  } else {
1185    // Handle a normal instruction.
1186    if (NameRef.count("vget") || NameRef.count("vset"))
1187      return;
1188
1189    // We first normalize our proto, since we only need to emit 4
1190    // different types of checks, yet have more than 4 proto types
1191    // that map onto those 4 patterns.
1192    std::string NormalizedProto("");
1193    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1194                                             HasLanePostfix, HasDupPostfix,
1195                                             NormalizedProto);
1196
1197    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1198      const char &c = NormalizedProto[i];
1199      switch (c) {
1200      case 'q':
1201        RegisterSuffix += "q{{[0-9]+}}, ";
1202        break;
1203
1204      case 'd':
1205        RegisterSuffix += "d{{[0-9]+}}, ";
1206        break;
1207
1208      case 'i':
1209        RegisterSuffix += "#{{[0-9]+}}, ";
1210        break;
1211
1212      case 'a':
1213        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1214        break;
1215      }
1216    }
1217
1218    // Remove extra ", ".
1219    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1220  }
1221}
1222
1223/// GenerateChecksForIntrinsic - Given a specific instruction name +
1224/// typestr + class kind, generate the proper set of FileCheck
1225/// Patterns to check for. We could just return a string, but instead
1226/// use a vector since it provides us with the extra flexibility of
1227/// emitting multiple checks, which comes in handy for certain cases
1228/// like mla where we want to check for 2 different instructions.
1229static void GenerateChecksForIntrinsic(const std::string &Name,
1230                                       const std::string &Proto,
1231                                       StringRef &OutTypeStr,
1232                                       StringRef &InTypeStr,
1233                                       ClassKind Ck,
1234                                       const std::string &InstName,
1235                                       bool IsHiddenLOp,
1236                                       std::vector<std::string>& Result) {
1237
1238  // If Ck is a ClassNoTest instruction, just return so no test is
1239  // emitted.
1240  if(Ck == ClassNoTest)
1241    return;
1242
1243  if (Name == "vcvt_f32_f16") {
1244    Result.push_back("vcvt.f32.f16");
1245    return;
1246  }
1247
1248
1249  // Now we preprocess our instruction given the data we have to get the
1250  // data that we need.
1251  // Create a StringRef for String Manipulation of our Name.
1252  const StringRef NameRef(Name);
1253  // Instruction Prefix.
1254  std::string Prefix;
1255  // The type code for our out type string.
1256  std::string OutTypeCode;
1257  // To handle our different cases, we need to check for different postfixes.
1258  // Is our instruction a quad instruction.
1259  bool IsQuad = false;
1260  // Our instruction is of the form <instructionname>_n.
1261  bool HasNPostfix = false;
1262  // Our instruction is of the form <instructionname>_lane.
1263  bool HasLanePostfix = false;
1264  // Our instruction is of the form <instructionname>_dup.
1265  bool HasDupPostfix  = false;
1266  // Our instruction is a vcvt instruction which requires special handling.
1267  bool IsSpecialVCvt = false;
1268  // If we have a vtbxN or vtblN instruction, this is set to N.
1269  size_t TBNumber = -1;
1270  // Register Suffix
1271  std::string RegisterSuffix;
1272
1273  PreprocessInstruction(NameRef, InstName, Prefix,
1274                        HasNPostfix, HasLanePostfix, HasDupPostfix,
1275                        IsSpecialVCvt, TBNumber);
1276
1277  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1278  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1279                               HasLanePostfix, HasDupPostfix, TBNumber,
1280                               RegisterSuffix);
1281
1282  // In the following section, we handle a bunch of special cases. You can tell
1283  // a special case by the fact we are returning early.
1284
1285  // If our instruction is a logical instruction without postfix or a
1286  // hidden LOp just return the current Prefix.
1287  if (Ck == ClassL || IsHiddenLOp) {
1288    Result.push_back(Prefix + " " + RegisterSuffix);
1289    return;
1290  }
1291
1292  // If we have a vmov, due to the many different cases, some of which
1293  // vary within the different intrinsics generated for a single
1294  // instruction type, just output a vmov. (e.g. given an instruction
1295  // A, A.u32 might be vmov and A.u8 might be vmov.8).
1296  //
1297  // FIXME: Maybe something can be done about this. The two cases that we care
1298  // about are vmov as an LType and vmov as a WType.
1299  if (Prefix == "vmov") {
1300    Result.push_back(Prefix + " " + RegisterSuffix);
1301    return;
1302  }
1303
1304  // In the following section, we handle special cases.
1305
1306  if (OutTypeCode == "64") {
1307    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1308    // type, the intrinsic will be optimized away, so just return
1309    // nothing.  On the other hand if we are handling an uint64x2_t
1310    // (i.e. quad instruction), vdup/vmov instructions should be
1311    // emitted.
1312    if (Prefix == "vdup" || Prefix == "vext") {
1313      if (IsQuad) {
1314        Result.push_back("{{vmov|vdup}}");
1315      }
1316      return;
1317    }
1318
1319    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1320    // multiple register operands.
1321    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1322                            || Prefix == "vld4";
1323    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1324                            || Prefix == "vst4";
1325    if (MultiLoadPrefix || MultiStorePrefix) {
1326      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1327      return;
1328    }
1329
1330    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1331    // emitting said instructions. So return a check for
1332    // vldr/vstr/vmov/str instead.
1333    if (HasLanePostfix || HasDupPostfix) {
1334      if (Prefix == "vst1") {
1335        Result.push_back("{{str|vstr|vmov}}");
1336        return;
1337      } else if (Prefix == "vld1") {
1338        Result.push_back("{{ldr|vldr|vmov}}");
1339        return;
1340      }
1341    }
1342  }
1343
1344  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1345  // sometimes disassembled as vtrn.32. We use a regex to handle both
1346  // cases.
1347  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1348    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1349    return;
1350  }
1351
1352  // Currently on most ARM processors, we do not use vmla/vmls for
1353  // quad floating point operations. Instead we output vmul + vadd. So
1354  // check if we have one of those instructions and just output a
1355  // check for vmul.
1356  if (OutTypeCode == "f32") {
1357    if (Prefix == "vmls") {
1358      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1359      Result.push_back("vsub." + OutTypeCode);
1360      return;
1361    } else if (Prefix == "vmla") {
1362      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1363      Result.push_back("vadd." + OutTypeCode);
1364      return;
1365    }
1366  }
1367
1368  // If we have vcvt, get the input type from the instruction name
1369  // (which should be of the form instname_inputtype) and append it
1370  // before the output type.
1371  if (Prefix == "vcvt") {
1372    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1373    Prefix += "." + inTypeCode;
1374  }
1375
1376  // Append output type code to get our final mangled instruction.
1377  Prefix += "." + OutTypeCode;
1378
1379  Result.push_back(Prefix + " " + RegisterSuffix);
1380}
1381
/// UseMacro - Decide from the prototype string whether the intrinsic has to
/// be emitted as a preprocessor macro rather than an inline function.
///
/// A macro is required when the prototype contains
///   'i'        - an immediate argument, so that SemaChecking can range check
///                the value passed by the user, or
///   'p' / 'c'  - a pointer argument, to avoid hiding aligned attributes from
///                the pointer type.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
1399
/// MacroArgUsedDirectly - For an intrinsic defined as a macro, tell whether
/// argument \p i is referenced as-is instead of first being copied into a
/// local temporary. That is the case for constant ints ('i'), pointers ('p')
/// and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1407
1408// Generate the string "(argtype a, argtype b, ...)"
1409static std::string GenArgs(const std::string &proto, StringRef typestr,
1410                           const std::string &name) {
1411  bool define = UseMacro(proto);
1412  char arg = 'a';
1413
1414  std::string s;
1415  s += "(";
1416
1417  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1418    if (define) {
1419      // Some macro arguments are used directly instead of being assigned
1420      // to local temporaries; prepend an underscore prefix to make their
1421      // names consistent with the local temporaries.
1422      if (MacroArgUsedDirectly(proto, i))
1423        s += "__";
1424    } else {
1425      s += TypeString(proto[i], typestr) + " __";
1426    }
1427    s.push_back(arg);
1428    //To avoid argument being multiple defined, add extra number for renaming.
1429    if (name == "vcopy_lane" || name == "vcopy_laneq")
1430      s.push_back('1');
1431    if ((i + 1) < e)
1432      s += ", ";
1433  }
1434
1435  s += ")";
1436  return s;
1437}
1438
1439// Macro arguments are not type-checked like inline function arguments, so
1440// assign them to local temporaries to get the right type checking.
1441static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1442                                  const std::string &name ) {
1443  char arg = 'a';
1444  std::string s;
1445  bool generatedLocal = false;
1446
1447  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1448    // Do not create a temporary for an immediate argument.
1449    // That would defeat the whole point of using a macro!
1450    if (MacroArgUsedDirectly(proto, i))
1451      continue;
1452    generatedLocal = true;
1453    bool extranumber = false;
1454    if (name == "vcopy_lane" || name == "vcopy_laneq")
1455      extranumber = true;
1456
1457    s += TypeString(proto[i], typestr) + " __";
1458    s.push_back(arg);
1459    if(extranumber)
1460      s.push_back('1');
1461    s += " = (";
1462    s.push_back(arg);
1463    if(extranumber)
1464      s.push_back('1');
1465    s += "); ";
1466  }
1467
1468  if (generatedLocal)
1469    s += "\\\n  ";
1470  return s;
1471}
1472
1473// Use the vmovl builtin to sign-extend or zero-extend a vector.
1474static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1475  std::string s, high;
1476  high = h ? "_high" : "";
1477  s = MangleName("vmovl" + high, typestr, ClassS);
1478  s += "(" + a + ")";
1479  return s;
1480}
1481
1482// Get the high 64-bit part of a vector
1483static std::string GetHigh(const std::string &a, StringRef typestr) {
1484  std::string s;
1485  s = MangleName("vget_high", typestr, ClassS);
1486  s += "(" + a + ")";
1487  return s;
1488}
1489
1490// Gen operation with two operands and get high 64-bit for both of two operands.
1491static std::string Gen2OpWith2High(StringRef typestr,
1492                                   const std::string &op,
1493                                   const std::string &a,
1494                                   const std::string &b) {
1495  std::string s;
1496  std::string Op1 = GetHigh(a, typestr);
1497  std::string Op2 = GetHigh(b, typestr);
1498  s = MangleName(op, typestr, ClassS);
1499  s += "(" + Op1 + ", " + Op2 + ");";
1500  return s;
1501}
1502
1503// Gen operation with three operands and get high 64-bit of the latter
1504// two operands.
1505static std::string Gen3OpWith2High(StringRef typestr,
1506                                   const std::string &op,
1507                                   const std::string &a,
1508                                   const std::string &b,
1509                                   const std::string &c) {
1510  std::string s;
1511  std::string Op1 = GetHigh(b, typestr);
1512  std::string Op2 = GetHigh(c, typestr);
1513  s = MangleName(op, typestr, ClassS);
1514  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1515  return s;
1516}
1517
1518// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1519static std::string GenCombine(std::string typestr,
1520                              const std::string &a,
1521                              const std::string &b) {
1522  std::string s;
1523  s = MangleName("vcombine", typestr, ClassS);
1524  s += "(" + a + ", " + b + ")";
1525  return s;
1526}
1527
1528static std::string Duplicate(unsigned nElts, StringRef typestr,
1529                             const std::string &a) {
1530  std::string s;
1531
1532  s = "(" + TypeString('d', typestr) + "){ ";
1533  for (unsigned i = 0; i != nElts; ++i) {
1534    s += a;
1535    if ((i + 1) < nElts)
1536      s += ", ";
1537  }
1538  s += " }";
1539
1540  return s;
1541}
1542
// Emit "__builtin_shufflevector(vec, vec, lane, ..., lane)" with the lane
// expression repeated nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string Call("__builtin_shufflevector(");
  Call += vec;
  Call += ", ";
  Call += vec;

  // One ", <lane>" per element.
  const std::string LaneArg = ", " + lane;
  for (unsigned Remaining = nElts; Remaining != 0; --Remaining)
    Call += LaneArg;

  Call += ")";
  return Call;
}
1551
1552static std::string RemoveHigh(const std::string &name) {
1553  std::string s = name;
1554  std::size_t found = s.find("_high_");
1555  if (found == std::string::npos)
1556    PrintFatalError("name should contain \"_high_\" for high intrinsics");
1557  s.replace(found, 5, "");
1558  return s;
1559}
1560
1561static unsigned GetNumElements(StringRef typestr, bool &quad) {
1562  quad = false;
1563  bool dummy = false;
1564  char type = ClassifyType(typestr, quad, dummy, dummy);
1565  unsigned nElts = 0;
1566  switch (type) {
1567  case 'c': nElts = 8; break;
1568  case 's': nElts = 4; break;
1569  case 'i': nElts = 2; break;
1570  case 'l': nElts = 1; break;
1571  case 'h': nElts = 4; break;
1572  case 'f': nElts = 2; break;
1573  case 'd':
1574    nElts = 1;
1575    break;
1576  default:
1577    PrintFatalError("unhandled type!");
1578  }
1579  if (quad) nElts <<= 1;
1580  return nElts;
1581}
1582
1583// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1584static std::string GenOpString(const std::string &name, OpKind op,
1585                               const std::string &proto, StringRef typestr) {
1586  bool quad;
1587  unsigned nElts = GetNumElements(typestr, quad);
1588  bool define = UseMacro(proto);
1589
1590  std::string ts = TypeString(proto[0], typestr);
1591  std::string s;
1592  if (!define) {
1593    s = "return ";
1594  }
1595
1596  switch(op) {
1597  case OpAdd:
1598    s += "__a + __b;";
1599    break;
1600  case OpAddl:
1601    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1602    break;
1603  case OpAddlHi:
1604    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1605    break;
1606  case OpAddw:
1607    s += "__a + " + Extend(typestr, "__b") + ";";
1608    break;
1609  case OpAddwHi:
1610    s += "__a + " + Extend(typestr, "__b", 1) + ";";
1611    break;
1612  case OpSub:
1613    s += "__a - __b;";
1614    break;
1615  case OpSubl:
1616    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1617    break;
1618  case OpSublHi:
1619    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1620    break;
1621  case OpSubw:
1622    s += "__a - " + Extend(typestr, "__b") + ";";
1623    break;
1624  case OpSubwHi:
1625    s += "__a - " + Extend(typestr, "__b", 1) + ";";
1626    break;
1627  case OpMulN:
1628    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1629    break;
1630  case OpMulLane:
1631    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1632    break;
1633  case OpMulXLane:
1634    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1635      SplatLane(nElts, "__b", "__c") + ");";
1636    break;
1637  case OpMul:
1638    s += "__a * __b;";
1639    break;
1640  case OpMullLane:
1641    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1642      SplatLane(nElts, "__b", "__c") + ");";
1643    break;
1644  case OpMullHiLane:
1645    s += MangleName("vmull", typestr, ClassS) + "(" +
1646      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1647    break;
1648  case OpMlaN:
1649    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1650    break;
1651  case OpMlaLane:
1652    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1653    break;
1654  case OpMla:
1655    s += "__a + (__b * __c);";
1656    break;
1657  case OpMlalN:
1658    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1659      Duplicate(nElts, typestr, "__c") + ");";
1660    break;
1661  case OpMlalLane:
1662    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1663      SplatLane(nElts, "__c", "__d") + ");";
1664    break;
1665  case OpMlalHiLane:
1666    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1667      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1668    break;
1669  case OpMlal:
1670    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1671    break;
1672  case OpMullHi:
1673    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1674    break;
1675  case OpMlalHi:
1676    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1677    break;
1678  case OpMlsN:
1679    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1680    break;
1681  case OpMlsLane:
1682    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1683    break;
1684  case OpFMSLane:
1685    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1686    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1687    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1688    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1689    break;
1690  case OpFMSLaneQ:
1691    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1692    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1693    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1694    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1695    break;
1696  case OpMls:
1697    s += "__a - (__b * __c);";
1698    break;
1699  case OpMlslN:
1700    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1701      Duplicate(nElts, typestr, "__c") + ");";
1702    break;
1703  case OpMlslLane:
1704    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1705      SplatLane(nElts, "__c", "__d") + ");";
1706    break;
1707  case OpMlslHiLane:
1708    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1709      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1710    break;
1711  case OpMlsl:
1712    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1713    break;
1714  case OpMlslHi:
1715    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1716    break;
1717  case OpQDMullLane:
1718    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1719      SplatLane(nElts, "__b", "__c") + ");";
1720    break;
1721  case OpQDMullHiLane:
1722    s += MangleName("vqdmull", typestr, ClassS) + "(" +
1723      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1724    break;
1725  case OpQDMlalLane:
1726    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1727      SplatLane(nElts, "__c", "__d") + ");";
1728    break;
1729  case OpQDMlalHiLane:
1730    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1731      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1732    break;
1733  case OpQDMlslLane:
1734    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1735      SplatLane(nElts, "__c", "__d") + ");";
1736    break;
1737  case OpQDMlslHiLane:
1738    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1739      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1740    break;
1741  case OpQDMulhLane:
1742    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1743      SplatLane(nElts, "__b", "__c") + ");";
1744    break;
1745  case OpQRDMulhLane:
1746    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1747      SplatLane(nElts, "__b", "__c") + ");";
1748    break;
1749  case OpEq:
1750    s += "(" + ts + ")(__a == __b);";
1751    break;
1752  case OpGe:
1753    s += "(" + ts + ")(__a >= __b);";
1754    break;
1755  case OpLe:
1756    s += "(" + ts + ")(__a <= __b);";
1757    break;
1758  case OpGt:
1759    s += "(" + ts + ")(__a > __b);";
1760    break;
1761  case OpLt:
1762    s += "(" + ts + ")(__a < __b);";
1763    break;
1764  case OpNeg:
1765    s += " -__a;";
1766    break;
1767  case OpNot:
1768    s += " ~__a;";
1769    break;
1770  case OpAnd:
1771    s += "__a & __b;";
1772    break;
1773  case OpOr:
1774    s += "__a | __b;";
1775    break;
1776  case OpXor:
1777    s += "__a ^ __b;";
1778    break;
1779  case OpAndNot:
1780    s += "__a & ~__b;";
1781    break;
1782  case OpOrNot:
1783    s += "__a | ~__b;";
1784    break;
1785  case OpCast:
1786    s += "(" + ts + ")__a;";
1787    break;
1788  case OpConcat:
1789    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1790    s += ", (int64x1_t)__b, 0, 1);";
1791    break;
1792  case OpHi:
1793    // nElts is for the result vector, so the source is twice that number.
1794    s += "__builtin_shufflevector(__a, __a";
1795    for (unsigned i = nElts; i < nElts * 2; ++i)
1796      s += ", " + utostr(i);
1797    s+= ");";
1798    break;
1799  case OpLo:
1800    s += "__builtin_shufflevector(__a, __a";
1801    for (unsigned i = 0; i < nElts; ++i)
1802      s += ", " + utostr(i);
1803    s+= ");";
1804    break;
1805  case OpDup:
1806    s += Duplicate(nElts, typestr, "__a") + ";";
1807    break;
1808  case OpDupLane:
1809    s += SplatLane(nElts, "__a", "__b") + ";";
1810    break;
1811  case OpSelect:
1812    // ((0 & 1) | (~0 & 2))
1813    s += "(" + ts + ")";
1814    ts = TypeString(proto[1], typestr);
1815    s += "((__a & (" + ts + ")__b) | ";
1816    s += "(~__a & (" + ts + ")__c));";
1817    break;
1818  case OpRev16:
1819    s += "__builtin_shufflevector(__a, __a";
1820    for (unsigned i = 2; i <= nElts; i += 2)
1821      for (unsigned j = 0; j != 2; ++j)
1822        s += ", " + utostr(i - j - 1);
1823    s += ");";
1824    break;
1825  case OpRev32: {
1826    unsigned WordElts = nElts >> (1 + (int)quad);
1827    s += "__builtin_shufflevector(__a, __a";
1828    for (unsigned i = WordElts; i <= nElts; i += WordElts)
1829      for (unsigned j = 0; j != WordElts; ++j)
1830        s += ", " + utostr(i - j - 1);
1831    s += ");";
1832    break;
1833  }
1834  case OpRev64: {
1835    unsigned DblWordElts = nElts >> (int)quad;
1836    s += "__builtin_shufflevector(__a, __a";
1837    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1838      for (unsigned j = 0; j != DblWordElts; ++j)
1839        s += ", " + utostr(i - j - 1);
1840    s += ");";
1841    break;
1842  }
1843  case OpXtnHi: {
1844    s = TypeString(proto[1], typestr) + " __a1 = " +
1845        MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
1846        "return __builtin_shufflevector(__a, __a1";
1847    for (unsigned i = 0; i < nElts * 4; ++i)
1848      s += ", " + utostr(i);
1849    s += ");";
1850    break;
1851  }
1852  case OpSqxtunHi: {
1853    s = TypeString(proto[1], typestr) + " __a1 = " +
1854        MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
1855        "return __builtin_shufflevector(__a, __a1";
1856    for (unsigned i = 0; i < nElts * 4; ++i)
1857      s += ", " + utostr(i);
1858    s += ");";
1859    break;
1860  }
1861  case OpQxtnHi: {
1862    s = TypeString(proto[1], typestr) + " __a1 = " +
1863        MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
1864        "return __builtin_shufflevector(__a, __a1";
1865    for (unsigned i = 0; i < nElts * 4; ++i)
1866      s += ", " + utostr(i);
1867    s += ");";
1868    break;
1869  }
1870  case OpFcvtnHi: {
1871    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1872    s = TypeString(proto[1], typestr) + " __a1 = " +
1873        MangleName(FName, typestr, ClassS) + "(__b);\n  " +
1874        "return __builtin_shufflevector(__a, __a1";
1875    for (unsigned i = 0; i < nElts * 4; ++i)
1876      s += ", " + utostr(i);
1877    s += ");";
1878    break;
1879  }
1880  case OpFcvtlHi: {
1881    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1882    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1883        ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1884    break;
1885  }
1886  case OpFcvtxnHi: {
1887    s = TypeString(proto[1], typestr) + " __a1 = " +
1888        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
1889        "return __builtin_shufflevector(__a, __a1";
1890    for (unsigned i = 0; i < nElts * 4; ++i)
1891      s += ", " + utostr(i);
1892    s += ");";
1893    break;
1894  }
1895  case OpUzp1:
1896    s += "__builtin_shufflevector(__a, __b";
1897    for (unsigned i = 0; i < nElts; i++)
1898      s += ", " + utostr(2*i);
1899    s += ");";
1900    break;
1901  case OpUzp2:
1902    s += "__builtin_shufflevector(__a, __b";
1903    for (unsigned i = 0; i < nElts; i++)
1904      s += ", " + utostr(2*i+1);
1905    s += ");";
1906    break;
1907  case OpZip1:
1908    s += "__builtin_shufflevector(__a, __b";
1909    for (unsigned i = 0; i < (nElts/2); i++)
1910       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1911    s += ");";
1912    break;
1913  case OpZip2:
1914    s += "__builtin_shufflevector(__a, __b";
1915    for (unsigned i = nElts/2; i < nElts; i++)
1916       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1917    s += ");";
1918    break;
1919  case OpTrn1:
1920    s += "__builtin_shufflevector(__a, __b";
1921    for (unsigned i = 0; i < (nElts/2); i++)
1922       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1923    s += ");";
1924    break;
1925  case OpTrn2:
1926    s += "__builtin_shufflevector(__a, __b";
1927    for (unsigned i = 0; i < (nElts/2); i++)
1928       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
1929    s += ");";
1930    break;
1931  case OpAbdl: {
1932    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1933    if (typestr[0] != 'U') {
1934      // vabd results are always unsigned and must be zero-extended.
1935      std::string utype = "U" + typestr.str();
1936      s += "(" + TypeString(proto[0], typestr) + ")";
1937      abd = "(" + TypeString('d', utype) + ")" + abd;
1938      s += Extend(utype, abd) + ";";
1939    } else {
1940      s += Extend(typestr, abd) + ";";
1941    }
1942    break;
1943  }
1944  case OpAbdlHi:
1945    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
1946    break;
1947  case OpAddhnHi: {
1948    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
1949    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
1950    s += ";";
1951    break;
1952  }
1953  case OpRAddhnHi: {
1954    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
1955    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
1956    s += ";";
1957    break;
1958  }
1959  case OpSubhnHi: {
1960    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
1961    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
1962    s += ";";
1963    break;
1964  }
1965  case OpRSubhnHi: {
1966    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
1967    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
1968    s += ";";
1969    break;
1970  }
1971  case OpAba:
1972    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
1973    break;
1974  case OpAbal:
1975    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
1976    break;
1977  case OpAbalHi:
1978    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
1979    break;
1980  case OpQDMullHi:
1981    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
1982    break;
1983  case OpQDMlalHi:
1984    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
1985    break;
1986  case OpQDMlslHi:
1987    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
1988    break;
1989  case OpDiv:
1990    s += "__a / __b;";
1991    break;
1992  case OpMovlHi: {
1993    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
1994        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
1995    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
1996    s += "(__a1, 0);";
1997    break;
1998  }
1999  case OpLongHi: {
2000    // Another local variable __a1 is needed for calling a Macro,
2001    // or using __a will have naming conflict when Macro expanding.
2002    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2003         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2004    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2005         "(__a1, __b);";
2006    break;
2007  }
2008  case OpNarrowHi: {
2009    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2010         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
2011    break;
2012  }
2013  case OpCopyLane: {
2014    s += TypeString('s', typestr) + " __c2 = " +
2015         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
2016         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
2017    break;
2018  }
2019  case OpCopyQLane: {
2020    std::string typeCode = "";
2021    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2022    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2023         "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
2024    break;
2025  }
2026  case OpCopyLaneQ: {
2027    std::string typeCode = "";
2028    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2029    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2030         "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
2031    break;
2032  }
2033  case OpScalarMulLane: {
2034    std::string typeCode = "";
2035    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2036	s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2037	  "(__b, __c);\\\n  __a * __d1;";
2038    break;
2039  }
2040  case OpScalarMulLaneQ: {
2041    std::string typeCode = "";
2042    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2043        s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2044          "(__b, __c);\\\n  __a * __d1;";
2045    break;
2046  }
2047  case OpScalarMulXLane: {
2048    bool dummy = false;
2049    char type = ClassifyType(typestr, dummy, dummy, dummy);
2050    if (type == 'f') type = 's';
2051    std::string typeCode = "";
2052    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2053    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2054      "(__b, __c);\\\n  vmulx" + type + "_" +
2055      typeCode +  "(__a, __d1);";
2056    break;
2057  }
2058  case OpScalarMulXLaneQ: {
2059    bool dummy = false;
2060    char type = ClassifyType(typestr, dummy, dummy, dummy);
2061    if (type == 'f') type = 's';
2062    std::string typeCode = "";
2063    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2064    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2065      typeCode + "(__b, __c);\\\n  vmulx" + type +
2066      "_" + typeCode +  "(__a, __d1);";
2067    break;
2068  }
2069
2070  case OpScalarVMulXLane: {
2071    bool dummy = false;
2072    char type = ClassifyType(typestr, dummy, dummy, dummy);
2073    if (type == 'f') type = 's';
2074    std::string typeCode = "";
2075    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2076    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2077      typeCode + "(__a, 0);\\\n" +
2078      "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2079      typeCode + "(__b, __c);\\\n" +
2080      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2081      typeCode + "(__d1, __e1);\\\n" +
2082      "  " + TypeString('d', typestr) + " __g1;\\\n" +
2083      "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
2084    break;
2085  }
2086
2087  case OpScalarVMulXLaneQ: {
2088    bool dummy = false;
2089    char type = ClassifyType(typestr, dummy, dummy, dummy);
2090    if (type == 'f') type = 's';
2091    std::string typeCode = "";
2092    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2093    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2094      typeCode + "(__a, 0);\\\n" +
2095      "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2096      typeCode + "(__b, __c);\\\n" +
2097      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2098      typeCode + "(__d1, __e1);\\\n" +
2099      "  " + TypeString('d', typestr) + " __g1;\\\n" +
2100      "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
2101    break;
2102  }
2103
2104  default:
2105    PrintFatalError("unknown OpKind!");
2106  }
2107  return s;
2108}
2109
2110static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2111  unsigned mod = proto[0];
2112
2113  if (mod == 'v' || mod == 'f')
2114    mod = proto[1];
2115
2116  bool quad = false;
2117  bool poly = false;
2118  bool usgn = false;
2119  bool scal = false;
2120  bool cnst = false;
2121  bool pntr = false;
2122
2123  // Base type to get the type string for.
2124  char type = ClassifyType(typestr, quad, poly, usgn);
2125
2126  // Based on the modifying character, change the type and width if necessary.
2127  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2128
2129  NeonTypeFlags::EltType ET;
2130  switch (type) {
2131    case 'c':
2132      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2133      break;
2134    case 's':
2135      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2136      break;
2137    case 'i':
2138      ET = NeonTypeFlags::Int32;
2139      break;
2140    case 'l':
2141      ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2142      break;
2143    case 'h':
2144      ET = NeonTypeFlags::Float16;
2145      break;
2146    case 'f':
2147      ET = NeonTypeFlags::Float32;
2148      break;
2149    case 'd':
2150      ET = NeonTypeFlags::Float64;
2151      break;
2152    default:
2153      PrintFatalError("unhandled type!");
2154  }
2155  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2156  return Flags.getFlags();
2157}
2158
/// Return true if the prototype string \p proto contains one of the modifier
/// characters 's' or 'r', which callers treat as marking a scalar operand.
///
/// The prototype is taken by const reference so that no copy of the string is
/// made on each call.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("sr") != std::string::npos;
}
2164
2165// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
2166static std::string GenBuiltin(const std::string &name, const std::string &proto,
2167                              StringRef typestr, ClassKind ck) {
2168  std::string s;
2169
2170  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2171  // sret-like argument.
2172  bool sret = IsMultiVecProto(proto[0]);
2173
2174  bool define = UseMacro(proto);
2175
2176  // Check if the prototype has a scalar operand with the type of the vector
2177  // elements.  If not, bitcasting the args will take care of arg checking.
2178  // The actual signedness etc. will be taken care of with special enums.
2179  if (!ProtoHasScalar(proto))
2180    ck = ClassB;
2181
2182  if (proto[0] != 'v') {
2183    std::string ts = TypeString(proto[0], typestr);
2184
2185    if (define) {
2186      if (sret)
2187        s += ts + " r; ";
2188      else
2189        s += "(" + ts + ")";
2190    } else if (sret) {
2191      s += ts + " r; ";
2192    } else {
2193      s += "return (" + ts + ")";
2194    }
2195  }
2196
2197  bool splat = proto.find('a') != std::string::npos;
2198
2199  s += "__builtin_neon_";
2200  if (splat) {
2201    // Call the non-splat builtin: chop off the "_n" suffix from the name.
2202    std::string vname(name, 0, name.size()-2);
2203    s += MangleName(vname, typestr, ck);
2204  } else {
2205    s += MangleName(name, typestr, ck);
2206  }
2207  s += "(";
2208
2209  // Pass the address of the return variable as the first argument to sret-like
2210  // builtins.
2211  if (sret)
2212    s += "&r, ";
2213
2214  char arg = 'a';
2215  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2216    std::string args = std::string(&arg, 1);
2217
2218    // Use the local temporaries instead of the macro arguments.
2219    args = "__" + args;
2220
2221    bool argQuad = false;
2222    bool argPoly = false;
2223    bool argUsgn = false;
2224    bool argScalar = false;
2225    bool dummy = false;
2226    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2227    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2228                      dummy, dummy);
2229
2230    // Handle multiple-vector values specially, emitting each subvector as an
2231    // argument to the __builtin.
2232    unsigned NumOfVec = 0;
2233    if (proto[i] >= '2' && proto[i] <= '4') {
2234      NumOfVec = proto[i] - '0';
2235    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2236      NumOfVec = proto[i] - 'A' + 1;
2237    }
2238
2239    if (NumOfVec > 0) {
2240      // Check if an explicit cast is needed.
2241      if (argType != 'c' || argPoly || argUsgn)
2242        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2243
2244      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2245        s += args + ".val[" + utostr(vi) + "]";
2246        if ((vi + 1) < ve)
2247          s += ", ";
2248      }
2249      if ((i + 1) < e)
2250        s += ", ";
2251
2252      continue;
2253    }
2254
2255    if (splat && (i + 1) == e)
2256      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2257
2258    // Check if an explicit cast is needed.
2259    if ((splat || !argScalar) &&
2260        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2261      std::string argTypeStr = "c";
2262      if (ck != ClassB)
2263        argTypeStr = argType;
2264      if (argQuad)
2265        argTypeStr = "Q" + argTypeStr;
2266      args = "(" + TypeString('d', argTypeStr) + ")" + args;
2267    }
2268
2269    s += args;
2270    if ((i + 1) < e)
2271      s += ", ";
2272  }
2273
2274  // Extra constant integer to hold type class enum for this function, e.g. s8
2275  if (ck == ClassB)
2276    s += ", " + utostr(GetNeonEnum(proto, typestr));
2277
2278  s += ");";
2279
2280  if (proto[0] != 'v' && sret) {
2281    if (define)
2282      s += " r;";
2283    else
2284      s += " return r;";
2285  }
2286  return s;
2287}
2288
2289static std::string GenBuiltinDef(const std::string &name,
2290                                 const std::string &proto,
2291                                 StringRef typestr, ClassKind ck) {
2292  std::string s("BUILTIN(__builtin_neon_");
2293
2294  // If all types are the same size, bitcasting the args will take care
2295  // of arg checking.  The actual signedness etc. will be taken care of with
2296  // special enums.
2297  if (!ProtoHasScalar(proto))
2298    ck = ClassB;
2299
2300  s += MangleName(name, typestr, ck);
2301  s += ", \"";
2302
2303  for (unsigned i = 0, e = proto.size(); i != e; ++i)
2304    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2305
2306  // Extra constant integer to hold type class enum for this function, e.g. s8
2307  if (ck == ClassB)
2308    s += "i";
2309
2310  s += "\", \"n\")";
2311  return s;
2312}
2313
2314static std::string GenIntrinsic(const std::string &name,
2315                                const std::string &proto,
2316                                StringRef outTypeStr, StringRef inTypeStr,
2317                                OpKind kind, ClassKind classKind) {
2318  assert(!proto.empty() && "");
2319  bool define = UseMacro(proto) && kind != OpUnavailable;
2320  std::string s;
2321
2322  // static always inline + return type
2323  if (define)
2324    s += "#define ";
2325  else
2326    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2327
2328  // Function name with type suffix
2329  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2330  if (outTypeStr != inTypeStr) {
2331    // If the input type is different (e.g., for vreinterpret), append a suffix
2332    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2333    // does not insert another "q" in the name.
2334    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2335    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2336    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2337  }
2338  s += mangledName;
2339
2340  // Function arguments
2341  s += GenArgs(proto, inTypeStr, name);
2342
2343  // Definition.
2344  if (define) {
2345    s += " __extension__ ({ \\\n  ";
2346    s += GenMacroLocals(proto, inTypeStr, name);
2347  } else if (kind == OpUnavailable) {
2348    s += " __attribute__((unavailable));\n";
2349    return s;
2350  } else
2351    s += " {\n  ";
2352
2353  if (kind != OpNone)
2354    s += GenOpString(name, kind, proto, outTypeStr);
2355  else
2356    s += GenBuiltin(name, proto, outTypeStr, classKind);
2357  if (define)
2358    s += " })";
2359  else
2360    s += " }";
2361  s += "\n";
2362  return s;
2363}
2364
2365/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2366/// is comprised of type definitions and function declarations.
2367void NeonEmitter::run(raw_ostream &OS) {
2368  OS <<
2369    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2370    "---===\n"
2371    " *\n"
2372    " * Permission is hereby granted, free of charge, to any person obtaining "
2373    "a copy\n"
2374    " * of this software and associated documentation files (the \"Software\"),"
2375    " to deal\n"
2376    " * in the Software without restriction, including without limitation the "
2377    "rights\n"
2378    " * to use, copy, modify, merge, publish, distribute, sublicense, "
2379    "and/or sell\n"
2380    " * copies of the Software, and to permit persons to whom the Software is\n"
2381    " * furnished to do so, subject to the following conditions:\n"
2382    " *\n"
2383    " * The above copyright notice and this permission notice shall be "
2384    "included in\n"
2385    " * all copies or substantial portions of the Software.\n"
2386    " *\n"
2387    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2388    "EXPRESS OR\n"
2389    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2390    "MERCHANTABILITY,\n"
2391    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2392    "SHALL THE\n"
2393    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2394    "OTHER\n"
2395    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2396    "ARISING FROM,\n"
2397    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2398    "DEALINGS IN\n"
2399    " * THE SOFTWARE.\n"
2400    " *\n"
2401    " *===--------------------------------------------------------------------"
2402    "---===\n"
2403    " */\n\n";
2404
2405  OS << "#ifndef __ARM_NEON_H\n";
2406  OS << "#define __ARM_NEON_H\n\n";
2407
2408  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
2409  OS << "#error \"NEON support not enabled\"\n";
2410  OS << "#endif\n\n";
2411
2412  OS << "#include <stdint.h>\n\n";
2413
2414  // Emit NEON-specific scalar typedefs.
2415  OS << "typedef float float32_t;\n";
2416  OS << "typedef __fp16 float16_t;\n";
2417
2418  OS << "#ifdef __aarch64__\n";
2419  OS << "typedef double float64_t;\n";
2420  OS << "#endif\n\n";
2421
2422  // For now, signedness of polynomial types depends on target
2423  OS << "#ifdef __aarch64__\n";
2424  OS << "typedef uint8_t poly8_t;\n";
2425  OS << "typedef uint16_t poly16_t;\n";
2426  OS << "typedef uint64_t poly64_t;\n";
2427  OS << "#else\n";
2428  OS << "typedef int8_t poly8_t;\n";
2429  OS << "typedef int16_t poly16_t;\n";
2430  OS << "#endif\n";
2431
2432  // Emit Neon vector typedefs.
2433  std::string TypedefTypes(
2434      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2435  SmallVector<StringRef, 24> TDTypeVec;
2436  ParseTypes(0, TypedefTypes, TDTypeVec);
2437
2438  // Emit vector typedefs.
2439  bool isA64 = false;
2440  bool preinsert;
2441  bool postinsert;
2442  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2443    bool dummy, quad = false, poly = false;
2444    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2445    preinsert = false;
2446    postinsert = false;
2447
2448    if (type == 'd' || (type == 'l' && poly)) {
2449      preinsert = isA64? false: true;
2450      isA64 = true;
2451    } else {
2452      postinsert = isA64? true: false;
2453      isA64 = false;
2454    }
2455    if (postinsert)
2456      OS << "#endif\n";
2457    if (preinsert)
2458      OS << "#ifdef __aarch64__\n";
2459
2460    if (poly)
2461      OS << "typedef __attribute__((neon_polyvector_type(";
2462    else
2463      OS << "typedef __attribute__((neon_vector_type(";
2464
2465    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2466    OS << utostr(nElts) << "))) ";
2467    if (nElts < 10)
2468      OS << " ";
2469
2470    OS << TypeString('s', TDTypeVec[i]);
2471    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2472
2473  }
2474  postinsert = isA64? true: false;
2475  if (postinsert)
2476    OS << "#endif\n";
2477  OS << "\n";
2478
2479  // Emit struct typedefs.
2480  isA64 = false;
2481  for (unsigned vi = 2; vi != 5; ++vi) {
2482    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2483      bool dummy, quad = false, poly = false;
2484      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2485      preinsert = false;
2486      postinsert = false;
2487
2488      if (type == 'd' || (type == 'l' && poly)) {
2489        preinsert = isA64? false: true;
2490        isA64 = true;
2491      } else {
2492        postinsert = isA64? true: false;
2493        isA64 = false;
2494      }
2495      if (postinsert)
2496        OS << "#endif\n";
2497      if (preinsert)
2498        OS << "#ifdef __aarch64__\n";
2499
2500      std::string ts = TypeString('d', TDTypeVec[i]);
2501      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2502      OS << "typedef struct " << vs << " {\n";
2503      OS << "  " << ts << " val";
2504      OS << "[" << utostr(vi) << "]";
2505      OS << ";\n} ";
2506      OS << vs << ";\n";
2507      OS << "\n";
2508    }
2509  }
2510  postinsert = isA64? true: false;
2511  if (postinsert)
2512    OS << "#endif\n";
2513  OS << "\n";
2514
2515  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2516
2517  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2518
2519  StringMap<ClassKind> EmittedMap;
2520
2521  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
2522  // intrinsics.  (Some of the saturating multiply instructions are also
2523  // used to implement the corresponding "_lane" variants, but tablegen
2524  // sorts the records into alphabetical order so that the "_lane" variants
2525  // come after the intrinsics they use.)
2526  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
2527  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
2528  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
2529  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
2530
2531  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
2532  // common intrinsics appear only once in the output stream.
2533  // The check for uniquiness is done in emitIntrinsic.
2534  // Emit ARM intrinsics.
2535  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2536    Record *R = RV[i];
2537
2538    // Skip AArch64 intrinsics; they will be emitted at the end.
2539    bool isA64 = R->getValueAsBit("isA64");
2540    if (isA64)
2541      continue;
2542
2543    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
2544        R->getName() != "VABD")
2545      emitIntrinsic(OS, R, EmittedMap);
2546  }
2547
2548  // Emit AArch64-specific intrinsics.
2549  OS << "#ifdef __aarch64__\n";
2550
2551  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
2552  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
2553  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
2554
2555  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2556    Record *R = RV[i];
2557
2558    // Skip ARM intrinsics already included above.
2559    bool isA64 = R->getValueAsBit("isA64");
2560    if (!isA64)
2561      continue;
2562
2563    emitIntrinsic(OS, R, EmittedMap);
2564  }
2565
2566  OS << "#endif\n\n";
2567
2568  OS << "#undef __ai\n\n";
2569  OS << "#endif /* __ARM_NEON_H */\n";
2570}
2571
2572/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2573/// intrinsics specified by record R checking for intrinsic uniqueness.
2574void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2575                                StringMap<ClassKind> &EmittedMap) {
2576  std::string name = R->getValueAsString("Name");
2577  std::string Proto = R->getValueAsString("Prototype");
2578  std::string Types = R->getValueAsString("Types");
2579
2580  SmallVector<StringRef, 16> TypeVec;
2581  ParseTypes(R, Types, TypeVec);
2582
2583  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2584
2585  ClassKind classKind = ClassNone;
2586  if (R->getSuperClasses().size() >= 2)
2587    classKind = ClassMap[R->getSuperClasses()[1]];
2588  if (classKind == ClassNone && kind == OpNone)
2589    PrintFatalError(R->getLoc(), "Builtin has no class kind");
2590
2591  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2592    if (kind == OpReinterpret) {
2593      bool outQuad = false;
2594      bool dummy = false;
2595      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2596      for (unsigned srcti = 0, srcte = TypeVec.size();
2597           srcti != srcte; ++srcti) {
2598        bool inQuad = false;
2599        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2600        if (srcti == ti || inQuad != outQuad)
2601          continue;
2602        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2603                                     OpCast, ClassS);
2604        if (EmittedMap.count(s))
2605          continue;
2606        EmittedMap[s] = ClassS;
2607        OS << s;
2608      }
2609    } else {
2610      std::string s =
2611          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2612      if (EmittedMap.count(s))
2613        continue;
2614      EmittedMap[s] = classKind;
2615      OS << s;
2616    }
2617  }
2618  OS << "\n";
2619}
2620
2621static unsigned RangeFromType(const char mod, StringRef typestr) {
2622  // base type to get the type string for.
2623  bool quad = false, dummy = false;
2624  char type = ClassifyType(typestr, quad, dummy, dummy);
2625  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2626
2627  switch (type) {
2628    case 'c':
2629      return (8 << (int)quad) - 1;
2630    case 'h':
2631    case 's':
2632      return (4 << (int)quad) - 1;
2633    case 'f':
2634    case 'i':
2635      return (2 << (int)quad) - 1;
2636    case 'd':
2637    case 'l':
2638      return (1 << (int)quad) - 1;
2639    default:
2640      PrintFatalError("unhandled type!");
2641  }
2642}
2643
2644static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2645  // base type to get the type string for.
2646  bool dummy = false;
2647  char type = ClassifyType(typestr, dummy, dummy, dummy);
2648  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2649
2650  switch (type) {
2651    case 'c':
2652      return 7;
2653    case 'h':
2654    case 's':
2655      return 15;
2656    case 'f':
2657    case 'i':
2658      return 31;
2659    case 'd':
2660    case 'l':
2661      return 63;
2662    default:
2663      PrintFatalError("unhandled type!");
2664  }
2665}
2666
2667/// Generate the ARM and AArch64 intrinsic range checking code for
2668/// shift/lane immediates, checking for unique declarations.
2669void
2670NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2671                                        StringMap<ClassKind> &A64IntrinsicMap,
2672                                        bool isA64RangeCheck) {
2673  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2674  StringMap<OpKind> EmittedMap;
2675
2676  // Generate the intrinsic range checking code for shift/lane immediates.
2677  if (isA64RangeCheck)
2678    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2679  else
2680    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2681
2682  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2683    Record *R = RV[i];
2684
2685    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2686    if (k != OpNone)
2687      continue;
2688
2689    std::string name = R->getValueAsString("Name");
2690    std::string Proto = R->getValueAsString("Prototype");
2691    std::string Types = R->getValueAsString("Types");
2692    std::string Rename = name + "@" + Proto;
2693
2694    // Functions with 'a' (the splat code) in the type prototype should not get
2695    // their own builtin as they use the non-splat variant.
2696    if (Proto.find('a') != std::string::npos)
2697      continue;
2698
2699    // Functions which do not have an immediate do not need to have range
2700    // checking code emitted.
2701    size_t immPos = Proto.find('i');
2702    if (immPos == std::string::npos)
2703      continue;
2704
2705    SmallVector<StringRef, 16> TypeVec;
2706    ParseTypes(R, Types, TypeVec);
2707
2708    if (R->getSuperClasses().size() < 2)
2709      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2710
2711    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2712
2713    // Do not include AArch64 range checks if not generating code for AArch64.
2714    bool isA64 = R->getValueAsBit("isA64");
2715    if (!isA64RangeCheck && isA64)
2716      continue;
2717
2718    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2719    // redefined by AArch64 to handle new types.
2720    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2721      ClassKind &A64CK = A64IntrinsicMap[Rename];
2722      if (A64CK == ck && ck != ClassNone)
2723        continue;
2724    }
2725
2726    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2727      std::string namestr, shiftstr, rangestr;
2728
2729      if (R->getValueAsBit("isVCVT_N")) {
2730        // VCVT between floating- and fixed-point values takes an immediate
2731        // in the range [1, 32] for f32, or [1, 64] for f64.
2732        ck = ClassB;
2733        if (name.find("32") != std::string::npos)
2734          rangestr = "l = 1; u = 31"; // upper bound = l + u
2735        else if (name.find("64") != std::string::npos)
2736          rangestr = "l = 1; u = 63";
2737        else
2738          PrintFatalError(R->getLoc(),
2739              "Fixed point convert name should contains \"32\" or \"64\"");
2740
2741      } else if (R->getValueAsBit("isScalarShift")) {
2742        // Right shifts have an 'r' in the name, left shifts do not.  Convert
2743        // instructions have the same bounds and right shifts.
2744        if (name.find('r') != std::string::npos ||
2745            name.find("cvt") != std::string::npos)
2746          rangestr = "l = 1; ";
2747
2748        rangestr += "u = " +
2749          utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
2750      } else if (!ProtoHasScalar(Proto)) {
2751        // Builtins which are overloaded by type will need to have their upper
2752        // bound computed at Sema time based on the type constant.
2753        ck = ClassB;
2754        if (R->getValueAsBit("isShift")) {
2755          shiftstr = ", true";
2756
2757          // Right shifts have an 'r' in the name, left shifts do not.
2758          if (name.find('r') != std::string::npos)
2759            rangestr = "l = 1; ";
2760        }
2761        rangestr += "u = RFT(TV" + shiftstr + ")";
2762      } else {
2763        // The immediate generally refers to a lane in the preceding argument.
2764        assert(immPos > 0 && "unexpected immediate operand");
2765        rangestr =
2766            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2767      }
2768      // Make sure cases appear only once by uniquing them in a string map.
2769      namestr = MangleName(name, TypeVec[ti], ck);
2770      if (EmittedMap.count(namestr))
2771        continue;
2772      EmittedMap[namestr] = OpNone;
2773
2774      // Calculate the index of the immediate that should be range checked.
2775      unsigned immidx = 0;
2776
2777      // Builtins that return a struct of multiple vectors have an extra
2778      // leading arg for the struct return.
2779      if (IsMultiVecProto(Proto[0]))
2780        ++immidx;
2781
2782      // Add one to the index for each argument until we reach the immediate
2783      // to be checked.  Structs of vectors are passed as multiple arguments.
2784      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2785        switch (Proto[ii]) {
2786        default:
2787          immidx += 1;
2788          break;
2789        case '2':
2790        case 'B':
2791          immidx += 2;
2792          break;
2793        case '3':
2794        case 'C':
2795          immidx += 3;
2796          break;
2797        case '4':
2798        case 'D':
2799          immidx += 4;
2800          break;
2801        case 'i':
2802          ie = ii + 1;
2803          break;
2804        }
2805      }
2806      if (isA64RangeCheck)
2807        OS << "case AArch64::BI__builtin_neon_";
2808      else
2809        OS << "case ARM::BI__builtin_neon_";
2810      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2811         << rangestr << "; break;\n";
2812    }
2813  }
2814  OS << "#endif\n\n";
2815}
2816
2817/// Generate the ARM and AArch64 overloaded type checking code for
2818/// SemaChecking.cpp, checking for unique builtin declarations.
2819void
2820NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2821                                      StringMap<ClassKind> &A64IntrinsicMap,
2822                                      bool isA64TypeCheck) {
2823  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2824  StringMap<OpKind> EmittedMap;
2825
2826  // Generate the overloaded type checking code for SemaChecking.cpp
2827  if (isA64TypeCheck)
2828    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2829  else
2830    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2831
2832  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2833    Record *R = RV[i];
2834    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2835    if (k != OpNone)
2836      continue;
2837
2838    std::string Proto = R->getValueAsString("Prototype");
2839    std::string Types = R->getValueAsString("Types");
2840    std::string name = R->getValueAsString("Name");
2841    std::string Rename = name + "@" + Proto;
2842
2843    // Functions with 'a' (the splat code) in the type prototype should not get
2844    // their own builtin as they use the non-splat variant.
2845    if (Proto.find('a') != std::string::npos)
2846      continue;
2847
2848    // Functions which have a scalar argument cannot be overloaded, no need to
2849    // check them if we are emitting the type checking code.
2850    if (ProtoHasScalar(Proto))
2851      continue;
2852
2853    SmallVector<StringRef, 16> TypeVec;
2854    ParseTypes(R, Types, TypeVec);
2855
2856    if (R->getSuperClasses().size() < 2)
2857      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2858
2859    // Do not include AArch64 type checks if not generating code for AArch64.
2860    bool isA64 = R->getValueAsBit("isA64");
2861    if (!isA64TypeCheck && isA64)
2862      continue;
2863
2864    // Include ARM  type check in AArch64 but only if ARM intrinsics
2865    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2866    // redefined in AArch64 to handle an additional 2 x f64 type.
2867    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2868    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2869      ClassKind &A64CK = A64IntrinsicMap[Rename];
2870      if (A64CK == ck && ck != ClassNone)
2871        continue;
2872    }
2873
2874    int si = -1, qi = -1;
2875    uint64_t mask = 0, qmask = 0;
2876    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2877      // Generate the switch case(s) for this builtin for the type validation.
2878      bool quad = false, poly = false, usgn = false;
2879      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
2880
2881      if (quad) {
2882        qi = ti;
2883        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2884      } else {
2885        si = ti;
2886        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2887      }
2888    }
2889
2890    // Check if the builtin function has a pointer or const pointer argument.
2891    int PtrArgNum = -1;
2892    bool HasConstPtr = false;
2893    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2894      char ArgType = Proto[arg];
2895      if (ArgType == 'c') {
2896        HasConstPtr = true;
2897        PtrArgNum = arg - 1;
2898        break;
2899      }
2900      if (ArgType == 'p') {
2901        PtrArgNum = arg - 1;
2902        break;
2903      }
2904    }
2905    // For sret builtins, adjust the pointer argument index.
2906    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
2907      PtrArgNum += 1;
2908
2909    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2910    // and vst1_lane intrinsics.  Using a pointer to the vector element
2911    // type with one of those operations causes codegen to select an aligned
2912    // load/store instruction.  If you want an unaligned operation,
2913    // the pointer argument needs to have less alignment than element type,
2914    // so just accept any pointer type.
2915    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2916      PtrArgNum = -1;
2917      HasConstPtr = false;
2918    }
2919
2920    if (mask) {
2921      if (isA64TypeCheck)
2922        OS << "case AArch64::BI__builtin_neon_";
2923      else
2924        OS << "case ARM::BI__builtin_neon_";
2925      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
2926         << "0x" << utohexstr(mask) << "ULL";
2927      if (PtrArgNum >= 0)
2928        OS << "; PtrArgNum = " << PtrArgNum;
2929      if (HasConstPtr)
2930        OS << "; HasConstPtr = true";
2931      OS << "; break;\n";
2932    }
2933    if (qmask) {
2934      if (isA64TypeCheck)
2935        OS << "case AArch64::BI__builtin_neon_";
2936      else
2937        OS << "case ARM::BI__builtin_neon_";
2938      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
2939         << "0x" << utohexstr(qmask) << "ULL";
2940      if (PtrArgNum >= 0)
2941        OS << "; PtrArgNum = " << PtrArgNum;
2942      if (HasConstPtr)
2943        OS << "; HasConstPtr = true";
2944      OS << "; break;\n";
2945    }
2946  }
2947  OS << "#endif\n\n";
2948}
2949
2950/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2951/// declaration of builtins, checking for unique builtin declarations.
2952void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2953                                 StringMap<ClassKind> &A64IntrinsicMap,
2954                                 bool isA64GenBuiltinDef) {
2955  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2956  StringMap<OpKind> EmittedMap;
2957
2958  // Generate BuiltinsARM.def and BuiltinsAArch64.def
2959  if (isA64GenBuiltinDef)
2960    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2961  else
2962    OS << "#ifdef GET_NEON_BUILTINS\n";
2963
2964  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2965    Record *R = RV[i];
2966    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2967    if (k != OpNone)
2968      continue;
2969
2970    std::string Proto = R->getValueAsString("Prototype");
2971    std::string name = R->getValueAsString("Name");
2972    std::string Rename = name + "@" + Proto;
2973
2974    // Functions with 'a' (the splat code) in the type prototype should not get
2975    // their own builtin as they use the non-splat variant.
2976    if (Proto.find('a') != std::string::npos)
2977      continue;
2978
2979    std::string Types = R->getValueAsString("Types");
2980    SmallVector<StringRef, 16> TypeVec;
2981    ParseTypes(R, Types, TypeVec);
2982
2983    if (R->getSuperClasses().size() < 2)
2984      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2985
2986    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2987
2988    // Do not include AArch64 BUILTIN() macros if not generating
2989    // code for AArch64
2990    bool isA64 = R->getValueAsBit("isA64");
2991    if (!isA64GenBuiltinDef && isA64)
2992      continue;
2993
2994    // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
2995    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2996    // redefined in AArch64 to handle an additional 2 x f64 type.
2997    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
2998      ClassKind &A64CK = A64IntrinsicMap[Rename];
2999      if (A64CK == ck && ck != ClassNone)
3000        continue;
3001    }
3002
3003    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3004      // Generate the declaration for this builtin, ensuring
3005      // that each unique BUILTIN() macro appears only once in the output
3006      // stream.
3007      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3008      if (EmittedMap.count(bd))
3009        continue;
3010
3011      EmittedMap[bd] = OpNone;
3012      OS << bd << "\n";
3013    }
3014  }
3015  OS << "#endif\n\n";
3016}
3017
3018/// runHeader - Emit a file with sections defining:
3019/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3020/// 2. the SemaChecking code for the type overload checking.
3021/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
3022void NeonEmitter::runHeader(raw_ostream &OS) {
3023  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3024
3025  // build a map of AArch64 intriniscs to be used in uniqueness checks.
3026  StringMap<ClassKind> A64IntrinsicMap;
3027  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3028    Record *R = RV[i];
3029
3030    bool isA64 = R->getValueAsBit("isA64");
3031    if (!isA64)
3032      continue;
3033
3034    ClassKind CK = ClassNone;
3035    if (R->getSuperClasses().size() >= 2)
3036      CK = ClassMap[R->getSuperClasses()[1]];
3037
3038    std::string Name = R->getValueAsString("Name");
3039    std::string Proto = R->getValueAsString("Prototype");
3040    std::string Rename = Name + "@" + Proto;
3041    if (A64IntrinsicMap.count(Rename))
3042      continue;
3043    A64IntrinsicMap[Rename] = CK;
3044  }
3045
3046  // Generate BuiltinsARM.def for ARM
3047  genBuiltinsDef(OS, A64IntrinsicMap, false);
3048
3049  // Generate BuiltinsAArch64.def for AArch64
3050  genBuiltinsDef(OS, A64IntrinsicMap, true);
3051
3052  // Generate ARM overloaded type checking code for SemaChecking.cpp
3053  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
3054
3055  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
3056  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
3057
3058  // Generate ARM range checking code for shift/lane immediates.
3059  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
3060
3061  // Generate the AArch64 range checking code for shift/lane immediates.
3062  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
3063}
3064
3065/// GenTest - Write out a test for the intrinsic specified by the name and
3066/// type strings, including the embedded patterns for FileCheck to match.
3067static std::string GenTest(const std::string &name,
3068                           const std::string &proto,
3069                           StringRef outTypeStr, StringRef inTypeStr,
3070                           bool isShift, bool isHiddenLOp,
3071                           ClassKind ck, const std::string &InstName,
3072                           bool isA64,
3073                           std::string & testFuncProto) {
3074  assert(!proto.empty() && "");
3075  std::string s;
3076
3077  // Function name with type suffix
3078  std::string mangledName = MangleName(name, outTypeStr, ClassS);
3079  if (outTypeStr != inTypeStr) {
3080    // If the input type is different (e.g., for vreinterpret), append a suffix
3081    // for the input type.  String off a "Q" (quad) prefix so that MangleName
3082    // does not insert another "q" in the name.
3083    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3084    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3085    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3086  }
3087
3088  // todo: GenerateChecksForIntrinsic does not generate CHECK
3089  // for aarch64 instructions yet
3090  std::vector<std::string> FileCheckPatterns;
3091  if (!isA64) {
3092	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3093							   isHiddenLOp, FileCheckPatterns);
3094	s+= "// CHECK_ARM: test_" + mangledName + "\n";
3095  }
3096  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3097
3098  // Emit the FileCheck patterns.
3099  // If for any reason we do not want to emit a check, mangledInst
3100  // will be the empty string.
3101  if (FileCheckPatterns.size()) {
3102    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3103                                                  e = FileCheckPatterns.end();
3104         i != e;
3105         ++i) {
3106      s += "// CHECK_ARM: " + *i + "\n";
3107    }
3108  }
3109
3110  // Emit the start of the test function.
3111
3112  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3113  char arg = 'a';
3114  std::string comma;
3115  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3116    // Do not create arguments for values that must be immediate constants.
3117    if (proto[i] == 'i')
3118      continue;
3119    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3120    testFuncProto.push_back(arg);
3121    comma = ", ";
3122  }
3123  testFuncProto += ")";
3124
3125  s+= testFuncProto;
3126  s+= " {\n  ";
3127
3128  if (proto[0] != 'v')
3129    s += "return ";
3130  s += mangledName + "(";
3131  arg = 'a';
3132  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3133    if (proto[i] == 'i') {
3134      // For immediate operands, test the maximum value.
3135      if (isShift)
3136        s += "1"; // FIXME
3137      else
3138        // The immediate generally refers to a lane in the preceding argument.
3139        s += utostr(RangeFromType(proto[i-1], inTypeStr));
3140    } else {
3141      s.push_back(arg);
3142    }
3143    if ((i + 1) < e)
3144      s += ", ";
3145  }
3146  s += ");\n}\n\n";
3147  return s;
3148}
3149
3150/// Write out all intrinsic tests for the specified target, checking
3151/// for intrinsic test uniqueness.
3152void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
3153                                bool isA64GenTest) {
3154  if (isA64GenTest)
3155	OS << "#ifdef __aarch64__\n";
3156
3157  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3158  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3159    Record *R = RV[i];
3160    std::string name = R->getValueAsString("Name");
3161    std::string Proto = R->getValueAsString("Prototype");
3162    std::string Types = R->getValueAsString("Types");
3163    bool isShift = R->getValueAsBit("isShift");
3164    std::string InstName = R->getValueAsString("InstName");
3165    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3166    bool isA64 = R->getValueAsBit("isA64");
3167
3168    // do not include AArch64 intrinsic test if not generating
3169    // code for AArch64
3170    if (!isA64GenTest && isA64)
3171      continue;
3172
3173    SmallVector<StringRef, 16> TypeVec;
3174    ParseTypes(R, Types, TypeVec);
3175
3176    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3177    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3178    if (kind == OpUnavailable)
3179      continue;
3180    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3181      if (kind == OpReinterpret) {
3182        bool outQuad = false;
3183        bool dummy = false;
3184        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3185        for (unsigned srcti = 0, srcte = TypeVec.size();
3186             srcti != srcte; ++srcti) {
3187          bool inQuad = false;
3188          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3189          if (srcti == ti || inQuad != outQuad)
3190            continue;
3191		  std::string testFuncProto;
3192          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3193                                  isShift, isHiddenLOp, ck, InstName, isA64,
3194								  testFuncProto);
3195          if (EmittedMap.count(testFuncProto))
3196            continue;
3197          EmittedMap[testFuncProto] = kind;
3198          OS << s << "\n";
3199        }
3200      } else {
3201		std::string testFuncProto;
3202        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3203                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
3204        if (EmittedMap.count(testFuncProto))
3205          continue;
3206        EmittedMap[testFuncProto] = kind;
3207        OS << s << "\n";
3208      }
3209    }
3210  }
3211
3212  if (isA64GenTest)
3213	OS << "#endif\n";
3214}
3215/// runTests - Write out a complete set of tests for all of the Neon
3216/// intrinsics.
3217void NeonEmitter::runTests(raw_ostream &OS) {
3218  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3219        "apcs-gnu\\\n"
3220        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3221        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3222		"\n"
3223	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3224	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
3225	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3226        "\n"
3227        "// REQUIRES: long_tests\n"
3228        "\n"
3229        "#include <arm_neon.h>\n"
3230        "\n";
3231
3232  // ARM tests must be emitted before AArch64 tests to ensure
3233  // tests for intrinsics that are common to ARM and AArch64
3234  // appear only once in the output stream.
3235  // The check for uniqueness is done in genTargetTest.
3236  StringMap<OpKind> EmittedMap;
3237
3238  genTargetTest(OS, EmittedMap, false);
3239
3240  genTargetTest(OS, EmittedMap, true);
3241}
3242
3243namespace clang {
3244void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3245  NeonEmitter(Records).run(OS);
3246}
3247void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3248  NeonEmitter(Records).runHeader(OS);
3249}
3250void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3251  NeonEmitter(Records).runTests(OS);
3252}
3253} // End namespace clang
3254