NeonEmitter.cpp revision 097a4b487897ca29f0f371c81b6a8b6c1ca599e4
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
// OpKind - The set of operations a pseudo-intrinsic can be lowered with.
// Each value corresponds to one OP_* record name registered in
// NeonEmitter's OpMap below.
enum OpKind {
  OpNone,
  OpUnavailable,
  // Widening/narrowing add/subtract variants.
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  // Multiply and multiply-accumulate families.
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  // Vector-by-scalar (_n) variants.
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  // Vector-by-lane (_lane) variants.
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  // Comparisons.
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  // Unary/bitwise logic.
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  // Vector rearrangement / reinterpretation.
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  // Narrowing high-half variants.
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  // Absolute-difference family.
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  // Saturating doubling multiplies (high half).
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi
};
115
// ClassKind - Determines how an intrinsic's type suffix is spelled (and
// whether the intrinsic is tested at all); see InstructionTypeCode below.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
128
/// NeonTypeFlags - Packs a Neon element type together with its signedness
/// and vector-width (quad) bits into a single word, matching the encoding
/// clang's CodeGen expects.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,   // low nibble holds the EltType
    UnsignedFlag = 0x10, // set for unsigned integer element types
    QuadFlag = 0x20      // set for 128-bit (quad) vectors
  };
  uint32_t Flags;

public:
  /// The base element type, stored in the low nibble of Flags.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Build the flag word from its three components.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    Flags |= IsUnsigned ? UnsignedFlag : 0;
    Flags |= IsQuad ? QuadFlag : 0;
  }

  /// Return the raw encoded value.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
165
166namespace {
167class NeonEmitter {
168  RecordKeeper &Records;
169  StringMap<OpKind> OpMap;
170  DenseMap<Record*, ClassKind> ClassMap;
171
172public:
173  NeonEmitter(RecordKeeper &R) : Records(R) {
174    OpMap["OP_NONE"]  = OpNone;
175    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
176    OpMap["OP_ADD"]   = OpAdd;
177    OpMap["OP_ADDL"]  = OpAddl;
178    OpMap["OP_ADDLHi"] = OpAddlHi;
179    OpMap["OP_ADDW"]  = OpAddw;
180    OpMap["OP_ADDWHi"] = OpAddwHi;
181    OpMap["OP_SUB"]   = OpSub;
182    OpMap["OP_SUBL"]  = OpSubl;
183    OpMap["OP_SUBLHi"] = OpSublHi;
184    OpMap["OP_SUBW"]  = OpSubw;
185    OpMap["OP_SUBWHi"] = OpSubwHi;
186    OpMap["OP_MUL"]   = OpMul;
187    OpMap["OP_MLA"]   = OpMla;
188    OpMap["OP_MLAL"]  = OpMlal;
189    OpMap["OP_MULLHi"]  = OpMullHi;
190    OpMap["OP_MLALHi"]  = OpMlalHi;
191    OpMap["OP_MLS"]   = OpMls;
192    OpMap["OP_MLSL"]  = OpMlsl;
193    OpMap["OP_MLSLHi"] = OpMlslHi;
194    OpMap["OP_MUL_N"] = OpMulN;
195    OpMap["OP_MLA_N"] = OpMlaN;
196    OpMap["OP_MLS_N"] = OpMlsN;
197    OpMap["OP_MLAL_N"] = OpMlalN;
198    OpMap["OP_MLSL_N"] = OpMlslN;
199    OpMap["OP_MUL_LN"]= OpMulLane;
200    OpMap["OP_MULL_LN"] = OpMullLane;
201    OpMap["OP_MLA_LN"]= OpMlaLane;
202    OpMap["OP_MLS_LN"]= OpMlsLane;
203    OpMap["OP_MLAL_LN"] = OpMlalLane;
204    OpMap["OP_MLSL_LN"] = OpMlslLane;
205    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
206    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
207    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
208    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
209    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
210    OpMap["OP_EQ"]    = OpEq;
211    OpMap["OP_GE"]    = OpGe;
212    OpMap["OP_LE"]    = OpLe;
213    OpMap["OP_GT"]    = OpGt;
214    OpMap["OP_LT"]    = OpLt;
215    OpMap["OP_NEG"]   = OpNeg;
216    OpMap["OP_NOT"]   = OpNot;
217    OpMap["OP_AND"]   = OpAnd;
218    OpMap["OP_OR"]    = OpOr;
219    OpMap["OP_XOR"]   = OpXor;
220    OpMap["OP_ANDN"]  = OpAndNot;
221    OpMap["OP_ORN"]   = OpOrNot;
222    OpMap["OP_CAST"]  = OpCast;
223    OpMap["OP_CONC"]  = OpConcat;
224    OpMap["OP_HI"]    = OpHi;
225    OpMap["OP_LO"]    = OpLo;
226    OpMap["OP_DUP"]   = OpDup;
227    OpMap["OP_DUP_LN"] = OpDupLane;
228    OpMap["OP_SEL"]   = OpSelect;
229    OpMap["OP_REV16"] = OpRev16;
230    OpMap["OP_REV32"] = OpRev32;
231    OpMap["OP_REV64"] = OpRev64;
232    OpMap["OP_REINT"] = OpReinterpret;
233    OpMap["OP_ADDHNHi"] = OpAddhnHi;
234    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
235    OpMap["OP_SUBHNHi"] = OpSubhnHi;
236    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
237    OpMap["OP_ABDL"]  = OpAbdl;
238    OpMap["OP_ABDLHi"] = OpAbdlHi;
239    OpMap["OP_ABA"]   = OpAba;
240    OpMap["OP_ABAL"]  = OpAbal;
241    OpMap["OP_ABALHi"] = OpAbalHi;
242    OpMap["OP_QDMULLHi"] = OpQDMullHi;
243    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
244    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
245    OpMap["OP_DIV"] = OpDiv;
246    OpMap["OP_LONG_HI"] = OpLongHi;
247    OpMap["OP_NARROW_HI"] = OpNarrowHi;
248    OpMap["OP_MOVL_HI"] = OpMovlHi;
249
250    Record *SI = R.getClass("SInst");
251    Record *II = R.getClass("IInst");
252    Record *WI = R.getClass("WInst");
253    Record *SOpI = R.getClass("SOpInst");
254    Record *IOpI = R.getClass("IOpInst");
255    Record *WOpI = R.getClass("WOpInst");
256    Record *LOpI = R.getClass("LOpInst");
257    Record *NoTestOpI = R.getClass("NoTestOpInst");
258
259    ClassMap[SI] = ClassS;
260    ClassMap[II] = ClassI;
261    ClassMap[WI] = ClassW;
262    ClassMap[SOpI] = ClassS;
263    ClassMap[IOpI] = ClassI;
264    ClassMap[WOpI] = ClassW;
265    ClassMap[LOpI] = ClassL;
266    ClassMap[NoTestOpI] = ClassNoTest;
267  }
268
269  // run - Emit arm_neon.h.inc
270  void run(raw_ostream &o);
271
272  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
273  void runHeader(raw_ostream &o);
274
275  // runTests - Emit tests for all the Neon intrinsics.
276  void runTests(raw_ostream &o);
277
278private:
279  void emitIntrinsic(raw_ostream &OS, Record *R,
280                     StringMap<ClassKind> &EmittedMap);
281  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
282                      bool isA64GenBuiltinDef);
283  void genOverloadTypeCheckCode(raw_ostream &OS,
284                                StringMap<ClassKind> &A64IntrinsicMap,
285                                bool isA64TypeCheck);
286  void genIntrinsicRangeCheckCode(raw_ostream &OS,
287                                  StringMap<ClassKind> &A64IntrinsicMap,
288                                  bool isA64RangeCheck);
289  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
290                     bool isA64TestGen);
291};
292} // end anonymous namespace
293
294/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
295/// which each StringRef representing a single type declared in the string.
296/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
297/// 2xfloat and 4xfloat respectively.
298static void ParseTypes(Record *r, std::string &s,
299                       SmallVectorImpl<StringRef> &TV) {
300  const char *data = s.data();
301  int len = 0;
302
303  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
304    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
305                         || data[len] == 'H' || data[len] == 'S')
306      continue;
307
308    switch (data[len]) {
309      case 'c':
310      case 's':
311      case 'i':
312      case 'l':
313      case 'h':
314      case 'f':
315      case 'd':
316        break;
317      default:
318        PrintFatalError(r->getLoc(),
319                      "Unexpected letter: " + std::string(data + len, 1));
320    }
321    TV.push_back(StringRef(data, len + 1));
322    data += len + 1;
323    len = -1;
324  }
325}
326
327/// Widen - Convert a type code into the next wider type.  char -> short,
328/// short -> int, etc.
329static char Widen(const char t) {
330  switch (t) {
331    case 'c':
332      return 's';
333    case 's':
334      return 'i';
335    case 'i':
336      return 'l';
337    case 'h':
338      return 'f';
339    default:
340      PrintFatalError("unhandled type in widen!");
341  }
342}
343
344/// Narrow - Convert a type code into the next smaller type.  short -> char,
345/// float -> half float, etc.
346static char Narrow(const char t) {
347  switch (t) {
348    case 's':
349      return 'c';
350    case 'i':
351      return 's';
352    case 'l':
353      return 'i';
354    case 'f':
355      return 'h';
356    default:
357      PrintFatalError("unhandled type in narrow!");
358  }
359}
360
361static std::string GetNarrowTypestr(StringRef ty)
362{
363  std::string s;
364  for (size_t i = 0, end = ty.size(); i < end; i++) {
365    switch (ty[i]) {
366      case 's':
367        s += 'c';
368        break;
369      case 'i':
370        s += 's';
371        break;
372      case 'l':
373        s += 'i';
374        break;
375      default:
376        s += ty[i];
377        break;
378    }
379  }
380
381  return s;
382}
383
384/// For a particular StringRef, return the base type code, and whether it has
385/// the quad-vector, polynomial, or unsigned modifiers set.
386static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
387  unsigned off = 0;
388  // ignore scalar.
389  if (ty[off] == 'S') {
390    ++off;
391  }
392  // remember quad.
393  if (ty[off] == 'Q' || ty[off] == 'H') {
394    quad = true;
395    ++off;
396  }
397
398  // remember poly.
399  if (ty[off] == 'P') {
400    poly = true;
401    ++off;
402  }
403
404  // remember unsigned.
405  if (ty[off] == 'U') {
406    usgn = true;
407    ++off;
408  }
409
410  // base type to get the type string for.
411  return ty[off];
412}
413
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
/// \p type is the base element code from ClassifyType; the bool flags are
/// updated in place and the (possibly changed) element code is returned.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Poly types become their unsigned-integer equivalent.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'u':
      // Force unsigned integer; floats map to the same-width integer code.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'x':
      // Force signed integer; same float->integer mapping as 'u'.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'f':
      // Force float; a half input also forces a quad vector.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      // Force a 64-bit (non-quad) vector.
      quad = false;
      break;
    case 'w':
      // Widen the element and force a quad vector.
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Widen the element only.
      type = Widen(type);
      break;
    case 'i':
      // Scalar 'int' operand.
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar 64-bit operand, forced unsigned.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 's':
    case 'a':
      // Scalar operand of the base element type.
      scal = true;
      break;
    case 'k':
      // Quad vector of the base element type.
      quad = true;
      break;
    case 'c':
      cnst = true;
      // FALLTHROUGH intentional: 'c' is a const pointer, so it also takes
      // the 'p' (pointer-to-scalar) path below.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrow the element; if it narrowed to half, drop to a 64-bit vector.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      // Narrow the element but force a quad vector.
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      // Narrow the element and force unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      // Narrow the element and force a 64-bit (non-quad) vector.
      type = Narrow(type);
      quad = false;
      break;
    default:
      // Unrecognized mod codes leave the type untouched.
      break;
  }
  return type;
}
501
/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type.  QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // 'v' and 'i' bypass the Neon typedef machinery entirely.
  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  SmallString<128> s;

  if (usgn)
    s.push_back('u');

  // Emit the element name, then (for vectors) the lane count, which doubles
  // when the quad bit is set.  Scalars stop after the element name.
  switch (type) {
    case 'c':
      s += poly ? "poly8" : "int8";
      if (scal)
        break;
      s += quad ? "x16" : "x8";
      break;
    case 's':
      s += poly ? "poly16" : "int16";
      if (scal)
        break;
      s += quad ? "x8" : "x4";
      break;
    case 'i':
      s += "int32";
      if (scal)
        break;
      s += quad ? "x4" : "x2";
      break;
    case 'l':
      s += "int64";
      if (scal)
        break;
      s += quad ? "x2" : "x1";
      break;
    case 'h':
      s += "float16";
      if (scal)
        break;
      s += quad ? "x8" : "x4";
      break;
    case 'f':
      s += "float32";
      if (scal)
        break;
      s += quad ? "x4" : "x2";
      break;
    case 'd':
      s += "float64";
      if (scal)
        break;
      s += quad ? "x2" : "x1";
      break;

    default:
      PrintFatalError("unhandled type!");
  }

  // Modifiers '2'/'3'/'4' select the multi-vector struct typedefs.
  if (mod == '2')
    s += "x2";
  if (mod == '3')
    s += "x3";
  if (mod == '4')
    s += "x4";

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";

  if (pntr)
    s += " *";

  return s.str();
}
594
/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
/// \p ret selects return-position encoding (sret-style for multi-vector
/// results) versus argument-position encoding.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Poly types, and integer scalars of ClassI/ClassW intrinsics, are
  // encoded as unsigned in the builtin prototype.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');

  // Scalars get a plain (non-vector) Builtins.def code.
  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and 8-bit element) results are a vector of signed chars.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  // ClassB (and 8-bit element) arguments are a vector of signed chars.
  return quad ? "V16Sc" : "V8Sc";
}
689
690/// InstructionTypeCode - Computes the ARM argument character code and
691/// quad status for a specific type string and ClassKind.
692static void InstructionTypeCode(const StringRef &typeStr,
693                                const ClassKind ck,
694                                bool &quad,
695                                std::string &typeCode) {
696  bool poly = false;
697  bool usgn = false;
698  char type = ClassifyType(typeStr, quad, poly, usgn);
699
700  switch (type) {
701  case 'c':
702    switch (ck) {
703    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
704    case ClassI: typeCode = "i8"; break;
705    case ClassW: typeCode = "8"; break;
706    default: break;
707    }
708    break;
709  case 's':
710    switch (ck) {
711    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
712    case ClassI: typeCode = "i16"; break;
713    case ClassW: typeCode = "16"; break;
714    default: break;
715    }
716    break;
717  case 'i':
718    switch (ck) {
719    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
720    case ClassI: typeCode = "i32"; break;
721    case ClassW: typeCode = "32"; break;
722    default: break;
723    }
724    break;
725  case 'l':
726    switch (ck) {
727    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
728    case ClassI: typeCode = "i64"; break;
729    case ClassW: typeCode = "64"; break;
730    default: break;
731    }
732    break;
733  case 'h':
734    switch (ck) {
735    case ClassS:
736    case ClassI: typeCode = "f16"; break;
737    case ClassW: typeCode = "16"; break;
738    default: break;
739    }
740    break;
741  case 'f':
742    switch (ck) {
743    case ClassS:
744    case ClassI: typeCode = "f32"; break;
745    case ClassW: typeCode = "32"; break;
746    default: break;
747    }
748    break;
749  case 'd':
750    switch (ck) {
751    case ClassS:
752    case ClassI:
753      typeCode += "f64";
754      break;
755    case ClassW:
756      PrintFatalError("unhandled type!");
757    default:
758      break;
759    }
760    break;
761  default:
762    PrintFatalError("unhandled type!");
763  }
764}
765
766static char Insert_BHSD_Suffix(StringRef typestr){
767  unsigned off = 0;
768  if(typestr[off++] == 'S'){
769    while(typestr[off] == 'Q' || typestr[off] == 'H'||
770          typestr[off] == 'P' || typestr[off] == 'U')
771      ++off;
772    switch (typestr[off]){
773    default  : break;
774    case 'c' : return 'b';
775    case 's' : return 'h';
776    case 'i' :
777    case 'f' : return 's';
778    case 'l' :
779    case 'd' : return 'd';
780    }
781  }
782  return 0;
783}
784
/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // This one name is returned unmodified; it already carries its suffixes.
  if (name == "vcvt_f32_f16")
    return name;

  bool quad = false;
  std::string typeCode = "";

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    s += "_" + typeCode;
  }

  // ClassB intrinsics are polymorphic; they all get the generic "_v" suffix.
  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  // NOTE(review): this assumes 's' contains a '_' by now (a suffix was
  // appended above); std::string::insert would throw if find() returned
  // npos -- confirm no suffix-less names reach this point.
  if (typestr.find("Q") != StringRef::npos) {
      size_t pos = s.find('_');
      s = s.insert(pos, "q");
  }
  // Likewise insert the scalar 'b'/'h'/'s'/'d' marker before the first '_'.
  char ins = Insert_BHSD_Suffix(typestr);
  if(ins){
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}
822
823static void PreprocessInstruction(const StringRef &Name,
824                                  const std::string &InstName,
825                                  std::string &Prefix,
826                                  bool &HasNPostfix,
827                                  bool &HasLanePostfix,
828                                  bool &HasDupPostfix,
829                                  bool &IsSpecialVCvt,
830                                  size_t &TBNumber) {
831  // All of our instruction name fields from arm_neon.td are of the form
832  //   <instructionname>_...
833  // Thus we grab our instruction name via computation of said Prefix.
834  const size_t PrefixEnd = Name.find_first_of('_');
835  // If InstName is passed in, we use that instead of our name Prefix.
836  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
837
838  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
839
840  HasNPostfix = Postfix.count("_n");
841  HasLanePostfix = Postfix.count("_lane");
842  HasDupPostfix = Postfix.count("_dup");
843  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
844
845  if (InstName.compare("vtbl") == 0 ||
846      InstName.compare("vtbx") == 0) {
847    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
848    // encoding to get its true value.
849    TBNumber = Name[Name.size()-1] - 48;
850  }
851}
852
/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    // Emit only the d-register list: {d0[], d1[], ...}.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // Quad forms (except lane variants) consume a second d register.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    // Address operand, e.g. "[r0" possibly followed by ":<align>".
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint for vld1/vst1 dup/lane variants
    // whose element size is not 8 bits.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}
916
917static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
918                                     const bool &HasNPostfix) {
919  return (NameRef.count("vmla") ||
920          NameRef.count("vmlal") ||
921          NameRef.count("vmlsl") ||
922          NameRef.count("vmull") ||
923          NameRef.count("vqdmlal") ||
924          NameRef.count("vqdmlsl") ||
925          NameRef.count("vqdmulh") ||
926          NameRef.count("vqdmull") ||
927          NameRef.count("vqrdmulh")) && HasNPostfix;
928}
929
930static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
931                                         const bool &HasLanePostfix) {
932  return (NameRef.count("vmla") ||
933          NameRef.count("vmls") ||
934          NameRef.count("vmlal") ||
935          NameRef.count("vmlsl") ||
936          (NameRef.count("vmul") && NameRef.size() == 3)||
937          NameRef.count("vqdmlal") ||
938          NameRef.count("vqdmlsl") ||
939          NameRef.count("vqdmulh") ||
940          NameRef.count("vqrdmulh")) && HasLanePostfix;
941}
942
943static bool IsSpecialLaneMultiply(const StringRef &NameRef,
944                                  const bool &HasLanePostfix,
945                                  const bool &IsQuad) {
946  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
947                               && IsQuad;
948  const bool IsVMull = NameRef.count("mull") && !IsQuad;
949  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
950}
951
/// NormalizeProtoForRegisterPatternCreation - Map a raw proto string onto the
/// small register-pattern alphabet used by GenerateRegisterCheckPattern:
/// 'q' = quad register, 'd' = double register, 'a' = lane-indexed scalar,
/// 'i' = immediate.  A handful of instruction families then get their
/// normalized proto collapsed further.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      // Same-width operands follow the instruction's quad-ness.
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      // Widened/forced-quad operands are always quad registers.
      NormedProto += 'q';
      break;
    case 'g':
    case 'h':
    case 'e':
      // Narrowed/forced-64-bit operands are always double registers.
      NormedProto += 'd';
      break;
    case 'i':
      // An int operand is a lane index for _lane forms, else an immediate.
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
    // Note: proto characters not listed above contribute nothing.
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If this is a special lane multiply, drop the third character by
    // overwriting it with the fourth and truncating.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
1031
1032/// GenerateRegisterCheckPatterns - Given a bunch of data we have
1033/// extracted, generate a FileCheck pattern to check that an
1034/// instruction's arguments are correct.
1035static void GenerateRegisterCheckPattern(const std::string &Name,
1036                                         const std::string &Proto,
1037                                         const std::string &OutTypeCode,
1038                                         const bool &HasNPostfix,
1039                                         const bool &IsQuad,
1040                                         const bool &HasLanePostfix,
1041                                         const bool &HasDupPostfix,
1042                                         const size_t &TBNumber,
1043                                         std::string &RegisterSuffix) {
1044
1045  RegisterSuffix = "";
1046
1047  const StringRef NameRef(Name);
1048  const StringRef ProtoRef(Proto);
1049
1050  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1051    return;
1052  }
1053
1054  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1055  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1056
1057  if (IsLoadStore) {
1058    // Grab N value from  v{ld,st}N using its ascii representation.
1059    const size_t Count = NameRef[3] - 48;
1060
1061    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1062                                              HasDupPostfix, HasLanePostfix,
1063                                              Count, RegisterSuffix);
1064  } else if (IsTBXOrTBL) {
1065    RegisterSuffix += "d{{[0-9]+}}, {";
1066    for (size_t i = 0; i < TBNumber-1; i++) {
1067      RegisterSuffix += "d{{[0-9]+}}, ";
1068    }
1069    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1070  } else {
1071    // Handle a normal instruction.
1072    if (NameRef.count("vget") || NameRef.count("vset"))
1073      return;
1074
1075    // We first normalize our proto, since we only need to emit 4
1076    // different types of checks, yet have more than 4 proto types
1077    // that map onto those 4 patterns.
1078    std::string NormalizedProto("");
1079    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1080                                             HasLanePostfix, HasDupPostfix,
1081                                             NormalizedProto);
1082
1083    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1084      const char &c = NormalizedProto[i];
1085      switch (c) {
1086      case 'q':
1087        RegisterSuffix += "q{{[0-9]+}}, ";
1088        break;
1089
1090      case 'd':
1091        RegisterSuffix += "d{{[0-9]+}}, ";
1092        break;
1093
1094      case 'i':
1095        RegisterSuffix += "#{{[0-9]+}}, ";
1096        break;
1097
1098      case 'a':
1099        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1100        break;
1101      }
1102    }
1103
1104    // Remove extra ", ".
1105    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1106  }
1107}
1108
/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // Special case: vcvt_f32_f16 always disassembles the same way, so emit
  // the check for it directly.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // (size_t)-1 is the "not a vtbl/vtbx" sentinel.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1267
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // Two cases force a #define:
  //  'i' - an immediate argument, which must stay a literal so SemaChecking
  //        can range check the value the user passed;
  //  'p'/'c' - pointer (and const pointer) arguments, which must keep any
  //        aligned attributes that a function parameter type would hide.
  return proto.find_first_of("ipc") != std::string::npos;
}
1285
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  // True for constant ints (i), pointers (p) and const pointers (c).
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1293
1294// Generate the string "(argtype a, argtype b, ...)"
1295static std::string GenArgs(const std::string &proto, StringRef typestr) {
1296  bool define = UseMacro(proto);
1297  char arg = 'a';
1298
1299  std::string s;
1300  s += "(";
1301
1302  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1303    if (define) {
1304      // Some macro arguments are used directly instead of being assigned
1305      // to local temporaries; prepend an underscore prefix to make their
1306      // names consistent with the local temporaries.
1307      if (MacroArgUsedDirectly(proto, i))
1308        s += "__";
1309    } else {
1310      s += TypeString(proto[i], typestr) + " __";
1311    }
1312    s.push_back(arg);
1313    if ((i + 1) < e)
1314      s += ", ";
1315  }
1316
1317  s += ")";
1318  return s;
1319}
1320
1321// Macro arguments are not type-checked like inline function arguments, so
1322// assign them to local temporaries to get the right type checking.
1323static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1324  char arg = 'a';
1325  std::string s;
1326  bool generatedLocal = false;
1327
1328  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1329    // Do not create a temporary for an immediate argument.
1330    // That would defeat the whole point of using a macro!
1331    if (MacroArgUsedDirectly(proto, i))
1332      continue;
1333    generatedLocal = true;
1334
1335    s += TypeString(proto[i], typestr) + " __";
1336    s.push_back(arg);
1337    s += " = (";
1338    s.push_back(arg);
1339    s += "); ";
1340  }
1341
1342  if (generatedLocal)
1343    s += "\\\n  ";
1344  return s;
1345}
1346
1347// Use the vmovl builtin to sign-extend or zero-extend a vector.
1348static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1349  std::string s, high;
1350  high = h ? "_high" : "";
1351  s = MangleName("vmovl" + high, typestr, ClassS);
1352  s += "(" + a + ")";
1353  return s;
1354}
1355
1356// Get the high 64-bit part of a vector
1357static std::string GetHigh(const std::string &a, StringRef typestr) {
1358  std::string s;
1359  s = MangleName("vget_high", typestr, ClassS);
1360  s += "(" + a + ")";
1361  return s;
1362}
1363
1364// Gen operation with two operands and get high 64-bit for both of two operands.
1365static std::string Gen2OpWith2High(StringRef typestr,
1366                                   const std::string &op,
1367                                   const std::string &a,
1368                                   const std::string &b) {
1369  std::string s;
1370  std::string Op1 = GetHigh(a, typestr);
1371  std::string Op2 = GetHigh(b, typestr);
1372  s = MangleName(op, typestr, ClassS);
1373  s += "(" + Op1 + ", " + Op2 + ");";
1374  return s;
1375}
1376
1377// Gen operation with three operands and get high 64-bit of the latter
1378// two operands.
1379static std::string Gen3OpWith2High(StringRef typestr,
1380                                   const std::string &op,
1381                                   const std::string &a,
1382                                   const std::string &b,
1383                                   const std::string &c) {
1384  std::string s;
1385  std::string Op1 = GetHigh(b, typestr);
1386  std::string Op2 = GetHigh(c, typestr);
1387  s = MangleName(op, typestr, ClassS);
1388  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1389  return s;
1390}
1391
1392// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1393static std::string GenCombine(std::string typestr,
1394                              const std::string &a,
1395                              const std::string &b) {
1396  std::string s;
1397  s = MangleName("vcombine", typestr, ClassS);
1398  s += "(" + a + ", " + b + ")";
1399  return s;
1400}
1401
1402static std::string Duplicate(unsigned nElts, StringRef typestr,
1403                             const std::string &a) {
1404  std::string s;
1405
1406  s = "(" + TypeString('d', typestr) + "){ ";
1407  for (unsigned i = 0; i != nElts; ++i) {
1408    s += a;
1409    if ((i + 1) < nElts)
1410      s += ", ";
1411  }
1412  s += " }";
1413
1414  return s;
1415}
1416
// Emit a __builtin_shufflevector call that broadcasts a single lane of
// 'vec' across all nElts elements of the result.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string shuffle = "__builtin_shufflevector(" + vec + ", " + vec;
  for (unsigned i = 0; i != nElts; ++i)
    shuffle += ", " + lane;
  shuffle += ")";
  return shuffle;
}
1425
1426static std::string RemoveHigh(const std::string &name) {
1427  std::string s = name;
1428  std::size_t found = s.find("_high_");
1429  if (found == std::string::npos)
1430    PrintFatalError("name should contain \"_high_\" for high intrinsics");
1431  s.replace(found, 5, "");
1432  return s;
1433}
1434
1435static unsigned GetNumElements(StringRef typestr, bool &quad) {
1436  quad = false;
1437  bool dummy = false;
1438  char type = ClassifyType(typestr, quad, dummy, dummy);
1439  unsigned nElts = 0;
1440  switch (type) {
1441  case 'c': nElts = 8; break;
1442  case 's': nElts = 4; break;
1443  case 'i': nElts = 2; break;
1444  case 'l': nElts = 1; break;
1445  case 'h': nElts = 4; break;
1446  case 'f': nElts = 2; break;
1447  case 'd':
1448    nElts = 1;
1449    break;
1450  default:
1451    PrintFatalError("unhandled type!");
1452  }
1453  if (quad) nElts <<= 1;
1454  return nElts;
1455}
1456
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
// 'proto' encodes the return type (proto[0]) and argument types; 'typestr'
// is the base vector type string.  'name' is only consulted by the *Hi
// cases that derive the non-high variant via RemoveHigh.  The result is the
// body of either an inline function or a macro (see UseMacro).
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  // Macro bodies splice the expression in directly; functions return it.
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  // The trailing "1" argument to Extend selects the vmovl_high variant.
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Vector comparison results are cast to the (unsigned) result type ts.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    // The mask operand (__a) has the type of the first argument, which may
    // differ from the result type; cast the selected values to match it.
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  // The *hnHi cases narrow (__b, __c) and combine the result with the
  // already-narrow __a, so vcombine is mangled with the narrow type string.
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  // vshll_n with a shift of 0 widens the high half of __a.
  case OpMovlHi: {
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
         "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1764
1765static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1766  unsigned mod = proto[0];
1767
1768  if (mod == 'v' || mod == 'f')
1769    mod = proto[1];
1770
1771  bool quad = false;
1772  bool poly = false;
1773  bool usgn = false;
1774  bool scal = false;
1775  bool cnst = false;
1776  bool pntr = false;
1777
1778  // Base type to get the type string for.
1779  char type = ClassifyType(typestr, quad, poly, usgn);
1780
1781  // Based on the modifying character, change the type and width if necessary.
1782  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1783
1784  NeonTypeFlags::EltType ET;
1785  switch (type) {
1786    case 'c':
1787      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1788      break;
1789    case 's':
1790      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1791      break;
1792    case 'i':
1793      ET = NeonTypeFlags::Int32;
1794      break;
1795    case 'l':
1796      ET = NeonTypeFlags::Int64;
1797      break;
1798    case 'h':
1799      ET = NeonTypeFlags::Float16;
1800      break;
1801    case 'f':
1802      ET = NeonTypeFlags::Float32;
1803      break;
1804    case 'd':
1805      ET = NeonTypeFlags::Float64;
1806      break;
1807    default:
1808      PrintFatalError("unhandled type!");
1809  }
1810  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1811  return Flags.getFlags();
1812}
1813
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
// The emitted string is the function/macro body: an optional result
// variable or cast, the mangled __builtin_neon_* call with each argument
// cast or expanded as needed, and (for sret-like builtins) the final
// return of the result variable.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  // Emit the result handling prologue: a result variable for sret-like
  // builtins, otherwise a cast of the builtin's return value.
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' in the prototype marks a scalar that must be splatted to a vector.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // A splatted scalar is always the last argument.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret-like builtins, hand the result variable back to the caller.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
1930
1931static std::string GenBuiltinDef(const std::string &name,
1932                                 const std::string &proto,
1933                                 StringRef typestr, ClassKind ck) {
1934  std::string s("BUILTIN(__builtin_neon_");
1935
1936  // If all types are the same size, bitcasting the args will take care
1937  // of arg checking.  The actual signedness etc. will be taken care of with
1938  // special enums.
1939  if (proto.find('s') == std::string::npos)
1940    ck = ClassB;
1941
1942  s += MangleName(name, typestr, ck);
1943  s += ", \"";
1944
1945  for (unsigned i = 0, e = proto.size(); i != e; ++i)
1946    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1947
1948  // Extra constant integer to hold type class enum for this function, e.g. s8
1949  if (ck == ClassB)
1950    s += "i";
1951
1952  s += "\", \"n\")";
1953  return s;
1954}
1955
/// GenIntrinsic - Produce the complete arm_neon.h definition for one
/// intrinsic instance: \p name with output type \p outTypeStr and input type
/// \p inTypeStr, implemented either by an operation pattern (kind != OpNone)
/// or by a call to the matching __builtin_neon_* function (kind == OpNone).
static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  // Emit as a preprocessor macro when UseMacro(proto) requires it; an
  // unavailable intrinsic is always a plain declaration so the attribute
  // below can attach to it.
  bool define = UseMacro(proto) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  Strip off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr);

  // Definition.
  if (define) {
    // Macro bodies use a GNU statement expression so they can declare locals.
    s += " __extension__ ({ \\\n  ";
    s += GenMacroLocals(proto, inTypeStr);
  } else if (kind == OpUnavailable) {
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n  ";

  if (kind != OpNone)
    s += GenOpString(name, kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}
2006
/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Standard MIT-style license header for the generated header file.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // Reject inclusion when the target has no Advanced SIMD support.
  // NOTE(review): ACLE spells the AArch64 feature macro __ARM_FEATURE_ADVSIMD;
  // confirm "__AARCH_FEATURE_ADVSIMD" below is the intended name.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t only exists on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.  The string lists every element-type code in
  // both 64-bit and "Q"-prefixed 128-bit widths; ParseTypes splits it into
  // one entry per vector type.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool isA64 = false;

    // 2 x f64 vectors only exist on AArch64; guard their typedef.
    if (type == 'd' && quad)
      isA64 = true;

    if (isA64)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit lane counts so the typedefs line up in columns.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

    if (isA64)
      OS << "#endif\n";
  }
  OS << "\n";

  // Emit struct typedefs: aggregates of 2, 3, and 4 vectors, as returned by
  // the vld2/vld3/vld4 families.
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool isA64 = false;

      if (type == 'd' && quad)
        isA64 = true;

      if (isA64)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";

      if (isA64)
        OS << "#endif\n";

      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VABDL is not filtered out here even though it was emitted early above;
    // emitIntrinsic's EmittedMap check prevents it from being duplicated.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
2193
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind (second superclass) selects the mangling scheme; a plain
  // builtin (OpNone) must have one.
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret: emit one definition per (dst, src) pair of distinct
      // types that share the same vector width (both quad or both not).
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // Emit each unique definition only once (ARM and AArch64 share many).
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      // Emit each unique definition only once.
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}
2242
2243static unsigned RangeFromType(const char mod, StringRef typestr) {
2244  // base type to get the type string for.
2245  bool quad = false, dummy = false;
2246  char type = ClassifyType(typestr, quad, dummy, dummy);
2247  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2248
2249  switch (type) {
2250    case 'c':
2251      return (8 << (int)quad) - 1;
2252    case 'h':
2253    case 's':
2254      return (4 << (int)quad) - 1;
2255    case 'f':
2256    case 'i':
2257      return (2 << (int)quad) - 1;
2258    case 'l':
2259      return (1 << (int)quad) - 1;
2260    default:
2261      PrintFatalError("unhandled type!");
2262  }
2263}
2264
/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  // Tracks case labels already written so each appears only once.
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only builtins (OpNone) reach Sema's immediate check; operations are
    // expanded inline in arm_neon.h and never produce a __builtin_neon_*.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    // Key used to match this record against A64IntrinsicMap below.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");
      } else if (Proto.find('s') == std::string::npos) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
          immidx += 2;
          break;
        case '3':
          immidx += 3;
          break;
        case '4':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate: clamp the loop bound so scanning stops here.
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2401
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only builtins (OpNone) are type-checked in Sema; operations are
    // expanded inline in arm_neon.h.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    // Key used to match this record against A64IntrinsicMap below.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Proto.find('s') != std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM  type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // Build one bitmask per register width: bit N is set when the type enum
    // value returned by GetNeonEnum is valid for this builtin.  si/qi keep a
    // representative type index for mangling the case label below.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2534
/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  // Tracks BUILTIN() lines already written so each appears only once.
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only builtins (OpNone) get BUILTIN() entries; operations are expanded
    // inline in arm_neon.h.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    // Key used to match this record against A64IntrinsicMap below.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}
2602
2603/// runHeader - Emit a file with sections defining:
2604/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2605/// 2. the SemaChecking code for the type overload checking.
2606/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2607void NeonEmitter::runHeader(raw_ostream &OS) {
2608  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2609
2610  // build a map of AArch64 intriniscs to be used in uniqueness checks.
2611  StringMap<ClassKind> A64IntrinsicMap;
2612  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2613    Record *R = RV[i];
2614
2615    bool isA64 = R->getValueAsBit("isA64");
2616    if (!isA64)
2617      continue;
2618
2619    ClassKind CK = ClassNone;
2620    if (R->getSuperClasses().size() >= 2)
2621      CK = ClassMap[R->getSuperClasses()[1]];
2622
2623    std::string Name = R->getValueAsString("Name");
2624    std::string Proto = R->getValueAsString("Prototype");
2625    std::string Rename = Name + "@" + Proto;
2626    if (A64IntrinsicMap.count(Rename))
2627      continue;
2628    A64IntrinsicMap[Rename] = CK;
2629  }
2630
2631  // Generate BuiltinsARM.def for ARM
2632  genBuiltinsDef(OS, A64IntrinsicMap, false);
2633
2634  // Generate BuiltinsAArch64.def for AArch64
2635  genBuiltinsDef(OS, A64IntrinsicMap, true);
2636
2637  // Generate ARM overloaded type checking code for SemaChecking.cpp
2638  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2639
2640  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2641  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2642
2643  // Generate ARM range checking code for shift/lane immediates.
2644  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2645
2646  // Generate the AArch64 range checking code for shift/lane immediates.
2647  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2648}
2649
2650/// GenTest - Write out a test for the intrinsic specified by the name and
2651/// type strings, including the embedded patterns for FileCheck to match.
2652static std::string GenTest(const std::string &name,
2653                           const std::string &proto,
2654                           StringRef outTypeStr, StringRef inTypeStr,
2655                           bool isShift, bool isHiddenLOp,
2656                           ClassKind ck, const std::string &InstName,
2657						   bool isA64,
2658						   std::string & testFuncProto) {
2659  assert(!proto.empty() && "");
2660  std::string s;
2661
2662  // Function name with type suffix
2663  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2664  if (outTypeStr != inTypeStr) {
2665    // If the input type is different (e.g., for vreinterpret), append a suffix
2666    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2667    // does not insert another "q" in the name.
2668    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2669    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2670    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2671  }
2672
2673  // todo: GenerateChecksForIntrinsic does not generate CHECK
2674  // for aarch64 instructions yet
2675  std::vector<std::string> FileCheckPatterns;
2676  if (!isA64) {
2677	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2678							   isHiddenLOp, FileCheckPatterns);
2679	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2680  }
2681  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2682
2683  // Emit the FileCheck patterns.
2684  // If for any reason we do not want to emit a check, mangledInst
2685  // will be the empty string.
2686  if (FileCheckPatterns.size()) {
2687    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2688                                                  e = FileCheckPatterns.end();
2689         i != e;
2690         ++i) {
2691      s += "// CHECK_ARM: " + *i + "\n";
2692    }
2693  }
2694
2695  // Emit the start of the test function.
2696
2697  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2698  char arg = 'a';
2699  std::string comma;
2700  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2701    // Do not create arguments for values that must be immediate constants.
2702    if (proto[i] == 'i')
2703      continue;
2704    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2705    testFuncProto.push_back(arg);
2706    comma = ", ";
2707  }
2708  testFuncProto += ")";
2709
2710  s+= testFuncProto;
2711  s+= " {\n  ";
2712
2713  if (proto[0] != 'v')
2714    s += "return ";
2715  s += mangledName + "(";
2716  arg = 'a';
2717  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2718    if (proto[i] == 'i') {
2719      // For immediate operands, test the maximum value.
2720      if (isShift)
2721        s += "1"; // FIXME
2722      else
2723        // The immediate generally refers to a lane in the preceding argument.
2724        s += utostr(RangeFromType(proto[i-1], inTypeStr));
2725    } else {
2726      s.push_back(arg);
2727    }
2728    if ((i + 1) < e)
2729      s += ", ";
2730  }
2731  s += ");\n}\n\n";
2732  return s;
2733}
2734
2735/// Write out all intrinsic tests for the specified target, checking
2736/// for intrinsic test uniqueness.
2737void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2738                                bool isA64GenTest) {
2739  if (isA64GenTest)
2740	OS << "#ifdef __aarch64__\n";
2741
2742  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2743  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2744    Record *R = RV[i];
2745    std::string name = R->getValueAsString("Name");
2746    std::string Proto = R->getValueAsString("Prototype");
2747    std::string Types = R->getValueAsString("Types");
2748    bool isShift = R->getValueAsBit("isShift");
2749    std::string InstName = R->getValueAsString("InstName");
2750    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2751    bool isA64 = R->getValueAsBit("isA64");
2752
2753    // do not include AArch64 intrinsic test if not generating
2754    // code for AArch64
2755    if (!isA64GenTest && isA64)
2756      continue;
2757
2758    SmallVector<StringRef, 16> TypeVec;
2759    ParseTypes(R, Types, TypeVec);
2760
2761    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2762    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2763    if (kind == OpUnavailable)
2764      continue;
2765    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2766      if (kind == OpReinterpret) {
2767        bool outQuad = false;
2768        bool dummy = false;
2769        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2770        for (unsigned srcti = 0, srcte = TypeVec.size();
2771             srcti != srcte; ++srcti) {
2772          bool inQuad = false;
2773          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2774          if (srcti == ti || inQuad != outQuad)
2775            continue;
2776		  std::string testFuncProto;
2777          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2778                                  isShift, isHiddenLOp, ck, InstName, isA64,
2779								  testFuncProto);
2780          if (EmittedMap.count(testFuncProto))
2781            continue;
2782          EmittedMap[testFuncProto] = kind;
2783          OS << s << "\n";
2784        }
2785      } else {
2786		std::string testFuncProto;
2787        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2788                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
2789        if (EmittedMap.count(testFuncProto))
2790          continue;
2791        EmittedMap[testFuncProto] = kind;
2792        OS << s << "\n";
2793      }
2794    }
2795  }
2796
2797  if (isA64GenTest)
2798	OS << "#endif\n";
2799}
2800/// runTests - Write out a complete set of tests for all of the Neon
2801/// intrinsics.
2802void NeonEmitter::runTests(raw_ostream &OS) {
2803  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2804        "apcs-gnu\\\n"
2805        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2806        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
2807		"\n"
2808	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2809	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
2810	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2811        "\n"
2812        "// REQUIRES: long_tests\n"
2813        "\n"
2814        "#include <arm_neon.h>\n"
2815        "\n";
2816
2817  // ARM tests must be emitted before AArch64 tests to ensure
2818  // tests for intrinsics that are common to ARM and AArch64
2819  // appear only once in the output stream.
2820  // The check for uniqueness is done in genTargetTest.
2821  StringMap<OpKind> EmittedMap;
2822
2823  genTargetTest(OS, EmittedMap, false);
2824
2825  genTargetTest(OS, EmittedMap, true);
2826}
2827
namespace clang {
// Backend entry point: generate the arm_neon.h header (see NeonEmitter::run).
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}
// Backend entry point: generate the builtin definitions and Sema checking
// sections (see NeonEmitter::runHeader).
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
// Backend entry point: generate the intrinsic test file
// (see NeonEmitter::runTests).
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runTests(OS);
}
} // End namespace clang
2839