NeonEmitter.cpp revision 944f09ff9b33637465906e1aeadcfa405d66331d
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.  A complete set of tests
// for Neon intrinsics can be generated by calling the runTests() entry point.
//
//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
/// OpKind - Identifies the expansion an intrinsic record requests via its
/// OP_* definition in arm_neon.td; the record-name-to-value mapping is the
/// OpMap table built in NeonEmitter's constructor.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddw,
  OpSub,
  OpSubl,
  OpSubw,
  OpMul,
  OpMla,
  OpMlal,
  OpMls,
  OpMlsl,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAbdl,
  OpAba,
  OpAbal,
  OpDiv
};
96
/// ClassKind - Controls how an instruction's element-type suffix is spelled
/// when its name is mangled (see InstructionTypeCode / MangleName below).
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
109
110/// NeonTypeFlags - Flags to identify the types for overloaded Neon
111/// builtins.  These must be kept in sync with the flags in
112/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low nibble holds an EltType value, then one bit
  // each for unsigned and quad.  Must match TargetBuiltins.h (see comment
  // above).
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  // Element-type values stored in the EltTypeMask bits.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  // Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}
  // Encode an element type plus its unsigned/quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
146
namespace {
/// NeonEmitter - State shared by the run/runHeader/runTests entry points:
/// the record keeper plus lookup tables that translate the OP_* record
/// names and the *Inst record classes defined in arm_neon.td.
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;               // OP_* record name -> OpKind
  DenseMap<Record*, ClassKind> ClassMap; // *Inst record class -> ClassKind

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Every OP_* name an arm_neon.td record may carry, keyed to its OpKind.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_DIV"] = OpDiv;

    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    // Instruction record classes keyed to the ClassKind that drives their
    // suffix mangling (see MangleName).
    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // Helpers implemented later in the file.  The isA64* flags presumably
  // select AArch64- vs ARM-specific output — confirm against the
  // implementations (not visible here).
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace
255
256/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
257/// which each StringRef representing a single type declared in the string.
258/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
259/// 2xfloat and 4xfloat respectively.
260static void ParseTypes(Record *r, std::string &s,
261                       SmallVectorImpl<StringRef> &TV) {
262  const char *data = s.data();
263  int len = 0;
264
265  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
266    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
267                         || data[len] == 'H' || data[len] == 'S')
268      continue;
269
270    switch (data[len]) {
271      case 'c':
272      case 's':
273      case 'i':
274      case 'l':
275      case 'h':
276      case 'f':
277      case 'd':
278        break;
279      default:
280        PrintFatalError(r->getLoc(),
281                      "Unexpected letter: " + std::string(data + len, 1));
282    }
283    TV.push_back(StringRef(data, len + 1));
284    data += len + 1;
285    len = -1;
286  }
287}
288
289/// Widen - Convert a type code into the next wider type.  char -> short,
290/// short -> int, etc.
291static char Widen(const char t) {
292  switch (t) {
293    case 'c':
294      return 's';
295    case 's':
296      return 'i';
297    case 'i':
298      return 'l';
299    case 'h':
300      return 'f';
301    default:
302      PrintFatalError("unhandled type in widen!");
303  }
304}
305
306/// Narrow - Convert a type code into the next smaller type.  short -> char,
307/// float -> half float, etc.
308static char Narrow(const char t) {
309  switch (t) {
310    case 's':
311      return 'c';
312    case 'i':
313      return 's';
314    case 'l':
315      return 'i';
316    case 'f':
317      return 'h';
318    default:
319      PrintFatalError("unhandled type in narrow!");
320  }
321}
322
323/// For a particular StringRef, return the base type code, and whether it has
324/// the quad-vector, polynomial, or unsigned modifiers set.
325static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
326  unsigned off = 0;
327  // ignore scalar.
328  if (ty[off] == 'S') {
329    ++off;
330  }
331  // remember quad.
332  if (ty[off] == 'Q' || ty[off] == 'H') {
333    quad = true;
334    ++off;
335  }
336
337  // remember poly.
338  if (ty[off] == 'P') {
339    poly = true;
340    ++off;
341  }
342
343  // remember unsigned.
344  if (ty[off] == 'U') {
345    usgn = true;
346    ++off;
347  }
348
349  // base type to get the type string for.
350  return ty[off];
351}
352
353/// ModType - Transform a type code and its modifiers based on a mod code. The
354/// mod code definitions may be found at the top of arm_neon.td.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't': // Poly types become their unsigned-integer equivalent.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'u': // Force unsigned integer; floats map to same-width ints.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'x': // Force signed integer.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      break;
    case 'f': // Force float; a half input also becomes the quad form.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g': // Force the non-quad form, element type unchanged.
      quad = false;
      break;
    case 'w': // Widen the element and use the quad form.
      type = Widen(type);
      quad = true;
      break;
    case 'n': // Widen the element only.
      type = Widen(type);
      break;
    case 'i': // Scalar int.
      type = 'i';
      scal = true;
      break;
    case 'l': // Unsigned scalar of type 'l'.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 's': // Scalar of the base element type.
    case 'a':
      scal = true;
      break;
    case 'k': // Quad form, element type unchanged.
      quad = true;
      break;
    case 'c': // Const pointer: set cnst, then FALL THROUGH to the pointer
              // handling of 'p' (intentional).
      cnst = true;
    case 'p': // Pointer to scalar.
      pntr = true;
      scal = true;
      break;
    case 'h': // Narrow the element; a half result drops the quad form.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'e': // Narrow and force unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    default:
      break;
  }
  return type;
}
430
431/// TypeString - for a modifier and type, generate the name of the typedef for
432/// that type.  QUc -> uint8x8_t.
433static std::string TypeString(const char mod, StringRef typestr) {
434  bool quad = false;
435  bool poly = false;
436  bool usgn = false;
437  bool scal = false;
438  bool cnst = false;
439  bool pntr = false;
440
441  if (mod == 'v')
442    return "void";
443  if (mod == 'i')
444    return "int";
445
446  // base type to get the type string for.
447  char type = ClassifyType(typestr, quad, poly, usgn);
448
449  // Based on the modifying character, change the type and width if necessary.
450  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
451
452  SmallString<128> s;
453
454  if (usgn)
455    s.push_back('u');
456
457  switch (type) {
458    case 'c':
459      s += poly ? "poly8" : "int8";
460      if (scal)
461        break;
462      s += quad ? "x16" : "x8";
463      break;
464    case 's':
465      s += poly ? "poly16" : "int16";
466      if (scal)
467        break;
468      s += quad ? "x8" : "x4";
469      break;
470    case 'i':
471      s += "int32";
472      if (scal)
473        break;
474      s += quad ? "x4" : "x2";
475      break;
476    case 'l':
477      s += "int64";
478      if (scal)
479        break;
480      s += quad ? "x2" : "x1";
481      break;
482    case 'h':
483      s += "float16";
484      if (scal)
485        break;
486      s += quad ? "x8" : "x4";
487      break;
488    case 'f':
489      s += "float32";
490      if (scal)
491        break;
492      s += quad ? "x4" : "x2";
493      break;
494    case 'd':
495      s += "float64";
496      if (scal)
497        break;
498      s += quad ? "x2" : "x1";
499      break;
500
501    default:
502      PrintFatalError("unhandled type!");
503  }
504
505  if (mod == '2')
506    s += "x2";
507  if (mod == '3')
508    s += "x3";
509  if (mod == '4')
510    s += "x4";
511
512  // Append _t, finishing the type string typedef type.
513  s += "_t";
514
515  if (cnst)
516    s += " const";
517
518  if (pntr)
519    s += " *";
520
521  return s.str();
522}
523
524/// BuiltinTypeString - for a modifier and type, generate the clang
525/// BuiltinsARM.def prototype code for the function.  See the top of clang's
526/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Poly types, and scalar non-float integers of class I/W intrinsics, are
  // encoded unsigned in the builtin signature.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');

  // Scalar case: build a plain Builtins.def scalar encoding.
  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB and any remaining types are returned as a byte vector.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  // Plain vector arguments follow the same encoding rules as returns.
  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}
618
619/// InstructionTypeCode - Computes the ARM argument character code and
620/// quad status for a specific type string and ClassKind.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  // For each base type the suffix depends on the class: ClassS spells out
  // signed/unsigned/poly, ClassI uses a generic "iN", ClassW the width only.
  // Classes without a case leave typeCode untouched (empty for callers that
  // just default-constructed it).
  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // No width-only suffix is defined for float64.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}
694
695static char Insert_BHSD_Suffix(StringRef typestr){
696  unsigned off = 0;
697  if(typestr[off++] == 'S'){
698    while(typestr[off] == 'Q' || typestr[off] == 'H'||
699          typestr[off] == 'P' || typestr[off] == 'U')
700      ++off;
701    switch (typestr[off]){
702    default  : break;
703    case 'c' : return 'b';
704    case 's' : return 'h';
705    case 'i' :
706    case 'f' : return 's';
707    case 'l' :
708    case 'd' : return 'd';
709    }
710  }
711  return 0;
712}
713
714/// MangleName - Append a type or width suffix to a base neon function name,
715/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
716/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
717/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
718static std::string MangleName(const std::string &name, StringRef typestr,
719                              ClassKind ck) {
720  if (name == "vcvt_f32_f16")
721    return name;
722
723  bool quad = false;
724  std::string typeCode = "";
725
726  InstructionTypeCode(typestr, ck, quad, typeCode);
727
728  std::string s = name;
729
730  if (typeCode.size() > 0) {
731    s += "_" + typeCode;
732  }
733
734  if (ck == ClassB)
735    s += "_v";
736
737  // Insert a 'q' before the first '_' character so that it ends up before
738  // _lane or _n on vector-scalar operations.
739  if (typestr.find("Q") != StringRef::npos) {
740      size_t pos = s.find('_');
741      s = s.insert(pos, "q");
742  }
743  char ins = Insert_BHSD_Suffix(typestr);
744  if(ins){
745    size_t pos = s.find('_');
746    s = s.insert(pos, &ins, 1);
747  }
748
749  return s;
750}
751
752static void PreprocessInstruction(const StringRef &Name,
753                                  const std::string &InstName,
754                                  std::string &Prefix,
755                                  bool &HasNPostfix,
756                                  bool &HasLanePostfix,
757                                  bool &HasDupPostfix,
758                                  bool &IsSpecialVCvt,
759                                  size_t &TBNumber) {
760  // All of our instruction name fields from arm_neon.td are of the form
761  //   <instructionname>_...
762  // Thus we grab our instruction name via computation of said Prefix.
763  const size_t PrefixEnd = Name.find_first_of('_');
764  // If InstName is passed in, we use that instead of our name Prefix.
765  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
766
767  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
768
769  HasNPostfix = Postfix.count("_n");
770  HasLanePostfix = Postfix.count("_lane");
771  HasDupPostfix = Postfix.count("_dup");
772  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
773
774  if (InstName.compare("vtbl") == 0 ||
775      InstName.compare("vtbx") == 0) {
776    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
777    // encoding to get its true value.
778    TBNumber = Name[Name.size()-1] - 48;
779  }
780}
781
782/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
783/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // A quad register contributes a second d-register unless a lane
      // postfix already pinned a single one.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // Only vld1/vst1 with a dup or lane postfix and a non-8-bit element
    // carry an alignment hint (":<bits>") on the address operand.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}
845
846static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
847                                     const bool &HasNPostfix) {
848  return (NameRef.count("vmla") ||
849          NameRef.count("vmlal") ||
850          NameRef.count("vmlsl") ||
851          NameRef.count("vmull") ||
852          NameRef.count("vqdmlal") ||
853          NameRef.count("vqdmlsl") ||
854          NameRef.count("vqdmulh") ||
855          NameRef.count("vqdmull") ||
856          NameRef.count("vqrdmulh")) && HasNPostfix;
857}
858
859static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
860                                         const bool &HasLanePostfix) {
861  return (NameRef.count("vmla") ||
862          NameRef.count("vmls") ||
863          NameRef.count("vmlal") ||
864          NameRef.count("vmlsl") ||
865          (NameRef.count("vmul") && NameRef.size() == 3)||
866          NameRef.count("vqdmlal") ||
867          NameRef.count("vqdmlsl") ||
868          NameRef.count("vqdmulh") ||
869          NameRef.count("vqrdmulh")) && HasLanePostfix;
870}
871
872static bool IsSpecialLaneMultiply(const StringRef &NameRef,
873                                  const bool &HasLanePostfix,
874                                  const bool &IsQuad) {
875  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
876                               && IsQuad;
877  const bool IsVMull = NameRef.count("mull") && !IsQuad;
878  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
879}
880
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case: map each proto character onto one of the four
  // pattern characters consumed by GenerateRegisterCheckPattern —
  // 'q' (quad reg), 'd' (double reg), 'i' (immediate), 'a' (lane access).
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    case 'g':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // For a special lane multiply, drop the third character by shifting the
    // fourth into its place and erasing everything after it.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
960
961/// GenerateRegisterCheckPatterns - Given a bunch of data we have
962/// extracted, generate a FileCheck pattern to check that an
963/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup/vmov _n forms get no register pattern at all.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from  v{ld,st}N using its ascii representation.
    const size_t Count = NameRef[3] - 48; // 48 == '0'

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    // vtblN/vtbxN: destination reg, an N-entry table list, then an index reg.
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    // Emit one operand pattern per normalized proto character.
    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}
1037
1038/// GenerateChecksForIntrinsic - Given a specific instruction name +
1039/// typestr + class kind, generate the proper set of FileCheck
1040/// Patterns to check for. We could just return a string, but instead
1041/// use a vector since it provides us with the extra flexibility of
1042/// emitting multiple checks, which comes in handy for certain cases
1043/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // Special case: the half->single conversion has a fixed mnemonic.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // NOTE: the -1 sentinel wraps to SIZE_MAX since TBNumber is size_t; it is
  // presumably only read for vtbl/vtbx names, where PreprocessInstruction
  // sets it — confirm against PreprocessInstruction.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  // Derive the out-type code (e.g. "f32", "64") and the FileCheck register
  // operand pattern for this intrinsic.
  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // NameRef.slice(0, 3) is "vld" or "vst".
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1196
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // Two situations force a #define instead of a declaration:
  //  'i' - an immediate argument, which must stay a literal so that
  //        SemaChecking can range check the value the user passed;
  //  'p'/'c' - pointer arguments, which would otherwise lose aligned
  //        attributes carried on the pointer type.
  return proto.find_first_of("ipc") != std::string::npos;
}
1214
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1222
1223// Generate the string "(argtype a, argtype b, ...)"
1224static std::string GenArgs(const std::string &proto, StringRef typestr) {
1225  bool define = UseMacro(proto);
1226  char arg = 'a';
1227
1228  std::string s;
1229  s += "(";
1230
1231  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1232    if (define) {
1233      // Some macro arguments are used directly instead of being assigned
1234      // to local temporaries; prepend an underscore prefix to make their
1235      // names consistent with the local temporaries.
1236      if (MacroArgUsedDirectly(proto, i))
1237        s += "__";
1238    } else {
1239      s += TypeString(proto[i], typestr) + " __";
1240    }
1241    s.push_back(arg);
1242    if ((i + 1) < e)
1243      s += ", ";
1244  }
1245
1246  s += ")";
1247  return s;
1248}
1249
1250// Macro arguments are not type-checked like inline function arguments, so
1251// assign them to local temporaries to get the right type checking.
1252static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1253  char arg = 'a';
1254  std::string s;
1255  bool generatedLocal = false;
1256
1257  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1258    // Do not create a temporary for an immediate argument.
1259    // That would defeat the whole point of using a macro!
1260    if (MacroArgUsedDirectly(proto, i))
1261      continue;
1262    generatedLocal = true;
1263
1264    s += TypeString(proto[i], typestr) + " __";
1265    s.push_back(arg);
1266    s += " = (";
1267    s.push_back(arg);
1268    s += "); ";
1269  }
1270
1271  if (generatedLocal)
1272    s += "\\\n  ";
1273  return s;
1274}
1275
1276// Use the vmovl builtin to sign-extend or zero-extend a vector.
1277static std::string Extend(StringRef typestr, const std::string &a) {
1278  std::string s;
1279  s = MangleName("vmovl", typestr, ClassS);
1280  s += "(" + a + ")";
1281  return s;
1282}
1283
1284static std::string Duplicate(unsigned nElts, StringRef typestr,
1285                             const std::string &a) {
1286  std::string s;
1287
1288  s = "(" + TypeString('d', typestr) + "){ ";
1289  for (unsigned i = 0; i != nElts; ++i) {
1290    s += a;
1291    if ((i + 1) < nElts)
1292      s += ", ";
1293  }
1294  s += " }";
1295
1296  return s;
1297}
1298
// Build "__builtin_shufflevector(vec, vec, lane, lane, ...)" with the lane
// index repeated once per result element, splatting a single lane of 'vec'
// across an nElts-wide vector.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string result = "__builtin_shufflevector(" + vec + ", " + vec;
  for (unsigned elt = 0; elt != nElts; ++elt) {
    result += ", ";
    result += lane;
  }
  result += ")";
  return result;
}
1307
1308static unsigned GetNumElements(StringRef typestr, bool &quad) {
1309  quad = false;
1310  bool dummy = false;
1311  char type = ClassifyType(typestr, quad, dummy, dummy);
1312  unsigned nElts = 0;
1313  switch (type) {
1314  case 'c': nElts = 8; break;
1315  case 's': nElts = 4; break;
1316  case 'i': nElts = 2; break;
1317  case 'l': nElts = 1; break;
1318  case 'h': nElts = 4; break;
1319  case 'f': nElts = 2; break;
1320  case 'd':
1321    nElts = 1;
1322    break;
1323  default:
1324    PrintFatalError("unhandled type!");
1325  }
1326  if (quad) nElts <<= 1;
1327  return nElts;
1328}
1329
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
// The returned string is the complete body statement (including "return "
// for inline functions and the trailing ';').
static std::string GenOpString(OpKind op, const std::string &proto,
                               StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  // Type string for the return value (proto[0] is the return-type modifier).
  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    // Macro bodies are statement-expressions; only inline functions need an
    // explicit "return".
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  // "_n" forms broadcast the trailing scalar operand across a vector first;
  // "_lane" forms splat one lane of the trailing vector operand.
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  // Saturating-multiply "_lane" variants forward to the corresponding
  // non-lane intrinsic with a splatted operand.
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: the vector compare yields a signed mask, so cast back to
  // the declared (unsigned) return type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  // NOTE: the leading space below produces "return  -__a;" in the emitted
  // header — a harmless formatting quirk.
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    // i.e. a per-bit select: mask bits from __a choose between __b and __c.
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Swap adjacent pairs of elements.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // WordElts = elements per 32-bit word; reverse elements within each word.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // DblWordElts = elements per 64-bit doubleword; reverse within each.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal: {
    s += "__a + ";
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpDiv:
    s += "__a / __b;";
    break;
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1567
1568static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1569  unsigned mod = proto[0];
1570
1571  if (mod == 'v' || mod == 'f')
1572    mod = proto[1];
1573
1574  bool quad = false;
1575  bool poly = false;
1576  bool usgn = false;
1577  bool scal = false;
1578  bool cnst = false;
1579  bool pntr = false;
1580
1581  // Base type to get the type string for.
1582  char type = ClassifyType(typestr, quad, poly, usgn);
1583
1584  // Based on the modifying character, change the type and width if necessary.
1585  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1586
1587  NeonTypeFlags::EltType ET;
1588  switch (type) {
1589    case 'c':
1590      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1591      break;
1592    case 's':
1593      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1594      break;
1595    case 'i':
1596      ET = NeonTypeFlags::Int32;
1597      break;
1598    case 'l':
1599      ET = NeonTypeFlags::Int64;
1600      break;
1601    case 'h':
1602      ET = NeonTypeFlags::Float16;
1603      break;
1604    case 'f':
1605      ET = NeonTypeFlags::Float32;
1606      break;
1607    case 'd':
1608      ET = NeonTypeFlags::Float64;
1609      break;
1610    default:
1611      PrintFatalError("unhandled type!");
1612  }
1613  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1614  return Flags.getFlags();
1615}
1616
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
// The returned string is the complete body statement: an optional return
// cast or sret temporary, the mangled __builtin_neon_* call with casted
// arguments, and (for sret builtins) the trailing "return r;".
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  // Emit the return-value handling: either declare the sret temporary 'r'
  // or cast the builtin's result to the declared return type.
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // An 'a' in the prototype marks a scalar that must be splatted ("_n" form).
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // Emit one ".val[k]" operand per subvector.
      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // The last argument of an "_n" intrinsic is the scalar to broadcast.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret builtins, hand the temporary back to the caller.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
1733
1734static std::string GenBuiltinDef(const std::string &name,
1735                                 const std::string &proto,
1736                                 StringRef typestr, ClassKind ck) {
1737  std::string s("BUILTIN(__builtin_neon_");
1738
1739  // If all types are the same size, bitcasting the args will take care
1740  // of arg checking.  The actual signedness etc. will be taken care of with
1741  // special enums.
1742  if (proto.find('s') == std::string::npos)
1743    ck = ClassB;
1744
1745  s += MangleName(name, typestr, ck);
1746  s += ", \"";
1747
1748  for (unsigned i = 0, e = proto.size(); i != e; ++i)
1749    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1750
1751  // Extra constant integer to hold type class enum for this function, e.g. s8
1752  if (ck == ClassB)
1753    s += "i";
1754
1755  s += "\", \"n\")";
1756  return s;
1757}
1758
1759static std::string GenIntrinsic(const std::string &name,
1760                                const std::string &proto,
1761                                StringRef outTypeStr, StringRef inTypeStr,
1762                                OpKind kind, ClassKind classKind) {
1763  assert(!proto.empty() && "");
1764  bool define = UseMacro(proto) && kind != OpUnavailable;
1765  std::string s;
1766
1767  // static always inline + return type
1768  if (define)
1769    s += "#define ";
1770  else
1771    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1772
1773  // Function name with type suffix
1774  std::string mangledName = MangleName(name, outTypeStr, ClassS);
1775  if (outTypeStr != inTypeStr) {
1776    // If the input type is different (e.g., for vreinterpret), append a suffix
1777    // for the input type.  String off a "Q" (quad) prefix so that MangleName
1778    // does not insert another "q" in the name.
1779    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1780    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1781    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1782  }
1783  s += mangledName;
1784
1785  // Function arguments
1786  s += GenArgs(proto, inTypeStr);
1787
1788  // Definition.
1789  if (define) {
1790    s += " __extension__ ({ \\\n  ";
1791    s += GenMacroLocals(proto, inTypeStr);
1792  } else if (kind == OpUnavailable) {
1793    s += " __attribute__((unavailable));\n";
1794    return s;
1795  } else
1796    s += " {\n  ";
1797
1798  if (kind != OpNone)
1799    s += GenOpString(kind, proto, outTypeStr);
1800  else
1801    s += GenBuiltin(name, proto, outTypeStr, classKind);
1802  if (define)
1803    s += " })";
1804  else
1805    s += " }";
1806  s += "\n";
1807  return s;
1808}
1809
1810/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
1811/// is comprised of type definitions and function declarations.
1812void NeonEmitter::run(raw_ostream &OS) {
1813  OS <<
1814    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
1815    "---===\n"
1816    " *\n"
1817    " * Permission is hereby granted, free of charge, to any person obtaining "
1818    "a copy\n"
1819    " * of this software and associated documentation files (the \"Software\"),"
1820    " to deal\n"
1821    " * in the Software without restriction, including without limitation the "
1822    "rights\n"
1823    " * to use, copy, modify, merge, publish, distribute, sublicense, "
1824    "and/or sell\n"
1825    " * copies of the Software, and to permit persons to whom the Software is\n"
1826    " * furnished to do so, subject to the following conditions:\n"
1827    " *\n"
1828    " * The above copyright notice and this permission notice shall be "
1829    "included in\n"
1830    " * all copies or substantial portions of the Software.\n"
1831    " *\n"
1832    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
1833    "EXPRESS OR\n"
1834    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
1835    "MERCHANTABILITY,\n"
1836    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
1837    "SHALL THE\n"
1838    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
1839    "OTHER\n"
1840    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
1841    "ARISING FROM,\n"
1842    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
1843    "DEALINGS IN\n"
1844    " * THE SOFTWARE.\n"
1845    " *\n"
1846    " *===--------------------------------------------------------------------"
1847    "---===\n"
1848    " */\n\n";
1849
1850  OS << "#ifndef __ARM_NEON_H\n";
1851  OS << "#define __ARM_NEON_H\n\n";
1852
1853  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
1854  OS << "#error \"NEON support not enabled\"\n";
1855  OS << "#endif\n\n";
1856
1857  OS << "#include <stdint.h>\n\n";
1858
1859  // Emit NEON-specific scalar typedefs.
1860  OS << "typedef float float32_t;\n";
1861  OS << "typedef __fp16 float16_t;\n";
1862
1863  OS << "#ifdef __aarch64__\n";
1864  OS << "typedef double float64_t;\n";
1865  OS << "#endif\n\n";
1866
1867  // For now, signedness of polynomial types depends on target
1868  OS << "#ifdef __aarch64__\n";
1869  OS << "typedef uint8_t poly8_t;\n";
1870  OS << "typedef uint16_t poly16_t;\n";
1871  OS << "#else\n";
1872  OS << "typedef int8_t poly8_t;\n";
1873  OS << "typedef int16_t poly16_t;\n";
1874  OS << "#endif\n";
1875
1876  // Emit Neon vector typedefs.
1877  std::string TypedefTypes(
1878      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
1879  SmallVector<StringRef, 24> TDTypeVec;
1880  ParseTypes(0, TypedefTypes, TDTypeVec);
1881
1882  // Emit vector typedefs.
1883  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1884    bool dummy, quad = false, poly = false;
1885    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
1886    bool isA64 = false;
1887
1888    if (type == 'd' && quad)
1889      isA64 = true;
1890
1891    if (isA64)
1892      OS << "#ifdef __aarch64__\n";
1893
1894    if (poly)
1895      OS << "typedef __attribute__((neon_polyvector_type(";
1896    else
1897      OS << "typedef __attribute__((neon_vector_type(";
1898
1899    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
1900    OS << utostr(nElts) << "))) ";
1901    if (nElts < 10)
1902      OS << " ";
1903
1904    OS << TypeString('s', TDTypeVec[i]);
1905    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
1906
1907    if (isA64)
1908      OS << "#endif\n";
1909  }
1910  OS << "\n";
1911
1912  // Emit struct typedefs.
1913  for (unsigned vi = 2; vi != 5; ++vi) {
1914    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1915      bool dummy, quad = false, poly = false;
1916      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
1917      bool isA64 = false;
1918
1919      if (type == 'd' && quad)
1920        isA64 = true;
1921
1922      if (isA64)
1923        OS << "#ifdef __aarch64__\n";
1924
1925      std::string ts = TypeString('d', TDTypeVec[i]);
1926      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
1927      OS << "typedef struct " << vs << " {\n";
1928      OS << "  " << ts << " val";
1929      OS << "[" << utostr(vi) << "]";
1930      OS << ";\n} ";
1931      OS << vs << ";\n";
1932
1933      if (isA64)
1934        OS << "#endif\n";
1935
1936      OS << "\n";
1937    }
1938  }
1939
1940  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
1941
1942  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1943
1944  StringMap<ClassKind> EmittedMap;
1945
1946  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
1947  // intrinsics.  (Some of the saturating multiply instructions are also
1948  // used to implement the corresponding "_lane" variants, but tablegen
1949  // sorts the records into alphabetical order so that the "_lane" variants
1950  // come after the intrinsics they use.)
1951  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
1952  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
1953  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
1954
1955  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
1956  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
1958  // Emit ARM intrinsics.
1959  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1960    Record *R = RV[i];
1961
1962    // Skip AArch64 intrinsics; they will be emitted at the end.
1963    bool isA64 = R->getValueAsBit("isA64");
1964    if (isA64)
1965      continue;
1966
1967    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
1968        R->getName() != "VABD")
1969      emitIntrinsic(OS, R, EmittedMap);
1970  }
1971
1972  // Emit AArch64-specific intrinsics.
1973  OS << "#ifdef __aarch64__\n";
1974
1975  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1976    Record *R = RV[i];
1977
1978    // Skip ARM intrinsics already included above.
1979    bool isA64 = R->getValueAsBit("isA64");
1980    if (!isA64)
1981      continue;
1982
1983    emitIntrinsic(OS, R, EmittedMap);
1984  }
1985
1986  OS << "#endif\n\n";
1987
1988  OS << "#undef __ai\n\n";
1989  OS << "#endif /* __ARM_NEON_H */\n";
1990}
1991
1992/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
1993/// intrinsics specified by record R checking for intrinsic uniqueness.
1994void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
1995                                StringMap<ClassKind> &EmittedMap) {
1996  std::string name = R->getValueAsString("Name");
1997  std::string Proto = R->getValueAsString("Prototype");
1998  std::string Types = R->getValueAsString("Types");
1999
2000  SmallVector<StringRef, 16> TypeVec;
2001  ParseTypes(R, Types, TypeVec);
2002
2003  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2004
2005  ClassKind classKind = ClassNone;
2006  if (R->getSuperClasses().size() >= 2)
2007    classKind = ClassMap[R->getSuperClasses()[1]];
2008  if (classKind == ClassNone && kind == OpNone)
2009    PrintFatalError(R->getLoc(), "Builtin has no class kind");
2010
2011  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2012    if (kind == OpReinterpret) {
2013      bool outQuad = false;
2014      bool dummy = false;
2015      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2016      for (unsigned srcti = 0, srcte = TypeVec.size();
2017           srcti != srcte; ++srcti) {
2018        bool inQuad = false;
2019        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2020        if (srcti == ti || inQuad != outQuad)
2021          continue;
2022        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2023                                     OpCast, ClassS);
2024        if (EmittedMap.count(s))
2025          continue;
2026        EmittedMap[s] = ClassS;
2027        OS << s;
2028      }
2029    } else {
2030      std::string s =
2031          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2032      if (EmittedMap.count(s))
2033        continue;
2034      EmittedMap[s] = classKind;
2035      OS << s;
2036    }
2037  }
2038  OS << "\n";
2039}
2040
2041static unsigned RangeFromType(const char mod, StringRef typestr) {
2042  // base type to get the type string for.
2043  bool quad = false, dummy = false;
2044  char type = ClassifyType(typestr, quad, dummy, dummy);
2045  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2046
2047  switch (type) {
2048    case 'c':
2049      return (8 << (int)quad) - 1;
2050    case 'h':
2051    case 's':
2052      return (4 << (int)quad) - 1;
2053    case 'f':
2054    case 'i':
2055      return (2 << (int)quad) - 1;
2056    case 'l':
2057      return (1 << (int)quad) - 1;
2058    default:
2059      PrintFatalError("unhandled type!");
2060  }
2061}
2062
/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
///
/// Emits lines of the form
///   case <Target>::BI__builtin_neon_<name>: i = <idx>; <range>; break;
/// guarded by GET_NEON_IMMEDIATE_CHECK (ARM) or
/// GET_NEON_AARCH64_IMMEDIATE_CHECK (AArch64), for use by SemaChecking.cpp.
///
/// \param A64IntrinsicMap maps "name@prototype" to the ClassKind of each
///        AArch64 intrinsic; used to drop ARM entries that AArch64 redefines.
/// \param isA64RangeCheck true when emitting the AArch64 section.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap; // uniques the emitted case labels

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only records implemented as builtins (OpNone) need range checks.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    // Key used for the cross-target uniqueness check below.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range 1 to 32.
        ck = ClassB;
        rangestr = "l = 1; u = 31"; // upper bound = l + u
      } else if (Proto.find('s') == std::string::npos) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
          immidx += 2;
          break;
        case '3':
          immidx += 3;
          break;
        case '4':
          immidx += 4;
          break;
        case 'i':
          ie = ii + 1; // Found the immediate; terminate the scan.
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2193
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
///
/// For every type-overloaded (ClassB) builtin this emits
///   case <Target>::BI__builtin_neon_<name>: mask = 0x...ULL; ...; break;
/// guarded by GET_NEON_OVERLOAD_CHECK (ARM) or
/// GET_NEON_AARCH64_OVERLOAD_CHECK (AArch64).  The mask has one bit set per
/// legal NEON type enum value; separate cases are produced for the 64-bit
/// and 128-bit (quad) variants of the builtin.
///
/// \param A64IntrinsicMap maps "name@prototype" to the ClassKind of each
///        AArch64 intrinsic; used to drop ARM entries that AArch64 redefines.
/// \param isA64TypeCheck true when emitting the AArch64 section.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records implemented as builtins (OpNone) are type checked here.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    // Key used for the cross-target uniqueness check below.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Proto.find('s') != std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM  type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // si/qi remember the last 64-bit resp. 128-bit type index so the
    // mangled case label below can be formed; mask/qmask accumulate the
    // legal type bits for each variant.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case for the 64-bit variant (if any)...
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    // ...and one for the 128-bit (quad) variant (if any).
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2326
2327/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2328/// declaration of builtins, checking for unique builtin declarations.
2329void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2330                                 StringMap<ClassKind> &A64IntrinsicMap,
2331                                 bool isA64GenBuiltinDef) {
2332  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2333  StringMap<OpKind> EmittedMap;
2334
2335  // Generate BuiltinsARM.def and BuiltinsAArch64.def
2336  if (isA64GenBuiltinDef)
2337    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2338  else
2339    OS << "#ifdef GET_NEON_BUILTINS\n";
2340
2341  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2342    Record *R = RV[i];
2343    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2344    if (k != OpNone)
2345      continue;
2346
2347    std::string Proto = R->getValueAsString("Prototype");
2348    std::string name = R->getValueAsString("Name");
2349    std::string Rename = name + "@" + Proto;
2350
2351    // Functions with 'a' (the splat code) in the type prototype should not get
2352    // their own builtin as they use the non-splat variant.
2353    if (Proto.find('a') != std::string::npos)
2354      continue;
2355
2356    std::string Types = R->getValueAsString("Types");
2357    SmallVector<StringRef, 16> TypeVec;
2358    ParseTypes(R, Types, TypeVec);
2359
2360    if (R->getSuperClasses().size() < 2)
2361      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2362
2363    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2364
2365    // Do not include AArch64 BUILTIN() macros if not generating
2366    // code for AArch64
2367    bool isA64 = R->getValueAsBit("isA64");
2368    if (!isA64GenBuiltinDef && isA64)
2369      continue;
2370
2371    // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
2372    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2373    // redefined in AArch64 to handle an additional 2 x f64 type.
2374    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
2375      ClassKind &A64CK = A64IntrinsicMap[Rename];
2376      if (A64CK == ck && ck != ClassNone)
2377        continue;
2378    }
2379
2380    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2381      // Generate the declaration for this builtin, ensuring
2382      // that each unique BUILTIN() macro appears only once in the output
2383      // stream.
2384      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
2385      if (EmittedMap.count(bd))
2386        continue;
2387
2388      EmittedMap[bd] = OpNone;
2389      OS << bd << "\n";
2390    }
2391  }
2392  OS << "#endif\n\n";
2393}
2394
2395/// runHeader - Emit a file with sections defining:
2396/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2397/// 2. the SemaChecking code for the type overload checking.
2398/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2399void NeonEmitter::runHeader(raw_ostream &OS) {
2400  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2401
2402  // build a map of AArch64 intriniscs to be used in uniqueness checks.
2403  StringMap<ClassKind> A64IntrinsicMap;
2404  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2405    Record *R = RV[i];
2406
2407    bool isA64 = R->getValueAsBit("isA64");
2408    if (!isA64)
2409      continue;
2410
2411    ClassKind CK = ClassNone;
2412    if (R->getSuperClasses().size() >= 2)
2413      CK = ClassMap[R->getSuperClasses()[1]];
2414
2415    std::string Name = R->getValueAsString("Name");
2416    std::string Proto = R->getValueAsString("Prototype");
2417    std::string Rename = Name + "@" + Proto;
2418    if (A64IntrinsicMap.count(Rename))
2419      continue;
2420    A64IntrinsicMap[Rename] = CK;
2421  }
2422
2423  // Generate BuiltinsARM.def for ARM
2424  genBuiltinsDef(OS, A64IntrinsicMap, false);
2425
2426  // Generate BuiltinsAArch64.def for AArch64
2427  genBuiltinsDef(OS, A64IntrinsicMap, true);
2428
2429  // Generate ARM overloaded type checking code for SemaChecking.cpp
2430  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2431
2432  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2433  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2434
2435  // Generate ARM range checking code for shift/lane immediates.
2436  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2437
2438  // Generate the AArch64 range checking code for shift/lane immediates.
2439  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2440}
2441
2442/// GenTest - Write out a test for the intrinsic specified by the name and
2443/// type strings, including the embedded patterns for FileCheck to match.
2444static std::string GenTest(const std::string &name,
2445                           const std::string &proto,
2446                           StringRef outTypeStr, StringRef inTypeStr,
2447                           bool isShift, bool isHiddenLOp,
2448                           ClassKind ck, const std::string &InstName,
2449						   bool isA64,
2450						   std::string & testFuncProto) {
2451  assert(!proto.empty() && "");
2452  std::string s;
2453
2454  // Function name with type suffix
2455  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2456  if (outTypeStr != inTypeStr) {
2457    // If the input type is different (e.g., for vreinterpret), append a suffix
2458    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2459    // does not insert another "q" in the name.
2460    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2461    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2462    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2463  }
2464
2465  // todo: GenerateChecksForIntrinsic does not generate CHECK
2466  // for aarch64 instructions yet
2467  std::vector<std::string> FileCheckPatterns;
2468  if (!isA64) {
2469	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2470							   isHiddenLOp, FileCheckPatterns);
2471	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2472  }
2473  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2474
2475  // Emit the FileCheck patterns.
2476  // If for any reason we do not want to emit a check, mangledInst
2477  // will be the empty string.
2478  if (FileCheckPatterns.size()) {
2479    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2480                                                  e = FileCheckPatterns.end();
2481         i != e;
2482         ++i) {
2483      s += "// CHECK_ARM: " + *i + "\n";
2484    }
2485  }
2486
2487  // Emit the start of the test function.
2488
2489  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2490  char arg = 'a';
2491  std::string comma;
2492  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2493    // Do not create arguments for values that must be immediate constants.
2494    if (proto[i] == 'i')
2495      continue;
2496    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2497    testFuncProto.push_back(arg);
2498    comma = ", ";
2499  }
2500  testFuncProto += ")";
2501
2502  s+= testFuncProto;
2503  s+= " {\n  ";
2504
2505  if (proto[0] != 'v')
2506    s += "return ";
2507  s += mangledName + "(";
2508  arg = 'a';
2509  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2510    if (proto[i] == 'i') {
2511      // For immediate operands, test the maximum value.
2512      if (isShift)
2513        s += "1"; // FIXME
2514      else
2515        // The immediate generally refers to a lane in the preceding argument.
2516        s += utostr(RangeFromType(proto[i-1], inTypeStr));
2517    } else {
2518      s.push_back(arg);
2519    }
2520    if ((i + 1) < e)
2521      s += ", ";
2522  }
2523  s += ");\n}\n\n";
2524  return s;
2525}
2526
2527/// Write out all intrinsic tests for the specified target, checking
2528/// for intrinsic test uniqueness.
2529void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2530                                bool isA64GenTest) {
2531  if (isA64GenTest)
2532	OS << "#ifdef __aarch64__\n";
2533
2534  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2535  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2536    Record *R = RV[i];
2537    std::string name = R->getValueAsString("Name");
2538    std::string Proto = R->getValueAsString("Prototype");
2539    std::string Types = R->getValueAsString("Types");
2540    bool isShift = R->getValueAsBit("isShift");
2541    std::string InstName = R->getValueAsString("InstName");
2542    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2543    bool isA64 = R->getValueAsBit("isA64");
2544
2545    // do not include AArch64 intrinsic test if not generating
2546    // code for AArch64
2547    if (!isA64GenTest && isA64)
2548      continue;
2549
2550    SmallVector<StringRef, 16> TypeVec;
2551    ParseTypes(R, Types, TypeVec);
2552
2553    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2554    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2555    if (kind == OpUnavailable)
2556      continue;
2557    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2558      if (kind == OpReinterpret) {
2559        bool outQuad = false;
2560        bool dummy = false;
2561        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2562        for (unsigned srcti = 0, srcte = TypeVec.size();
2563             srcti != srcte; ++srcti) {
2564          bool inQuad = false;
2565          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2566          if (srcti == ti || inQuad != outQuad)
2567            continue;
2568		  std::string testFuncProto;
2569          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2570                                  isShift, isHiddenLOp, ck, InstName, isA64,
2571								  testFuncProto);
2572          if (EmittedMap.count(testFuncProto))
2573            continue;
2574          EmittedMap[testFuncProto] = kind;
2575          OS << s << "\n";
2576        }
2577      } else {
2578		std::string testFuncProto;
2579        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2580                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
2581        if (EmittedMap.count(testFuncProto))
2582          continue;
2583        EmittedMap[testFuncProto] = kind;
2584        OS << s << "\n";
2585      }
2586    }
2587  }
2588
2589  if (isA64GenTest)
2590	OS << "#endif\n";
2591}
2592/// runTests - Write out a complete set of tests for all of the Neon
2593/// intrinsics.
2594void NeonEmitter::runTests(raw_ostream &OS) {
2595  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2596        "apcs-gnu\\\n"
2597        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2598        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
2599		"\n"
2600	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2601	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
2602	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2603        "\n"
2604        "// REQUIRES: long_tests\n"
2605        "\n"
2606        "#include <arm_neon.h>\n"
2607        "\n";
2608
2609  // ARM tests must be emitted before AArch64 tests to ensure
2610  // tests for intrinsics that are common to ARM and AArch64
2611  // appear only once in the output stream.
2612  // The check for uniqueness is done in genTargetTest.
2613  StringMap<OpKind> EmittedMap;
2614
2615  genTargetTest(OS, EmittedMap, false);
2616
2617  genTargetTest(OS, EmittedMap, true);
2618}
2619
namespace clang {
// Emit arm_neon.h: the declaration and definition of each NEON intrinsic.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}
// Emit the Sema/builtin-definition sections (BUILTIN() macros, overload
// type checks and immediate range checks) via runHeader().
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
// Emit a complete set of tests for the NEON intrinsics via runTests().
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runTests(OS);
}
} // End namespace clang
2631