NeonEmitter.cpp revision 410c3f73cb0c78f72335dc712a9d887d77b8e7ce
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
/// OpKind - The operation kinds known to this backend.  Every enumerator is
/// the value that NeonEmitter's constructor stores in OpMap for one "OP_*"
/// string.  The enumerators are listed in their original order, so their
/// numeric values are unchanged; they are merely grouped by family here.
enum OpKind {
  // The OP_NONE and OP_UNAVAILABLE kinds.
  OpNone, OpUnavailable,

  // Add and subtract, with their "l" and "w" suffixed forms.
  OpAdd, OpAddl, OpAddw,
  OpSub, OpSubl, OpSubw,

  // Multiply and multiply-accumulate / multiply-subtract.
  OpMul, OpMla, OpMlal, OpMls, OpMlsl,

  // The "_N" forms of the multiply family.
  OpMulN, OpMlaN, OpMlsN, OpMlalN, OpMlslN,

  // The "_LN" (lane) forms of the multiply family.
  OpMulLane, OpMullLane, OpMlaLane, OpMlsLane, OpMlalLane, OpMlslLane,
  OpQDMullLane, OpQDMlalLane, OpQDMlslLane, OpQDMulhLane, OpQRDMulhLane,

  // Comparisons.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Negation and bitwise logic.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Casting, concatenation, duplication, high/low halves and select.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo, OpSelect,

  // Element reversal.
  OpRev16, OpRev32, OpRev64,

  OpReinterpret,

  // The OP_ABDL, OP_ABA and OP_ABAL kinds.
  OpAbdl, OpAba, OpAbal
};
95
/// ClassKind - The instruction class of an intrinsic.  NeonEmitter's
/// constructor maps the tablegen classes (SInst, IInst, WInst, ...) onto
/// these values.
enum ClassKind {
  ClassNone,

  // Generic integer instruction, e.g., "i8" suffix.
  ClassI,

  // Signed/unsigned/poly instruction, e.g., "s8", "u8" or "p8" suffix.
  ClassS,

  // Width-specific instruction, e.g., "8" suffix.
  ClassW,

  // Bitcast arguments, with an enum argument to specify the type.
  ClassB,

  // Logical instructions which are op instructions but for which we must
  // not emit any suffix in our tests.
  ClassL,

  // Instructions which we do not test since they are not TRUE instructions.
  ClassNoTest
};
108
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
///
/// The flag word holds the element type in its low four bits, followed by
/// one bit for "unsigned" and one bit for "quad".
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  // The packed flag word.
  uint32_t Flags;

public:
  /// The element types, numbered consecutively from zero.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32
  };

  /// Wrap an already-packed flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Pack an element type together with its unsigned and quad bits.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the packed flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
144
145namespace {
146class NeonEmitter {
147  RecordKeeper &Records;
148  StringMap<OpKind> OpMap;
149  DenseMap<Record*, ClassKind> ClassMap;
150
151public:
152  NeonEmitter(RecordKeeper &R) : Records(R) {
153    OpMap["OP_NONE"]  = OpNone;
154    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
155    OpMap["OP_ADD"]   = OpAdd;
156    OpMap["OP_ADDL"]  = OpAddl;
157    OpMap["OP_ADDW"]  = OpAddw;
158    OpMap["OP_SUB"]   = OpSub;
159    OpMap["OP_SUBL"]  = OpSubl;
160    OpMap["OP_SUBW"]  = OpSubw;
161    OpMap["OP_MUL"]   = OpMul;
162    OpMap["OP_MLA"]   = OpMla;
163    OpMap["OP_MLAL"]  = OpMlal;
164    OpMap["OP_MLS"]   = OpMls;
165    OpMap["OP_MLSL"]  = OpMlsl;
166    OpMap["OP_MUL_N"] = OpMulN;
167    OpMap["OP_MLA_N"] = OpMlaN;
168    OpMap["OP_MLS_N"] = OpMlsN;
169    OpMap["OP_MLAL_N"] = OpMlalN;
170    OpMap["OP_MLSL_N"] = OpMlslN;
171    OpMap["OP_MUL_LN"]= OpMulLane;
172    OpMap["OP_MULL_LN"] = OpMullLane;
173    OpMap["OP_MLA_LN"]= OpMlaLane;
174    OpMap["OP_MLS_LN"]= OpMlsLane;
175    OpMap["OP_MLAL_LN"] = OpMlalLane;
176    OpMap["OP_MLSL_LN"] = OpMlslLane;
177    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
178    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
179    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
180    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
181    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
182    OpMap["OP_EQ"]    = OpEq;
183    OpMap["OP_GE"]    = OpGe;
184    OpMap["OP_LE"]    = OpLe;
185    OpMap["OP_GT"]    = OpGt;
186    OpMap["OP_LT"]    = OpLt;
187    OpMap["OP_NEG"]   = OpNeg;
188    OpMap["OP_NOT"]   = OpNot;
189    OpMap["OP_AND"]   = OpAnd;
190    OpMap["OP_OR"]    = OpOr;
191    OpMap["OP_XOR"]   = OpXor;
192    OpMap["OP_ANDN"]  = OpAndNot;
193    OpMap["OP_ORN"]   = OpOrNot;
194    OpMap["OP_CAST"]  = OpCast;
195    OpMap["OP_CONC"]  = OpConcat;
196    OpMap["OP_HI"]    = OpHi;
197    OpMap["OP_LO"]    = OpLo;
198    OpMap["OP_DUP"]   = OpDup;
199    OpMap["OP_DUP_LN"] = OpDupLane;
200    OpMap["OP_SEL"]   = OpSelect;
201    OpMap["OP_REV16"] = OpRev16;
202    OpMap["OP_REV32"] = OpRev32;
203    OpMap["OP_REV64"] = OpRev64;
204    OpMap["OP_REINT"] = OpReinterpret;
205    OpMap["OP_ABDL"]  = OpAbdl;
206    OpMap["OP_ABA"]   = OpAba;
207    OpMap["OP_ABAL"]  = OpAbal;
208
209    Record *SI = R.getClass("SInst");
210    Record *II = R.getClass("IInst");
211    Record *WI = R.getClass("WInst");
212    Record *SOpI = R.getClass("SOpInst");
213    Record *IOpI = R.getClass("IOpInst");
214    Record *WOpI = R.getClass("WOpInst");
215    Record *LOpI = R.getClass("LOpInst");
216    Record *NoTestOpI = R.getClass("NoTestOpInst");
217
218    ClassMap[SI] = ClassS;
219    ClassMap[II] = ClassI;
220    ClassMap[WI] = ClassW;
221    ClassMap[SOpI] = ClassS;
222    ClassMap[IOpI] = ClassI;
223    ClassMap[WOpI] = ClassW;
224    ClassMap[LOpI] = ClassL;
225    ClassMap[NoTestOpI] = ClassNoTest;
226  }
227
228  // run - Emit arm_neon.h.inc
229  void run(raw_ostream &o);
230
231  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
232  void runHeader(raw_ostream &o);
233
234  // runTests - Emit tests for all the Neon intrinsics.
235  void runTests(raw_ostream &o);
236
237private:
238  void emitIntrinsic(raw_ostream &OS, Record *R);
239};
240} // end anonymous namespace
241
242/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
243/// which each StringRef representing a single type declared in the string.
244/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
245/// 2xfloat and 4xfloat respectively.
246static void ParseTypes(Record *r, std::string &s,
247                       SmallVectorImpl<StringRef> &TV) {
248  const char *data = s.data();
249  int len = 0;
250
251  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
252    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
253      continue;
254
255    switch (data[len]) {
256      case 'c':
257      case 's':
258      case 'i':
259      case 'l':
260      case 'h':
261      case 'f':
262        break;
263      default:
264        PrintFatalError(r->getLoc(),
265                      "Unexpected letter: " + std::string(data + len, 1));
266    }
267    TV.push_back(StringRef(data, len + 1));
268    data += len + 1;
269    len = -1;
270  }
271}
272
273/// Widen - Convert a type code into the next wider type.  char -> short,
274/// short -> int, etc.
275static char Widen(const char t) {
276  switch (t) {
277    case 'c':
278      return 's';
279    case 's':
280      return 'i';
281    case 'i':
282      return 'l';
283    case 'h':
284      return 'f';
285    default:
286      PrintFatalError("unhandled type in widen!");
287  }
288}
289
290/// Narrow - Convert a type code into the next smaller type.  short -> char,
291/// float -> half float, etc.
292static char Narrow(const char t) {
293  switch (t) {
294    case 's':
295      return 'c';
296    case 'i':
297      return 's';
298    case 'l':
299      return 'i';
300    case 'f':
301      return 'h';
302    default:
303      PrintFatalError("unhandled type in narrow!");
304  }
305}
306
307/// For a particular StringRef, return the base type code, and whether it has
308/// the quad-vector, polynomial, or unsigned modifiers set.
309static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
310  unsigned off = 0;
311
312  // remember quad.
313  if (ty[off] == 'Q') {
314    quad = true;
315    ++off;
316  }
317
318  // remember poly.
319  if (ty[off] == 'P') {
320    poly = true;
321    ++off;
322  }
323
324  // remember unsigned.
325  if (ty[off] == 'U') {
326    usgn = true;
327    ++off;
328  }
329
330  // base type to get the type string for.
331  return ty[off];
332}
333
334/// ModType - Transform a type code and its modifiers based on a mod code. The
335/// mod code definitions may be found at the top of arm_neon.td.
336static char ModType(const char mod, char type, bool &quad, bool &poly,
337                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
338  switch (mod) {
339    case 't':
340      if (poly) {
341        poly = false;
342        usgn = true;
343      }
344      break;
345    case 'u':
346      usgn = true;
347      poly = false;
348      if (type == 'f')
349        type = 'i';
350      break;
351    case 'x':
352      usgn = false;
353      poly = false;
354      if (type == 'f')
355        type = 'i';
356      break;
357    case 'f':
358      if (type == 'h')
359        quad = true;
360      type = 'f';
361      usgn = false;
362      break;
363    case 'g':
364      quad = false;
365      break;
366    case 'w':
367      type = Widen(type);
368      quad = true;
369      break;
370    case 'n':
371      type = Widen(type);
372      break;
373    case 'i':
374      type = 'i';
375      scal = true;
376      break;
377    case 'l':
378      type = 'l';
379      scal = true;
380      usgn = true;
381      break;
382    case 's':
383    case 'a':
384      scal = true;
385      break;
386    case 'k':
387      quad = true;
388      break;
389    case 'c':
390      cnst = true;
391    case 'p':
392      pntr = true;
393      scal = true;
394      break;
395    case 'h':
396      type = Narrow(type);
397      if (type == 'h')
398        quad = false;
399      break;
400    case 'e':
401      type = Narrow(type);
402      usgn = true;
403      break;
404    default:
405      break;
406  }
407  return type;
408}
409
410/// TypeString - for a modifier and type, generate the name of the typedef for
411/// that type.  QUc -> uint8x8_t.
412static std::string TypeString(const char mod, StringRef typestr) {
413  bool quad = false;
414  bool poly = false;
415  bool usgn = false;
416  bool scal = false;
417  bool cnst = false;
418  bool pntr = false;
419
420  if (mod == 'v')
421    return "void";
422  if (mod == 'i')
423    return "int";
424
425  // base type to get the type string for.
426  char type = ClassifyType(typestr, quad, poly, usgn);
427
428  // Based on the modifying character, change the type and width if necessary.
429  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
430
431  SmallString<128> s;
432
433  if (usgn)
434    s.push_back('u');
435
436  switch (type) {
437    case 'c':
438      s += poly ? "poly8" : "int8";
439      if (scal)
440        break;
441      s += quad ? "x16" : "x8";
442      break;
443    case 's':
444      s += poly ? "poly16" : "int16";
445      if (scal)
446        break;
447      s += quad ? "x8" : "x4";
448      break;
449    case 'i':
450      s += "int32";
451      if (scal)
452        break;
453      s += quad ? "x4" : "x2";
454      break;
455    case 'l':
456      s += "int64";
457      if (scal)
458        break;
459      s += quad ? "x2" : "x1";
460      break;
461    case 'h':
462      s += "float16";
463      if (scal)
464        break;
465      s += quad ? "x8" : "x4";
466      break;
467    case 'f':
468      s += "float32";
469      if (scal)
470        break;
471      s += quad ? "x4" : "x2";
472      break;
473    default:
474      PrintFatalError("unhandled type!");
475  }
476
477  if (mod == '2')
478    s += "x2";
479  if (mod == '3')
480    s += "x3";
481  if (mod == '4')
482    s += "x4";
483
484  // Append _t, finishing the type string typedef type.
485  s += "_t";
486
487  if (cnst)
488    s += " const";
489
490  if (pntr)
491    s += " *";
492
493  return s.str();
494}
495
496/// BuiltinTypeString - for a modifier and type, generate the clang
497/// BuiltinsARM.def prototype code for the function.  See the top of clang's
498/// Builtins.def for a description of the type strings.
499static std::string BuiltinTypeString(const char mod, StringRef typestr,
500                                     ClassKind ck, bool ret) {
501  bool quad = false;
502  bool poly = false;
503  bool usgn = false;
504  bool scal = false;
505  bool cnst = false;
506  bool pntr = false;
507
508  if (mod == 'v')
509    return "v"; // void
510  if (mod == 'i')
511    return "i"; // int
512
513  // base type to get the type string for.
514  char type = ClassifyType(typestr, quad, poly, usgn);
515
516  // Based on the modifying character, change the type and width if necessary.
517  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
518
519  // All pointers are void* pointers.  Change type to 'v' now.
520  if (pntr) {
521    usgn = false;
522    poly = false;
523    type = 'v';
524  }
525  // Treat half-float ('h') types as unsigned short ('s') types.
526  if (type == 'h') {
527    type = 's';
528    usgn = true;
529  }
530  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
531
532  if (scal) {
533    SmallString<128> s;
534
535    if (usgn)
536      s.push_back('U');
537    else if (type == 'c')
538      s.push_back('S'); // make chars explicitly signed
539
540    if (type == 'l') // 64-bit long
541      s += "LLi";
542    else
543      s.push_back(type);
544
545    if (cnst)
546      s.push_back('C');
547    if (pntr)
548      s.push_back('*');
549    return s.str();
550  }
551
552  // Since the return value must be one type, return a vector type of the
553  // appropriate width which we will bitcast.  An exception is made for
554  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
555  // fashion, storing them to a pointer arg.
556  if (ret) {
557    if (mod >= '2' && mod <= '4')
558      return "vv*"; // void result with void* first argument
559    if (mod == 'f' || (ck != ClassB && type == 'f'))
560      return quad ? "V4f" : "V2f";
561    if (ck != ClassB && type == 's')
562      return quad ? "V8s" : "V4s";
563    if (ck != ClassB && type == 'i')
564      return quad ? "V4i" : "V2i";
565    if (ck != ClassB && type == 'l')
566      return quad ? "V2LLi" : "V1LLi";
567
568    return quad ? "V16Sc" : "V8Sc";
569  }
570
571  // Non-return array types are passed as individual vectors.
572  if (mod == '2')
573    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
574  if (mod == '3')
575    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
576  if (mod == '4')
577    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
578
579  if (mod == 'f' || (ck != ClassB && type == 'f'))
580    return quad ? "V4f" : "V2f";
581  if (ck != ClassB && type == 's')
582    return quad ? "V8s" : "V4s";
583  if (ck != ClassB && type == 'i')
584    return quad ? "V4i" : "V2i";
585  if (ck != ClassB && type == 'l')
586    return quad ? "V2LLi" : "V1LLi";
587
588  return quad ? "V16Sc" : "V8Sc";
589}
590
591/// InstructionTypeCode - Computes the ARM argument character code and
592/// quad status for a specific type string and ClassKind.
593static void InstructionTypeCode(const StringRef &typeStr,
594                                const ClassKind ck,
595                                bool &quad,
596                                std::string &typeCode) {
597  bool poly = false;
598  bool usgn = false;
599  char type = ClassifyType(typeStr, quad, poly, usgn);
600
601  switch (type) {
602  case 'c':
603    switch (ck) {
604    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
605    case ClassI: typeCode = "i8"; break;
606    case ClassW: typeCode = "8"; break;
607    default: break;
608    }
609    break;
610  case 's':
611    switch (ck) {
612    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
613    case ClassI: typeCode = "i16"; break;
614    case ClassW: typeCode = "16"; break;
615    default: break;
616    }
617    break;
618  case 'i':
619    switch (ck) {
620    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
621    case ClassI: typeCode = "i32"; break;
622    case ClassW: typeCode = "32"; break;
623    default: break;
624    }
625    break;
626  case 'l':
627    switch (ck) {
628    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
629    case ClassI: typeCode = "i64"; break;
630    case ClassW: typeCode = "64"; break;
631    default: break;
632    }
633    break;
634  case 'h':
635    switch (ck) {
636    case ClassS:
637    case ClassI: typeCode = "f16"; break;
638    case ClassW: typeCode = "16"; break;
639    default: break;
640    }
641    break;
642  case 'f':
643    switch (ck) {
644    case ClassS:
645    case ClassI: typeCode = "f32"; break;
646    case ClassW: typeCode = "32"; break;
647    default: break;
648    }
649    break;
650  default:
651    PrintFatalError("unhandled type!");
652  }
653}
654
655/// MangleName - Append a type or width suffix to a base neon function name,
656/// and insert a 'q' in the appropriate location if the operation works on
657/// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
658static std::string MangleName(const std::string &name, StringRef typestr,
659                              ClassKind ck) {
660  if (name == "vcvt_f32_f16")
661    return name;
662
663  bool quad = false;
664  std::string typeCode = "";
665
666  InstructionTypeCode(typestr, ck, quad, typeCode);
667
668  std::string s = name;
669
670  if (typeCode.size() > 0) {
671    s += "_" + typeCode;
672  }
673
674  if (ck == ClassB)
675    s += "_v";
676
677  // Insert a 'q' before the first '_' character so that it ends up before
678  // _lane or _n on vector-scalar operations.
679  if (quad) {
680    size_t pos = s.find('_');
681    s = s.insert(pos, "q");
682  }
683
684  return s;
685}
686
687static void PreprocessInstruction(const StringRef &Name,
688                                  const std::string &InstName,
689                                  std::string &Prefix,
690                                  bool &HasNPostfix,
691                                  bool &HasLanePostfix,
692                                  bool &HasDupPostfix,
693                                  bool &IsSpecialVCvt,
694                                  size_t &TBNumber) {
695  // All of our instruction name fields from arm_neon.td are of the form
696  //   <instructionname>_...
697  // Thus we grab our instruction name via computation of said Prefix.
698  const size_t PrefixEnd = Name.find_first_of('_');
699  // If InstName is passed in, we use that instead of our name Prefix.
700  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
701
702  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
703
704  HasNPostfix = Postfix.count("_n");
705  HasLanePostfix = Postfix.count("_lane");
706  HasDupPostfix = Postfix.count("_dup");
707  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
708
709  if (InstName.compare("vtbl") == 0 ||
710      InstName.compare("vtbx") == 0) {
711    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
712    // encoding to get its true value.
713    TBNumber = Name[Name.size()-1] - 48;
714  }
715}
716
717/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
718/// extracted, generate a FileCheck pattern for a Load Or Store
719static void
720GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
721                                          const std::string& OutTypeCode,
722                                          const bool &IsQuad,
723                                          const bool &HasDupPostfix,
724                                          const bool &HasLanePostfix,
725                                          const size_t Count,
726                                          std::string &RegisterSuffix) {
727  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
728  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
729  // will output a series of v{ld,st}1s, so we have to handle it specially.
730  if ((Count == 3 || Count == 4) && IsQuad) {
731    RegisterSuffix += "{";
732    for (size_t i = 0; i < Count; i++) {
733      RegisterSuffix += "d{{[0-9]+}}";
734      if (HasDupPostfix) {
735        RegisterSuffix += "[]";
736      }
737      if (HasLanePostfix) {
738        RegisterSuffix += "[{{[0-9]+}}]";
739      }
740      if (i < Count-1) {
741        RegisterSuffix += ", ";
742      }
743    }
744    RegisterSuffix += "}";
745  } else {
746
747    // Handle normal loads and stores.
748    RegisterSuffix += "{";
749    for (size_t i = 0; i < Count; i++) {
750      RegisterSuffix += "d{{[0-9]+}}";
751      if (HasDupPostfix) {
752        RegisterSuffix += "[]";
753      }
754      if (HasLanePostfix) {
755        RegisterSuffix += "[{{[0-9]+}}]";
756      }
757      if (IsQuad && !HasLanePostfix) {
758        RegisterSuffix += ", d{{[0-9]+}}";
759        if (HasDupPostfix) {
760          RegisterSuffix += "[]";
761        }
762      }
763      if (i < Count-1) {
764        RegisterSuffix += ", ";
765      }
766    }
767    RegisterSuffix += "}, [r{{[0-9]+}}";
768
769    // We only include the alignment hint if we have a vld1.*64 or
770    // a dup/lane instruction.
771    if (IsLDSTOne) {
772      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
773        RegisterSuffix += ":" + OutTypeCode;
774      } else if (OutTypeCode == "64") {
775        RegisterSuffix += ":64";
776      }
777    }
778
779    RegisterSuffix += "]";
780  }
781}
782
783static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
784                                     const bool &HasNPostfix) {
785  return (NameRef.count("vmla") ||
786          NameRef.count("vmlal") ||
787          NameRef.count("vmlsl") ||
788          NameRef.count("vmull") ||
789          NameRef.count("vqdmlal") ||
790          NameRef.count("vqdmlsl") ||
791          NameRef.count("vqdmulh") ||
792          NameRef.count("vqdmull") ||
793          NameRef.count("vqrdmulh")) && HasNPostfix;
794}
795
796static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
797                                         const bool &HasLanePostfix) {
798  return (NameRef.count("vmla") ||
799          NameRef.count("vmls") ||
800          NameRef.count("vmlal") ||
801          NameRef.count("vmlsl") ||
802          (NameRef.count("vmul") && NameRef.size() == 3)||
803          NameRef.count("vqdmlal") ||
804          NameRef.count("vqdmlsl") ||
805          NameRef.count("vqdmulh") ||
806          NameRef.count("vqrdmulh")) && HasLanePostfix;
807}
808
809static bool IsSpecialLaneMultiply(const StringRef &NameRef,
810                                  const bool &HasLanePostfix,
811                                  const bool &IsQuad) {
812  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
813                               && IsQuad;
814  const bool IsVMull = NameRef.count("mull") && !IsQuad;
815  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
816}
817
818static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
819                                                     const std::string &Proto,
820                                                     const bool &HasNPostfix,
821                                                     const bool &IsQuad,
822                                                     const bool &HasLanePostfix,
823                                                     const bool &HasDupPostfix,
824                                                     std::string &NormedProto) {
825  // Handle generic case.
826  const StringRef NameRef(Name);
827  for (size_t i = 0, end = Proto.size(); i < end; i++) {
828    switch (Proto[i]) {
829    case 'u':
830    case 'f':
831    case 'd':
832    case 's':
833    case 'x':
834    case 't':
835    case 'n':
836      NormedProto += IsQuad? 'q' : 'd';
837      break;
838    case 'w':
839    case 'k':
840      NormedProto += 'q';
841      break;
842    case 'g':
843    case 'h':
844    case 'e':
845      NormedProto += 'd';
846      break;
847    case 'i':
848      NormedProto += HasLanePostfix? 'a' : 'i';
849      break;
850    case 'a':
851      if (HasLanePostfix) {
852        NormedProto += 'a';
853      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
854        NormedProto += IsQuad? 'q' : 'd';
855      } else {
856        NormedProto += 'i';
857      }
858      break;
859    }
860  }
861
862  // Handle Special Cases.
863  const bool IsNotVExt = !NameRef.count("vext");
864  const bool IsVPADAL = NameRef.count("vpadal");
865  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
866                                                           HasLanePostfix);
867  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
868                                                      IsQuad);
869
870  if (IsSpecialLaneMul) {
871    // If
872    NormedProto[2] = NormedProto[3];
873    NormedProto.erase(3);
874  } else if (NormedProto.size() == 4 &&
875             NormedProto[0] == NormedProto[1] &&
876             IsNotVExt) {
877    // If NormedProto.size() == 4 and the first two proto characters are the
878    // same, ignore the first.
879    NormedProto = NormedProto.substr(1, 3);
880  } else if (Is5OpLaneAccum) {
881    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
882    std::string tmp = NormedProto.substr(1,2);
883    tmp += NormedProto[4];
884    NormedProto = tmp;
885  } else if (IsVPADAL) {
886    // If we have VPADAL, ignore the first character.
887    NormedProto = NormedProto.substr(0, 2);
888  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
889    // If our instruction is a dup instruction, keep only the first and
890    // last characters.
891    std::string tmp = "";
892    tmp += NormedProto[0];
893    tmp += NormedProto[NormedProto.size()-1];
894    NormedProto = tmp;
895  }
896}
897
898/// GenerateRegisterCheckPatterns - Given a bunch of data we have
899/// extracted, generate a FileCheck pattern to check that an
900/// instruction's arguments are correct.
901static void GenerateRegisterCheckPattern(const std::string &Name,
902                                         const std::string &Proto,
903                                         const std::string &OutTypeCode,
904                                         const bool &HasNPostfix,
905                                         const bool &IsQuad,
906                                         const bool &HasLanePostfix,
907                                         const bool &HasDupPostfix,
908                                         const size_t &TBNumber,
909                                         std::string &RegisterSuffix) {
910
911  RegisterSuffix = "";
912
913  const StringRef NameRef(Name);
914  const StringRef ProtoRef(Proto);
915
916  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
917    return;
918  }
919
920  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
921  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
922
923  if (IsLoadStore) {
924    // Grab N value from  v{ld,st}N using its ascii representation.
925    const size_t Count = NameRef[3] - 48;
926
927    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
928                                              HasDupPostfix, HasLanePostfix,
929                                              Count, RegisterSuffix);
930  } else if (IsTBXOrTBL) {
931    RegisterSuffix += "d{{[0-9]+}}, {";
932    for (size_t i = 0; i < TBNumber-1; i++) {
933      RegisterSuffix += "d{{[0-9]+}}, ";
934    }
935    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
936  } else {
937    // Handle a normal instruction.
938    if (NameRef.count("vget") || NameRef.count("vset"))
939      return;
940
941    // We first normalize our proto, since we only need to emit 4
942    // different types of checks, yet have more than 4 proto types
943    // that map onto those 4 patterns.
944    std::string NormalizedProto("");
945    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
946                                             HasLanePostfix, HasDupPostfix,
947                                             NormalizedProto);
948
949    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
950      const char &c = NormalizedProto[i];
951      switch (c) {
952      case 'q':
953        RegisterSuffix += "q{{[0-9]+}}, ";
954        break;
955
956      case 'd':
957        RegisterSuffix += "d{{[0-9]+}}, ";
958        break;
959
960      case 'i':
961        RegisterSuffix += "#{{[0-9]+}}, ";
962        break;
963
964      case 'a':
965        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
966        break;
967      }
968    }
969
970    // Remove extra ", ".
971    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
972  }
973}
974
975/// GenerateChecksForIntrinsic - Given a specific instruction name +
976/// typestr + class kind, generate the proper set of FileCheck
977/// Patterns to check for. We could just return a string, but instead
978/// use a vector since it provides us with the extra flexibility of
979/// emitting multiple checks, which comes in handy for certain cases
980/// like mla where we want to check for 2 different instructions.
981static void GenerateChecksForIntrinsic(const std::string &Name,
982                                       const std::string &Proto,
983                                       StringRef &OutTypeStr,
984                                       StringRef &InTypeStr,
985                                       ClassKind Ck,
986                                       const std::string &InstName,
987                                       bool IsHiddenLOp,
988                                       std::vector<std::string>& Result) {
989
990  // If Ck is a ClassNoTest instruction, just return so no test is
991  // emitted.
992  if(Ck == ClassNoTest)
993    return;
994
995  if (Name == "vcvt_f32_f16") {
996    Result.push_back("vcvt.f32.f16");
997    return;
998  }
999
1000
1001  // Now we preprocess our instruction given the data we have to get the
1002  // data that we need.
1003  // Create a StringRef for String Manipulation of our Name.
1004  const StringRef NameRef(Name);
1005  // Instruction Prefix.
1006  std::string Prefix;
1007  // The type code for our out type string.
1008  std::string OutTypeCode;
1009  // To handle our different cases, we need to check for different postfixes.
1010  // Is our instruction a quad instruction.
1011  bool IsQuad = false;
1012  // Our instruction is of the form <instructionname>_n.
1013  bool HasNPostfix = false;
1014  // Our instruction is of the form <instructionname>_lane.
1015  bool HasLanePostfix = false;
1016  // Our instruction is of the form <instructionname>_dup.
1017  bool HasDupPostfix  = false;
1018  // Our instruction is a vcvt instruction which requires special handling.
1019  bool IsSpecialVCvt = false;
1020  // If we have a vtbxN or vtblN instruction, this is set to N.
1021  size_t TBNumber = -1;
1022  // Register Suffix
1023  std::string RegisterSuffix;
1024
1025  PreprocessInstruction(NameRef, InstName, Prefix,
1026                        HasNPostfix, HasLanePostfix, HasDupPostfix,
1027                        IsSpecialVCvt, TBNumber);
1028
1029  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1030  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1031                               HasLanePostfix, HasDupPostfix, TBNumber,
1032                               RegisterSuffix);
1033
1034  // In the following section, we handle a bunch of special cases. You can tell
1035  // a special case by the fact we are returning early.
1036
1037  // If our instruction is a logical instruction without postfix or a
1038  // hidden LOp just return the current Prefix.
1039  if (Ck == ClassL || IsHiddenLOp) {
1040    Result.push_back(Prefix + " " + RegisterSuffix);
1041    return;
1042  }
1043
1044  // If we have a vmov, due to the many different cases, some of which
1045  // vary within the different intrinsics generated for a single
1046  // instruction type, just output a vmov. (e.g. given an instruction
1047  // A, A.u32 might be vmov and A.u8 might be vmov.8).
1048  //
1049  // FIXME: Maybe something can be done about this. The two cases that we care
1050  // about are vmov as an LType and vmov as a WType.
1051  if (Prefix == "vmov") {
1052    Result.push_back(Prefix + " " + RegisterSuffix);
1053    return;
1054  }
1055
1056  // In the following section, we handle special cases.
1057
1058  if (OutTypeCode == "64") {
1059    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1060    // type, the intrinsic will be optimized away, so just return
1061    // nothing.  On the other hand if we are handling an uint64x2_t
1062    // (i.e. quad instruction), vdup/vmov instructions should be
1063    // emitted.
1064    if (Prefix == "vdup" || Prefix == "vext") {
1065      if (IsQuad) {
1066        Result.push_back("{{vmov|vdup}}");
1067      }
1068      return;
1069    }
1070
1071    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1072    // multiple register operands.
1073    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1074                            || Prefix == "vld4";
1075    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1076                            || Prefix == "vst4";
1077    if (MultiLoadPrefix || MultiStorePrefix) {
1078      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1079      return;
1080    }
1081
1082    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1083    // emitting said instructions. So return a check for
1084    // vldr/vstr/vmov/str instead.
1085    if (HasLanePostfix || HasDupPostfix) {
1086      if (Prefix == "vst1") {
1087        Result.push_back("{{str|vstr|vmov}}");
1088        return;
1089      } else if (Prefix == "vld1") {
1090        Result.push_back("{{ldr|vldr|vmov}}");
1091        return;
1092      }
1093    }
1094  }
1095
1096  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1097  // sometimes disassembled as vtrn.32. We use a regex to handle both
1098  // cases.
1099  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1100    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1101    return;
1102  }
1103
1104  // Currently on most ARM processors, we do not use vmla/vmls for
1105  // quad floating point operations. Instead we output vmul + vadd. So
1106  // check if we have one of those instructions and just output a
1107  // check for vmul.
1108  if (OutTypeCode == "f32") {
1109    if (Prefix == "vmls") {
1110      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1111      Result.push_back("vsub." + OutTypeCode);
1112      return;
1113    } else if (Prefix == "vmla") {
1114      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1115      Result.push_back("vadd." + OutTypeCode);
1116      return;
1117    }
1118  }
1119
1120  // If we have vcvt, get the input type from the instruction name
1121  // (which should be of the form instname_inputtype) and append it
1122  // before the output type.
1123  if (Prefix == "vcvt") {
1124    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1125    Prefix += "." + inTypeCode;
1126  }
1127
1128  // Append output type code to get our final mangled instruction.
1129  Prefix += "." + OutTypeCode;
1130
1131  Result.push_back(Prefix + " " + RegisterSuffix);
1132}
1133
/// UseMacro - Return true if an intrinsic with prototype \p proto has to be
/// defined as a preprocessor macro rather than an inline function.  That is
/// the case when the prototype contains:
///   'i'        an immediate argument, so that SemaChecking can range check
///              the immediate passed by the user;
///   'p' / 'c'  a pointer argument, so that aligned attributes on the
///              pointer type are not hidden.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
1151
/// MacroArgUsedDirectly - For an intrinsic defined as a macro, return true
/// if the argument described by proto[i] is referenced directly in the macro
/// body instead of first being copied into a local temporary.  This holds
/// for constant ints ('i'), pointers ('p') and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1159
1160// Generate the string "(argtype a, argtype b, ...)"
1161static std::string GenArgs(const std::string &proto, StringRef typestr) {
1162  bool define = UseMacro(proto);
1163  char arg = 'a';
1164
1165  std::string s;
1166  s += "(";
1167
1168  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1169    if (define) {
1170      // Some macro arguments are used directly instead of being assigned
1171      // to local temporaries; prepend an underscore prefix to make their
1172      // names consistent with the local temporaries.
1173      if (MacroArgUsedDirectly(proto, i))
1174        s += "__";
1175    } else {
1176      s += TypeString(proto[i], typestr) + " __";
1177    }
1178    s.push_back(arg);
1179    if ((i + 1) < e)
1180      s += ", ";
1181  }
1182
1183  s += ")";
1184  return s;
1185}
1186
1187// Macro arguments are not type-checked like inline function arguments, so
1188// assign them to local temporaries to get the right type checking.
1189static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1190  char arg = 'a';
1191  std::string s;
1192  bool generatedLocal = false;
1193
1194  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1195    // Do not create a temporary for an immediate argument.
1196    // That would defeat the whole point of using a macro!
1197    if (MacroArgUsedDirectly(proto, i))
1198      continue;
1199    generatedLocal = true;
1200
1201    s += TypeString(proto[i], typestr) + " __";
1202    s.push_back(arg);
1203    s += " = (";
1204    s.push_back(arg);
1205    s += "); ";
1206  }
1207
1208  if (generatedLocal)
1209    s += "\\\n  ";
1210  return s;
1211}
1212
1213// Use the vmovl builtin to sign-extend or zero-extend a vector.
1214static std::string Extend(StringRef typestr, const std::string &a) {
1215  std::string s;
1216  s = MangleName("vmovl", typestr, ClassS);
1217  s += "(" + a + ")";
1218  return s;
1219}
1220
1221static std::string Duplicate(unsigned nElts, StringRef typestr,
1222                             const std::string &a) {
1223  std::string s;
1224
1225  s = "(" + TypeString('d', typestr) + "){ ";
1226  for (unsigned i = 0; i != nElts; ++i) {
1227    s += a;
1228    if ((i + 1) < nElts)
1229      s += ", ";
1230  }
1231  s += " }";
1232
1233  return s;
1234}
1235
// Generate a __builtin_shufflevector call that shuffles `vec` with itself
// and selects index `lane` for each of the nElts result elements:
//   "__builtin_shufflevector(vec, vec, lane, lane, ...)"
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string call("__builtin_shufflevector(");
  call += vec;
  call += ", ";
  call += vec;
  for (unsigned remaining = nElts; remaining != 0; --remaining) {
    call += ", ";
    call += lane;
  }
  call += ")";
  return call;
}
1244
1245static unsigned GetNumElements(StringRef typestr, bool &quad) {
1246  quad = false;
1247  bool dummy = false;
1248  char type = ClassifyType(typestr, quad, dummy, dummy);
1249  unsigned nElts = 0;
1250  switch (type) {
1251  case 'c': nElts = 8; break;
1252  case 's': nElts = 4; break;
1253  case 'i': nElts = 2; break;
1254  case 'l': nElts = 1; break;
1255  case 'h': nElts = 4; break;
1256  case 'f': nElts = 2; break;
1257  default:
1258    PrintFatalError("unhandled type!");
1259  }
1260  if (quad) nElts <<= 1;
1261  return nElts;
1262}
1263
1264// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1265static std::string GenOpString(OpKind op, const std::string &proto,
1266                               StringRef typestr) {
1267  bool quad;
1268  unsigned nElts = GetNumElements(typestr, quad);
1269  bool define = UseMacro(proto);
1270
1271  std::string ts = TypeString(proto[0], typestr);
1272  std::string s;
1273  if (!define) {
1274    s = "return ";
1275  }
1276
1277  switch(op) {
1278  case OpAdd:
1279    s += "__a + __b;";
1280    break;
1281  case OpAddl:
1282    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1283    break;
1284  case OpAddw:
1285    s += "__a + " + Extend(typestr, "__b") + ";";
1286    break;
1287  case OpSub:
1288    s += "__a - __b;";
1289    break;
1290  case OpSubl:
1291    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1292    break;
1293  case OpSubw:
1294    s += "__a - " + Extend(typestr, "__b") + ";";
1295    break;
1296  case OpMulN:
1297    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1298    break;
1299  case OpMulLane:
1300    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1301    break;
1302  case OpMul:
1303    s += "__a * __b;";
1304    break;
1305  case OpMullLane:
1306    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1307      SplatLane(nElts, "__b", "__c") + ");";
1308    break;
1309  case OpMlaN:
1310    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1311    break;
1312  case OpMlaLane:
1313    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1314    break;
1315  case OpMla:
1316    s += "__a + (__b * __c);";
1317    break;
1318  case OpMlalN:
1319    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1320      Duplicate(nElts, typestr, "__c") + ");";
1321    break;
1322  case OpMlalLane:
1323    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1324      SplatLane(nElts, "__c", "__d") + ");";
1325    break;
1326  case OpMlal:
1327    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1328    break;
1329  case OpMlsN:
1330    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1331    break;
1332  case OpMlsLane:
1333    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1334    break;
1335  case OpMls:
1336    s += "__a - (__b * __c);";
1337    break;
1338  case OpMlslN:
1339    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1340      Duplicate(nElts, typestr, "__c") + ");";
1341    break;
1342  case OpMlslLane:
1343    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1344      SplatLane(nElts, "__c", "__d") + ");";
1345    break;
1346  case OpMlsl:
1347    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1348    break;
1349  case OpQDMullLane:
1350    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1351      SplatLane(nElts, "__b", "__c") + ");";
1352    break;
1353  case OpQDMlalLane:
1354    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1355      SplatLane(nElts, "__c", "__d") + ");";
1356    break;
1357  case OpQDMlslLane:
1358    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1359      SplatLane(nElts, "__c", "__d") + ");";
1360    break;
1361  case OpQDMulhLane:
1362    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1363      SplatLane(nElts, "__b", "__c") + ");";
1364    break;
1365  case OpQRDMulhLane:
1366    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1367      SplatLane(nElts, "__b", "__c") + ");";
1368    break;
1369  case OpEq:
1370    s += "(" + ts + ")(__a == __b);";
1371    break;
1372  case OpGe:
1373    s += "(" + ts + ")(__a >= __b);";
1374    break;
1375  case OpLe:
1376    s += "(" + ts + ")(__a <= __b);";
1377    break;
1378  case OpGt:
1379    s += "(" + ts + ")(__a > __b);";
1380    break;
1381  case OpLt:
1382    s += "(" + ts + ")(__a < __b);";
1383    break;
1384  case OpNeg:
1385    s += " -__a;";
1386    break;
1387  case OpNot:
1388    s += " ~__a;";
1389    break;
1390  case OpAnd:
1391    s += "__a & __b;";
1392    break;
1393  case OpOr:
1394    s += "__a | __b;";
1395    break;
1396  case OpXor:
1397    s += "__a ^ __b;";
1398    break;
1399  case OpAndNot:
1400    s += "__a & ~__b;";
1401    break;
1402  case OpOrNot:
1403    s += "__a | ~__b;";
1404    break;
1405  case OpCast:
1406    s += "(" + ts + ")__a;";
1407    break;
1408  case OpConcat:
1409    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1410    s += ", (int64x1_t)__b, 0, 1);";
1411    break;
1412  case OpHi:
1413    // nElts is for the result vector, so the source is twice that number.
1414    s += "__builtin_shufflevector(__a, __a";
1415    for (unsigned i = nElts; i < nElts * 2; ++i)
1416      s += ", " + utostr(i);
1417    s+= ");";
1418    break;
1419  case OpLo:
1420    s += "__builtin_shufflevector(__a, __a";
1421    for (unsigned i = 0; i < nElts; ++i)
1422      s += ", " + utostr(i);
1423    s+= ");";
1424    break;
1425  case OpDup:
1426    s += Duplicate(nElts, typestr, "__a") + ";";
1427    break;
1428  case OpDupLane:
1429    s += SplatLane(nElts, "__a", "__b") + ";";
1430    break;
1431  case OpSelect:
1432    // ((0 & 1) | (~0 & 2))
1433    s += "(" + ts + ")";
1434    ts = TypeString(proto[1], typestr);
1435    s += "((__a & (" + ts + ")__b) | ";
1436    s += "(~__a & (" + ts + ")__c));";
1437    break;
1438  case OpRev16:
1439    s += "__builtin_shufflevector(__a, __a";
1440    for (unsigned i = 2; i <= nElts; i += 2)
1441      for (unsigned j = 0; j != 2; ++j)
1442        s += ", " + utostr(i - j - 1);
1443    s += ");";
1444    break;
1445  case OpRev32: {
1446    unsigned WordElts = nElts >> (1 + (int)quad);
1447    s += "__builtin_shufflevector(__a, __a";
1448    for (unsigned i = WordElts; i <= nElts; i += WordElts)
1449      for (unsigned j = 0; j != WordElts; ++j)
1450        s += ", " + utostr(i - j - 1);
1451    s += ");";
1452    break;
1453  }
1454  case OpRev64: {
1455    unsigned DblWordElts = nElts >> (int)quad;
1456    s += "__builtin_shufflevector(__a, __a";
1457    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1458      for (unsigned j = 0; j != DblWordElts; ++j)
1459        s += ", " + utostr(i - j - 1);
1460    s += ");";
1461    break;
1462  }
1463  case OpAbdl: {
1464    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1465    if (typestr[0] != 'U') {
1466      // vabd results are always unsigned and must be zero-extended.
1467      std::string utype = "U" + typestr.str();
1468      s += "(" + TypeString(proto[0], typestr) + ")";
1469      abd = "(" + TypeString('d', utype) + ")" + abd;
1470      s += Extend(utype, abd) + ";";
1471    } else {
1472      s += Extend(typestr, abd) + ";";
1473    }
1474    break;
1475  }
1476  case OpAba:
1477    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
1478    break;
1479  case OpAbal: {
1480    s += "__a + ";
1481    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
1482    if (typestr[0] != 'U') {
1483      // vabd results are always unsigned and must be zero-extended.
1484      std::string utype = "U" + typestr.str();
1485      s += "(" + TypeString(proto[0], typestr) + ")";
1486      abd = "(" + TypeString('d', utype) + ")" + abd;
1487      s += Extend(utype, abd) + ";";
1488    } else {
1489      s += Extend(typestr, abd) + ";";
1490    }
1491    break;
1492  }
1493  default:
1494    PrintFatalError("unknown OpKind!");
1495  }
1496  return s;
1497}
1498
1499static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1500  unsigned mod = proto[0];
1501
1502  if (mod == 'v' || mod == 'f')
1503    mod = proto[1];
1504
1505  bool quad = false;
1506  bool poly = false;
1507  bool usgn = false;
1508  bool scal = false;
1509  bool cnst = false;
1510  bool pntr = false;
1511
1512  // Base type to get the type string for.
1513  char type = ClassifyType(typestr, quad, poly, usgn);
1514
1515  // Based on the modifying character, change the type and width if necessary.
1516  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1517
1518  NeonTypeFlags::EltType ET;
1519  switch (type) {
1520    case 'c':
1521      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1522      break;
1523    case 's':
1524      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1525      break;
1526    case 'i':
1527      ET = NeonTypeFlags::Int32;
1528      break;
1529    case 'l':
1530      ET = NeonTypeFlags::Int64;
1531      break;
1532    case 'h':
1533      ET = NeonTypeFlags::Float16;
1534      break;
1535    case 'f':
1536      ET = NeonTypeFlags::Float32;
1537      break;
1538    default:
1539      PrintFatalError("unhandled type!");
1540  }
1541  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1542  return Flags.getFlags();
1543}
1544
1545// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
1546static std::string GenBuiltin(const std::string &name, const std::string &proto,
1547                              StringRef typestr, ClassKind ck) {
1548  std::string s;
1549
1550  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1551  // sret-like argument.
1552  bool sret = (proto[0] >= '2' && proto[0] <= '4');
1553
1554  bool define = UseMacro(proto);
1555
1556  // Check if the prototype has a scalar operand with the type of the vector
1557  // elements.  If not, bitcasting the args will take care of arg checking.
1558  // The actual signedness etc. will be taken care of with special enums.
1559  if (proto.find('s') == std::string::npos)
1560    ck = ClassB;
1561
1562  if (proto[0] != 'v') {
1563    std::string ts = TypeString(proto[0], typestr);
1564
1565    if (define) {
1566      if (sret)
1567        s += ts + " r; ";
1568      else
1569        s += "(" + ts + ")";
1570    } else if (sret) {
1571      s += ts + " r; ";
1572    } else {
1573      s += "return (" + ts + ")";
1574    }
1575  }
1576
1577  bool splat = proto.find('a') != std::string::npos;
1578
1579  s += "__builtin_neon_";
1580  if (splat) {
1581    // Call the non-splat builtin: chop off the "_n" suffix from the name.
1582    std::string vname(name, 0, name.size()-2);
1583    s += MangleName(vname, typestr, ck);
1584  } else {
1585    s += MangleName(name, typestr, ck);
1586  }
1587  s += "(";
1588
1589  // Pass the address of the return variable as the first argument to sret-like
1590  // builtins.
1591  if (sret)
1592    s += "&r, ";
1593
1594  char arg = 'a';
1595  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1596    std::string args = std::string(&arg, 1);
1597
1598    // Use the local temporaries instead of the macro arguments.
1599    args = "__" + args;
1600
1601    bool argQuad = false;
1602    bool argPoly = false;
1603    bool argUsgn = false;
1604    bool argScalar = false;
1605    bool dummy = false;
1606    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
1607    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
1608                      dummy, dummy);
1609
1610    // Handle multiple-vector values specially, emitting each subvector as an
1611    // argument to the __builtin.
1612    if (proto[i] >= '2' && proto[i] <= '4') {
1613      // Check if an explicit cast is needed.
1614      if (argType != 'c' || argPoly || argUsgn)
1615        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
1616
1617      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
1618        s += args + ".val[" + utostr(vi) + "]";
1619        if ((vi + 1) < ve)
1620          s += ", ";
1621      }
1622      if ((i + 1) < e)
1623        s += ", ";
1624
1625      continue;
1626    }
1627
1628    if (splat && (i + 1) == e)
1629      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
1630
1631    // Check if an explicit cast is needed.
1632    if ((splat || !argScalar) &&
1633        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
1634      std::string argTypeStr = "c";
1635      if (ck != ClassB)
1636        argTypeStr = argType;
1637      if (argQuad)
1638        argTypeStr = "Q" + argTypeStr;
1639      args = "(" + TypeString('d', argTypeStr) + ")" + args;
1640    }
1641
1642    s += args;
1643    if ((i + 1) < e)
1644      s += ", ";
1645  }
1646
1647  // Extra constant integer to hold type class enum for this function, e.g. s8
1648  if (ck == ClassB)
1649    s += ", " + utostr(GetNeonEnum(proto, typestr));
1650
1651  s += ");";
1652
1653  if (proto[0] != 'v' && sret) {
1654    if (define)
1655      s += " r;";
1656    else
1657      s += " return r;";
1658  }
1659  return s;
1660}
1661
1662static std::string GenBuiltinDef(const std::string &name,
1663                                 const std::string &proto,
1664                                 StringRef typestr, ClassKind ck) {
1665  std::string s("BUILTIN(__builtin_neon_");
1666
1667  // If all types are the same size, bitcasting the args will take care
1668  // of arg checking.  The actual signedness etc. will be taken care of with
1669  // special enums.
1670  if (proto.find('s') == std::string::npos)
1671    ck = ClassB;
1672
1673  s += MangleName(name, typestr, ck);
1674  s += ", \"";
1675
1676  for (unsigned i = 0, e = proto.size(); i != e; ++i)
1677    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1678
1679  // Extra constant integer to hold type class enum for this function, e.g. s8
1680  if (ck == ClassB)
1681    s += "i";
1682
1683  s += "\", \"n\")";
1684  return s;
1685}
1686
1687static std::string GenIntrinsic(const std::string &name,
1688                                const std::string &proto,
1689                                StringRef outTypeStr, StringRef inTypeStr,
1690                                OpKind kind, ClassKind classKind) {
1691  assert(!proto.empty() && "");
1692  bool define = UseMacro(proto) && kind != OpUnavailable;
1693  std::string s;
1694
1695  // static always inline + return type
1696  if (define)
1697    s += "#define ";
1698  else
1699    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1700
1701  // Function name with type suffix
1702  std::string mangledName = MangleName(name, outTypeStr, ClassS);
1703  if (outTypeStr != inTypeStr) {
1704    // If the input type is different (e.g., for vreinterpret), append a suffix
1705    // for the input type.  String off a "Q" (quad) prefix so that MangleName
1706    // does not insert another "q" in the name.
1707    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1708    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1709    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1710  }
1711  s += mangledName;
1712
1713  // Function arguments
1714  s += GenArgs(proto, inTypeStr);
1715
1716  // Definition.
1717  if (define) {
1718    s += " __extension__ ({ \\\n  ";
1719    s += GenMacroLocals(proto, inTypeStr);
1720  } else if (kind == OpUnavailable) {
1721    s += " __attribute__((unavailable));\n";
1722    return s;
1723  } else
1724    s += " {\n  ";
1725
1726  if (kind != OpNone)
1727    s += GenOpString(kind, proto, outTypeStr);
1728  else
1729    s += GenBuiltin(name, proto, outTypeStr, classKind);
1730  if (define)
1731    s += " })";
1732  else
1733    s += " }";
1734  s += "\n";
1735  return s;
1736}
1737
1738/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
1739/// is comprised of type definitions and function declarations.
1740void NeonEmitter::run(raw_ostream &OS) {
1741  OS <<
1742    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
1743    "---===\n"
1744    " *\n"
1745    " * Permission is hereby granted, free of charge, to any person obtaining "
1746    "a copy\n"
1747    " * of this software and associated documentation files (the \"Software\"),"
1748    " to deal\n"
1749    " * in the Software without restriction, including without limitation the "
1750    "rights\n"
1751    " * to use, copy, modify, merge, publish, distribute, sublicense, "
1752    "and/or sell\n"
1753    " * copies of the Software, and to permit persons to whom the Software is\n"
1754    " * furnished to do so, subject to the following conditions:\n"
1755    " *\n"
1756    " * The above copyright notice and this permission notice shall be "
1757    "included in\n"
1758    " * all copies or substantial portions of the Software.\n"
1759    " *\n"
1760    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
1761    "EXPRESS OR\n"
1762    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
1763    "MERCHANTABILITY,\n"
1764    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
1765    "SHALL THE\n"
1766    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
1767    "OTHER\n"
1768    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
1769    "ARISING FROM,\n"
1770    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
1771    "DEALINGS IN\n"
1772    " * THE SOFTWARE.\n"
1773    " *\n"
1774    " *===--------------------------------------------------------------------"
1775    "---===\n"
1776    " */\n\n";
1777
1778  OS << "#ifndef __ARM_NEON_H\n";
1779  OS << "#define __ARM_NEON_H\n\n";
1780
1781  OS << "#ifndef __ARM_NEON__\n";
1782  OS << "#error \"NEON support not enabled\"\n";
1783  OS << "#endif\n\n";
1784
1785  OS << "#include <stdint.h>\n\n";
1786
1787  // Emit NEON-specific scalar typedefs.
1788  OS << "typedef float float32_t;\n";
1789  OS << "typedef int8_t poly8_t;\n";
1790  OS << "typedef int16_t poly16_t;\n";
1791  OS << "typedef uint16_t float16_t;\n";
1792
1793  // Emit Neon vector typedefs.
1794  std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
1795  SmallVector<StringRef, 24> TDTypeVec;
1796  ParseTypes(0, TypedefTypes, TDTypeVec);
1797
1798  // Emit vector typedefs.
1799  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1800    bool dummy, quad = false, poly = false;
1801    (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
1802    if (poly)
1803      OS << "typedef __attribute__((neon_polyvector_type(";
1804    else
1805      OS << "typedef __attribute__((neon_vector_type(";
1806
1807    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
1808    OS << utostr(nElts) << "))) ";
1809    if (nElts < 10)
1810      OS << " ";
1811
1812    OS << TypeString('s', TDTypeVec[i]);
1813    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
1814  }
1815  OS << "\n";
1816
1817  // Emit struct typedefs.
1818  for (unsigned vi = 2; vi != 5; ++vi) {
1819    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1820      std::string ts = TypeString('d', TDTypeVec[i]);
1821      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
1822      OS << "typedef struct " << vs << " {\n";
1823      OS << "  " << ts << " val";
1824      OS << "[" << utostr(vi) << "]";
1825      OS << ";\n} ";
1826      OS << vs << ";\n\n";
1827    }
1828  }
1829
1830  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
1831
1832  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1833
1834  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
1835  // intrinsics.  (Some of the saturating multiply instructions are also
1836  // used to implement the corresponding "_lane" variants, but tablegen
1837  // sorts the records into alphabetical order so that the "_lane" variants
1838  // come after the intrinsics they use.)
1839  emitIntrinsic(OS, Records.getDef("VMOVL"));
1840  emitIntrinsic(OS, Records.getDef("VMULL"));
1841  emitIntrinsic(OS, Records.getDef("VABD"));
1842
1843  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1844    Record *R = RV[i];
1845    if (R->getName() != "VMOVL" &&
1846        R->getName() != "VMULL" &&
1847        R->getName() != "VABD")
1848      emitIntrinsic(OS, R);
1849  }
1850
1851  OS << "#undef __ai\n\n";
1852  OS << "#endif /* __ARM_NEON_H */\n";
1853}
1854
1855/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
1856/// intrinsics specified by record R.
1857void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
1858  std::string name = R->getValueAsString("Name");
1859  std::string Proto = R->getValueAsString("Prototype");
1860  std::string Types = R->getValueAsString("Types");
1861
1862  SmallVector<StringRef, 16> TypeVec;
1863  ParseTypes(R, Types, TypeVec);
1864
1865  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1866
1867  ClassKind classKind = ClassNone;
1868  if (R->getSuperClasses().size() >= 2)
1869    classKind = ClassMap[R->getSuperClasses()[1]];
1870  if (classKind == ClassNone && kind == OpNone)
1871    PrintFatalError(R->getLoc(), "Builtin has no class kind");
1872
1873  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1874    if (kind == OpReinterpret) {
1875      bool outQuad = false;
1876      bool dummy = false;
1877      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1878      for (unsigned srcti = 0, srcte = TypeVec.size();
1879           srcti != srcte; ++srcti) {
1880        bool inQuad = false;
1881        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1882        if (srcti == ti || inQuad != outQuad)
1883          continue;
1884        OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
1885                           OpCast, ClassS);
1886      }
1887    } else {
1888      OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
1889                         kind, classKind);
1890    }
1891  }
1892  OS << "\n";
1893}
1894
1895static unsigned RangeFromType(const char mod, StringRef typestr) {
1896  // base type to get the type string for.
1897  bool quad = false, dummy = false;
1898  char type = ClassifyType(typestr, quad, dummy, dummy);
1899  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
1900
1901  switch (type) {
1902    case 'c':
1903      return (8 << (int)quad) - 1;
1904    case 'h':
1905    case 's':
1906      return (4 << (int)quad) - 1;
1907    case 'f':
1908    case 'i':
1909      return (2 << (int)quad) - 1;
1910    case 'l':
1911      return (1 << (int)quad) - 1;
1912    default:
1913      PrintFatalError("unhandled type!");
1914  }
1915}
1916
1917/// runHeader - Emit a file with sections defining:
1918/// 1. the NEON section of BuiltinsARM.def.
1919/// 2. the SemaChecking code for the type overload checking.
1920/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
1921void NeonEmitter::runHeader(raw_ostream &OS) {
1922  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1923
1924  StringMap<OpKind> EmittedMap;
1925
1926  // Generate BuiltinsARM.def for NEON
1927  OS << "#ifdef GET_NEON_BUILTINS\n";
1928  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1929    Record *R = RV[i];
1930    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1931    if (k != OpNone)
1932      continue;
1933
1934    std::string Proto = R->getValueAsString("Prototype");
1935
1936    // Functions with 'a' (the splat code) in the type prototype should not get
1937    // their own builtin as they use the non-splat variant.
1938    if (Proto.find('a') != std::string::npos)
1939      continue;
1940
1941    std::string Types = R->getValueAsString("Types");
1942    SmallVector<StringRef, 16> TypeVec;
1943    ParseTypes(R, Types, TypeVec);
1944
1945    if (R->getSuperClasses().size() < 2)
1946      PrintFatalError(R->getLoc(), "Builtin has no class kind");
1947
1948    std::string name = R->getValueAsString("Name");
1949    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1950
1951    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1952      // Generate the BuiltinsARM.def declaration for this builtin, ensuring
1953      // that each unique BUILTIN() macro appears only once in the output
1954      // stream.
1955      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
1956      if (EmittedMap.count(bd))
1957        continue;
1958
1959      EmittedMap[bd] = OpNone;
1960      OS << bd << "\n";
1961    }
1962  }
1963  OS << "#endif\n\n";
1964
1965  // Generate the overloaded type checking code for SemaChecking.cpp
1966  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
1967  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1968    Record *R = RV[i];
1969    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1970    if (k != OpNone)
1971      continue;
1972
1973    std::string Proto = R->getValueAsString("Prototype");
1974    std::string Types = R->getValueAsString("Types");
1975    std::string name = R->getValueAsString("Name");
1976
1977    // Functions with 'a' (the splat code) in the type prototype should not get
1978    // their own builtin as they use the non-splat variant.
1979    if (Proto.find('a') != std::string::npos)
1980      continue;
1981
1982    // Functions which have a scalar argument cannot be overloaded, no need to
1983    // check them if we are emitting the type checking code.
1984    if (Proto.find('s') != std::string::npos)
1985      continue;
1986
1987    SmallVector<StringRef, 16> TypeVec;
1988    ParseTypes(R, Types, TypeVec);
1989
1990    if (R->getSuperClasses().size() < 2)
1991      PrintFatalError(R->getLoc(), "Builtin has no class kind");
1992
1993    int si = -1, qi = -1;
1994    uint64_t mask = 0, qmask = 0;
1995    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1996      // Generate the switch case(s) for this builtin for the type validation.
1997      bool quad = false, poly = false, usgn = false;
1998      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
1999
2000      if (quad) {
2001        qi = ti;
2002        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2003      } else {
2004        si = ti;
2005        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2006      }
2007    }
2008
2009    // Check if the builtin function has a pointer or const pointer argument.
2010    int PtrArgNum = -1;
2011    bool HasConstPtr = false;
2012    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2013      char ArgType = Proto[arg];
2014      if (ArgType == 'c') {
2015        HasConstPtr = true;
2016        PtrArgNum = arg - 1;
2017        break;
2018      }
2019      if (ArgType == 'p') {
2020        PtrArgNum = arg - 1;
2021        break;
2022      }
2023    }
2024    // For sret builtins, adjust the pointer argument index.
2025    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
2026      PtrArgNum += 1;
2027
2028    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2029    // and vst1_lane intrinsics.  Using a pointer to the vector element
2030    // type with one of those operations causes codegen to select an aligned
2031    // load/store instruction.  If you want an unaligned operation,
2032    // the pointer argument needs to have less alignment than element type,
2033    // so just accept any pointer type.
2034    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2035      PtrArgNum = -1;
2036      HasConstPtr = false;
2037    }
2038
2039    if (mask) {
2040      OS << "case ARM::BI__builtin_neon_"
2041         << MangleName(name, TypeVec[si], ClassB)
2042         << ": mask = " << "0x" << utohexstr(mask) << "ULL";
2043      if (PtrArgNum >= 0)
2044        OS << "; PtrArgNum = " << PtrArgNum;
2045      if (HasConstPtr)
2046        OS << "; HasConstPtr = true";
2047      OS << "; break;\n";
2048    }
2049    if (qmask) {
2050      OS << "case ARM::BI__builtin_neon_"
2051         << MangleName(name, TypeVec[qi], ClassB)
2052         << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
2053      if (PtrArgNum >= 0)
2054        OS << "; PtrArgNum = " << PtrArgNum;
2055      if (HasConstPtr)
2056        OS << "; HasConstPtr = true";
2057      OS << "; break;\n";
2058    }
2059  }
2060  OS << "#endif\n\n";
2061
2062  // Generate the intrinsic range checking code for shift/lane immediates.
2063  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2064  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2065    Record *R = RV[i];
2066
2067    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2068    if (k != OpNone)
2069      continue;
2070
2071    std::string name = R->getValueAsString("Name");
2072    std::string Proto = R->getValueAsString("Prototype");
2073    std::string Types = R->getValueAsString("Types");
2074
2075    // Functions with 'a' (the splat code) in the type prototype should not get
2076    // their own builtin as they use the non-splat variant.
2077    if (Proto.find('a') != std::string::npos)
2078      continue;
2079
2080    // Functions which do not have an immediate do not need to have range
2081    // checking code emitted.
2082    size_t immPos = Proto.find('i');
2083    if (immPos == std::string::npos)
2084      continue;
2085
2086    SmallVector<StringRef, 16> TypeVec;
2087    ParseTypes(R, Types, TypeVec);
2088
2089    if (R->getSuperClasses().size() < 2)
2090      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2091
2092    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2093
2094    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2095      std::string namestr, shiftstr, rangestr;
2096
2097      if (R->getValueAsBit("isVCVT_N")) {
2098        // VCVT between floating- and fixed-point values takes an immediate
2099        // in the range 1 to 32.
2100        ck = ClassB;
2101        rangestr = "l = 1; u = 31"; // upper bound = l + u
2102      } else if (Proto.find('s') == std::string::npos) {
2103        // Builtins which are overloaded by type will need to have their upper
2104        // bound computed at Sema time based on the type constant.
2105        ck = ClassB;
2106        if (R->getValueAsBit("isShift")) {
2107          shiftstr = ", true";
2108
2109          // Right shifts have an 'r' in the name, left shifts do not.
2110          if (name.find('r') != std::string::npos)
2111            rangestr = "l = 1; ";
2112        }
2113        rangestr += "u = RFT(TV" + shiftstr + ")";
2114      } else {
2115        // The immediate generally refers to a lane in the preceding argument.
2116        assert(immPos > 0 && "unexpected immediate operand");
2117        rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
2118      }
2119      // Make sure cases appear only once by uniquing them in a string map.
2120      namestr = MangleName(name, TypeVec[ti], ck);
2121      if (EmittedMap.count(namestr))
2122        continue;
2123      EmittedMap[namestr] = OpNone;
2124
2125      // Calculate the index of the immediate that should be range checked.
2126      unsigned immidx = 0;
2127
2128      // Builtins that return a struct of multiple vectors have an extra
2129      // leading arg for the struct return.
2130      if (Proto[0] >= '2' && Proto[0] <= '4')
2131        ++immidx;
2132
2133      // Add one to the index for each argument until we reach the immediate
2134      // to be checked.  Structs of vectors are passed as multiple arguments.
2135      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2136        switch (Proto[ii]) {
2137          default:  immidx += 1; break;
2138          case '2': immidx += 2; break;
2139          case '3': immidx += 3; break;
2140          case '4': immidx += 4; break;
2141          case 'i': ie = ii + 1; break;
2142        }
2143      }
2144      OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
2145         << ": i = " << immidx << "; " << rangestr << "; break;\n";
2146    }
2147  }
2148  OS << "#endif\n\n";
2149}
2150
2151/// GenTest - Write out a test for the intrinsic specified by the name and
2152/// type strings, including the embedded patterns for FileCheck to match.
2153static std::string GenTest(const std::string &name,
2154                           const std::string &proto,
2155                           StringRef outTypeStr, StringRef inTypeStr,
2156                           bool isShift, bool isHiddenLOp,
2157                           ClassKind ck, const std::string &InstName) {
2158  assert(!proto.empty() && "");
2159  std::string s;
2160
2161  // Function name with type suffix
2162  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2163  if (outTypeStr != inTypeStr) {
2164    // If the input type is different (e.g., for vreinterpret), append a suffix
2165    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2166    // does not insert another "q" in the name.
2167    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2168    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2169    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2170  }
2171
2172  std::vector<std::string> FileCheckPatterns;
2173  GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2174                             isHiddenLOp, FileCheckPatterns);
2175
2176  // Emit the FileCheck patterns.
2177  s += "// CHECK: test_" + mangledName + "\n";
2178  // If for any reason we do not want to emit a check, mangledInst
2179  // will be the empty string.
2180  if (FileCheckPatterns.size()) {
2181    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2182                                                  e = FileCheckPatterns.end();
2183         i != e;
2184         ++i) {
2185      s += "// CHECK: " + *i + "\n";
2186    }
2187  }
2188
2189  // Emit the start of the test function.
2190  s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2191  char arg = 'a';
2192  std::string comma;
2193  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2194    // Do not create arguments for values that must be immediate constants.
2195    if (proto[i] == 'i')
2196      continue;
2197    s += comma + TypeString(proto[i], inTypeStr) + " ";
2198    s.push_back(arg);
2199    comma = ", ";
2200  }
2201  s += ") {\n  ";
2202
2203  if (proto[0] != 'v')
2204    s += "return ";
2205  s += mangledName + "(";
2206  arg = 'a';
2207  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2208    if (proto[i] == 'i') {
2209      // For immediate operands, test the maximum value.
2210      if (isShift)
2211        s += "1"; // FIXME
2212      else
2213        // The immediate generally refers to a lane in the preceding argument.
2214        s += utostr(RangeFromType(proto[i-1], inTypeStr));
2215    } else {
2216      s.push_back(arg);
2217    }
2218    if ((i + 1) < e)
2219      s += ", ";
2220  }
2221  s += ");\n}\n\n";
2222  return s;
2223}
2224
2225/// runTests - Write out a complete set of tests for all of the Neon
2226/// intrinsics.
2227void NeonEmitter::runTests(raw_ostream &OS) {
2228  OS <<
2229    "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n"
2230    "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2231    "// RUN:  | FileCheck %s\n"
2232    "\n"
2233    "// REQUIRES: long_tests\n"
2234    "\n"
2235    "#include <arm_neon.h>\n"
2236    "\n";
2237
2238  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2239  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2240    Record *R = RV[i];
2241    std::string name = R->getValueAsString("Name");
2242    std::string Proto = R->getValueAsString("Prototype");
2243    std::string Types = R->getValueAsString("Types");
2244    bool isShift = R->getValueAsBit("isShift");
2245    std::string InstName = R->getValueAsString("InstName");
2246    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2247
2248    SmallVector<StringRef, 16> TypeVec;
2249    ParseTypes(R, Types, TypeVec);
2250
2251    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2252    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2253    if (kind == OpUnavailable)
2254      continue;
2255    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2256      if (kind == OpReinterpret) {
2257        bool outQuad = false;
2258        bool dummy = false;
2259        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2260        for (unsigned srcti = 0, srcte = TypeVec.size();
2261             srcti != srcte; ++srcti) {
2262          bool inQuad = false;
2263          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2264          if (srcti == ti || inQuad != outQuad)
2265            continue;
2266          OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2267                        isShift, isHiddenLOp, ck, InstName);
2268        }
2269      } else {
2270        OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti],
2271                      isShift, isHiddenLOp, ck, InstName);
2272      }
2273    }
2274    OS << "\n";
2275  }
2276}
2277
2278namespace clang {
2279void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
2280  NeonEmitter(Records).run(OS);
2281}
2282void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
2283  NeonEmitter(Records).runHeader(OS);
2284}
2285void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
2286  NeonEmitter(Records).runTests(OS);
2287}
2288} // End namespace clang
2289