1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
// OpKind - The generic operations (the OP_* names in arm_neon.td, see the
// OpMap table in NeonEmitter's constructor) an intrinsic may be implemented
// with.  OpNone means the intrinsic is emitted as a __builtin_ call instead.
// The enumerator order must not change: the values mirror the OP_* records.
enum OpKind {
  OpNone,
  OpUnavailable,  // Intrinsic is not available on the current target.
  OpAdd,
  OpAddl,
  OpAddw,
  OpSub,
  OpSubl,
  OpSubw,
  OpMul,
  OpMla,
  OpMlal,
  OpMls,
  OpMlsl,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAbdl,
  OpAba,
  OpAbal,
  OpDiv
};
96
// ClassKind - Classifies an intrinsic by the kind of type suffix its mangled
// name carries (see InstructionTypeCode/MangleName below); also controls how
// its builtin prototype and tests are emitted.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
109
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Layout of Flags: bits [3:0] hold an EltType value, bit 4 marks the
  // element type as unsigned, bit 5 marks a 128-bit (quad) vector.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  // Construct from a raw flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}
  // Construct from an element type plus the unsigned/quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
146
namespace {
/// NeonEmitter - TableGen backend that emits arm_neon.h (run), the matching
/// __builtin prototypes and validation code (runHeader), and tests for the
/// Neon intrinsics (runTests).
class NeonEmitter {
  RecordKeeper &Records;
  // Maps the OP_* operation names used in arm_neon.td to OpKind values.
  StringMap<OpKind> OpMap;
  // Maps each instruction record class to the ClassKind of its name suffix.
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_DIV"] = OpDiv;

    // Associate each instruction record class from arm_neon.td with the
    // ClassKind used when mangling and testing its intrinsics.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // emitIntrinsic - Emit the declaration/definition for one intrinsic record,
  // tracking already-emitted names in EmittedMap.
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  // genBuiltinsDef - Emit BUILTIN lines; isA64GenBuiltinDef selects the
  // AArch64 variants tracked in A64IntrinsicMap.
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  // genOverloadTypeCheckCode - Emit type-checking code for overloaded
  // builtins (see runHeader).
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  // genIntrinsicRangeCheckCode - Emit immediate-range validation code.
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  // genTargetTest - Emit the per-target test bodies (see runTests).
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace
255
256/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
257/// which each StringRef representing a single type declared in the string.
258/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
259/// 2xfloat and 4xfloat respectively.
260static void ParseTypes(Record *r, std::string &s,
261                       SmallVectorImpl<StringRef> &TV) {
262  const char *data = s.data();
263  int len = 0;
264
265  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
266    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
267      continue;
268
269    switch (data[len]) {
270      case 'c':
271      case 's':
272      case 'i':
273      case 'l':
274      case 'h':
275      case 'f':
276      case 'd':
277        break;
278      default:
279        PrintFatalError(r->getLoc(),
280                      "Unexpected letter: " + std::string(data + len, 1));
281    }
282    TV.push_back(StringRef(data, len + 1));
283    data += len + 1;
284    len = -1;
285  }
286}
287
288/// Widen - Convert a type code into the next wider type.  char -> short,
289/// short -> int, etc.
290static char Widen(const char t) {
291  switch (t) {
292    case 'c':
293      return 's';
294    case 's':
295      return 'i';
296    case 'i':
297      return 'l';
298    case 'h':
299      return 'f';
300    default:
301      PrintFatalError("unhandled type in widen!");
302  }
303}
304
305/// Narrow - Convert a type code into the next smaller type.  short -> char,
306/// float -> half float, etc.
307static char Narrow(const char t) {
308  switch (t) {
309    case 's':
310      return 'c';
311    case 'i':
312      return 's';
313    case 'l':
314      return 'i';
315    case 'f':
316      return 'h';
317    default:
318      PrintFatalError("unhandled type in narrow!");
319  }
320}
321
322/// For a particular StringRef, return the base type code, and whether it has
323/// the quad-vector, polynomial, or unsigned modifiers set.
324static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
325  unsigned off = 0;
326
327  // remember quad.
328  if (ty[off] == 'Q') {
329    quad = true;
330    ++off;
331  }
332
333  // remember poly.
334  if (ty[off] == 'P') {
335    poly = true;
336    ++off;
337  }
338
339  // remember unsigned.
340  if (ty[off] == 'U') {
341    usgn = true;
342    ++off;
343  }
344
345  // base type to get the type string for.
346  return ty[off];
347}
348
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Polynomial types are treated as unsigned integers.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'u':
      // Force an unsigned integer of the same width.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'x':
      // Force a signed integer of the same width.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      break;
    case 'f':
      // Force float; a half-float input additionally becomes a quad vector.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      // Force the 64-bit (non-quad) variant.
      quad = false;
      break;
    case 'w':
      // Widen the element and force a quad vector.
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Widen the element, keeping the vector width.
      type = Widen(type);
      break;
    case 'i':
      // Scalar int (e.g. lane/immediate operands).
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar unsigned 64-bit int.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 's':
    case 'a':
      // Scalar of the base element type.
      scal = true;
      break;
    case 'k':
      // Force a quad vector of the same element type.
      quad = true;
      break;
    case 'c':
      cnst = true;
      // FALLTHROUGH: a const pointer is still a pointer, so 'c' also gets
      // the 'p' treatment below.  Do not add a break here.
    case 'p':
      // Scalar pointer.
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrow the element; narrowing to half-float drops back to a
      // 64-bit vector.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'e':
      // Narrow the element and force unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    default:
      // Unrecognized modifiers leave the type unchanged.
      break;
  }
  return type;
}
426
427/// TypeString - for a modifier and type, generate the name of the typedef for
428/// that type.  QUc -> uint8x8_t.
429static std::string TypeString(const char mod, StringRef typestr) {
430  bool quad = false;
431  bool poly = false;
432  bool usgn = false;
433  bool scal = false;
434  bool cnst = false;
435  bool pntr = false;
436
437  if (mod == 'v')
438    return "void";
439  if (mod == 'i')
440    return "int";
441
442  // base type to get the type string for.
443  char type = ClassifyType(typestr, quad, poly, usgn);
444
445  // Based on the modifying character, change the type and width if necessary.
446  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
447
448  SmallString<128> s;
449
450  if (usgn)
451    s.push_back('u');
452
453  switch (type) {
454    case 'c':
455      s += poly ? "poly8" : "int8";
456      if (scal)
457        break;
458      s += quad ? "x16" : "x8";
459      break;
460    case 's':
461      s += poly ? "poly16" : "int16";
462      if (scal)
463        break;
464      s += quad ? "x8" : "x4";
465      break;
466    case 'i':
467      s += "int32";
468      if (scal)
469        break;
470      s += quad ? "x4" : "x2";
471      break;
472    case 'l':
473      s += "int64";
474      if (scal)
475        break;
476      s += quad ? "x2" : "x1";
477      break;
478    case 'h':
479      s += "float16";
480      if (scal)
481        break;
482      s += quad ? "x8" : "x4";
483      break;
484    case 'f':
485      s += "float32";
486      if (scal)
487        break;
488      s += quad ? "x4" : "x2";
489      break;
490    case 'd':
491      s += "float64";
492      if (scal)
493        break;
494      s += quad ? "x2" : "x1";
495      break;
496
497    default:
498      PrintFatalError("unhandled type!");
499  }
500
501  if (mod == '2')
502    s += "x2";
503  if (mod == '3')
504    s += "x3";
505  if (mod == '4')
506    s += "x4";
507
508  // Append _t, finishing the type string typedef type.
509  s += "_t";
510
511  if (cnst)
512    s += " const";
513
514  if (pntr)
515    s += " *";
516
517  return s.str();
518}
519
/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.  When ret is true the
/// string is for the return position; ck selects class-specific behavior.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Poly types, and non-float scalars of class I/W builtins, are emitted as
  // unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and anything not matched above) is returned as a vector of
    // signed chars and bitcast by the caller.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}
614
/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.  For class kinds
/// with no suffix convention (e.g. ClassB/ClassNone), typeCode is left
/// unmodified.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c': // 8-bit element
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's': // 16-bit element
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i': // 32-bit element
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l': // 64-bit element
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h': // half float
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f': // float
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd': // double; no width-only (ClassW) suffix exists for it
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}
690
691/// MangleName - Append a type or width suffix to a base neon function name,
692/// and insert a 'q' in the appropriate location if the operation works on
693/// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
694static std::string MangleName(const std::string &name, StringRef typestr,
695                              ClassKind ck) {
696  if (name == "vcvt_f32_f16")
697    return name;
698
699  bool quad = false;
700  std::string typeCode = "";
701
702  InstructionTypeCode(typestr, ck, quad, typeCode);
703
704  std::string s = name;
705
706  if (typeCode.size() > 0) {
707    s += "_" + typeCode;
708  }
709
710  if (ck == ClassB)
711    s += "_v";
712
713  // Insert a 'q' before the first '_' character so that it ends up before
714  // _lane or _n on vector-scalar operations.
715  if (quad) {
716    size_t pos = s.find('_');
717    s = s.insert(pos, "q");
718  }
719
720  return s;
721}
722
723static void PreprocessInstruction(const StringRef &Name,
724                                  const std::string &InstName,
725                                  std::string &Prefix,
726                                  bool &HasNPostfix,
727                                  bool &HasLanePostfix,
728                                  bool &HasDupPostfix,
729                                  bool &IsSpecialVCvt,
730                                  size_t &TBNumber) {
731  // All of our instruction name fields from arm_neon.td are of the form
732  //   <instructionname>_...
733  // Thus we grab our instruction name via computation of said Prefix.
734  const size_t PrefixEnd = Name.find_first_of('_');
735  // If InstName is passed in, we use that instead of our name Prefix.
736  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
737
738  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
739
740  HasNPostfix = Postfix.count("_n");
741  HasLanePostfix = Postfix.count("_lane");
742  HasDupPostfix = Postfix.count("_dup");
743  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
744
745  if (InstName.compare("vtbl") == 0 ||
746      InstName.compare("vtbx") == 0) {
747    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
748    // encoding to get its true value.
749    TBNumber = Name[Name.size()-1] - 48;
750  }
751}
752
/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store.  Count is the
/// N of v{ld,st}N; the pattern is appended to RegisterSuffix.
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // Quad operations consume a second d-register per element, except
      // when a single lane is addressed.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only emit the alignment hint for vld1/vst1 instructions that have
    // a dup or lane postfix and a non-8-bit element type.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}
816
817static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
818                                     const bool &HasNPostfix) {
819  return (NameRef.count("vmla") ||
820          NameRef.count("vmlal") ||
821          NameRef.count("vmlsl") ||
822          NameRef.count("vmull") ||
823          NameRef.count("vqdmlal") ||
824          NameRef.count("vqdmlsl") ||
825          NameRef.count("vqdmulh") ||
826          NameRef.count("vqdmull") ||
827          NameRef.count("vqrdmulh")) && HasNPostfix;
828}
829
830static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
831                                         const bool &HasLanePostfix) {
832  return (NameRef.count("vmla") ||
833          NameRef.count("vmls") ||
834          NameRef.count("vmlal") ||
835          NameRef.count("vmlsl") ||
836          (NameRef.count("vmul") && NameRef.size() == 3)||
837          NameRef.count("vqdmlal") ||
838          NameRef.count("vqdmlsl") ||
839          NameRef.count("vqdmulh") ||
840          NameRef.count("vqrdmulh")) && HasLanePostfix;
841}
842
843static bool IsSpecialLaneMultiply(const StringRef &NameRef,
844                                  const bool &HasLanePostfix,
845                                  const bool &IsQuad) {
846  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
847                               && IsQuad;
848  const bool IsVMull = NameRef.count("mull") && !IsQuad;
849  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
850}
851
/// NormalizeProtoForRegisterPatternCreation - Map each prototype character
/// onto the register pattern it will produce ('q' = quad register, 'd' =
/// double register, 'i' = immediate, 'a' = lane-indexed register), then prune
/// the result for instructions whose assembly does not show every operand.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      // Same-width vector operands follow the instruction's quad status.
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      // Widened/forced-quad operands always occupy quad registers.
      NormedProto += 'q';
      break;
    case 'g':
    case 'h':
    case 'e':
      // Narrowed/forced-64-bit operands always occupy double registers.
      NormedProto += 'd';
      break;
    case 'i':
      // Immediates become lane references on _lane intrinsics.
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // For special lane multiplies keep characters 0, 1 and 3: overwrite
    // index 2 with index 3, then truncate to three characters.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
931
/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.  The pattern is written into
/// RegisterSuffix; an empty result means no register check is emitted.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup/vmov _n intrinsics get no register pattern at all.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from  v{ld,st}N using its ascii representation.
    // (48 is ASCII '0'; NameRef[3] is the digit following "vld"/"vst".)
    const size_t Count = NameRef[3] - 48;

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    // Table lookups: a result register, a brace-list of TBNumber table
    // registers, and an index register.
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    // Emit one operand pattern per normalized proto character:
    //   q = quad register, d = double register, i = immediate,
    //   a = lane-indexed double register.
    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}
1008
1009/// GenerateChecksForIntrinsic - Given a specific instruction name +
1010/// typestr + class kind, generate the proper set of FileCheck
1011/// Patterns to check for. We could just return a string, but instead
1012/// use a vector since it provides us with the extra flexibility of
1013/// emitting multiple checks, which comes in handy for certain cases
1014/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // Special case: the half->float conversion is checked by its exact
  // mnemonic, with no register-suffix pattern.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // (Initialized to (size_t)-1 as an "unset" sentinel.)
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  // Compute the type suffix (e.g. "f32" below) and the FileCheck pattern
  // for the register operands.
  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // Take "vld"/"vst" from the name and append "1.64".
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1167
1168/// UseMacro - Examine the prototype string to determine if the intrinsic
1169/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // Immediate arguments ('i') force a #define rather than a standard
  // declaration so that SemaChecking can range check the constant the user
  // passed at the call site.
  //
  // Pointer ('p') and const-pointer ('c') arguments also force a macro so
  // that aligned attributes on the user's pointer type are not hidden by an
  // intermediate parameter declaration.
  return proto.find_first_of("ipc") != std::string::npos;
}
1185
1186/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
1187/// defined as a macro should be accessed directly instead of being first
1188/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  // Constant ints (i), pointers (p) and const pointers (c) are passed to the
  // builtin as-is; everything else is first copied into a local temporary.
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1193
1194// Generate the string "(argtype a, argtype b, ...)"
1195static std::string GenArgs(const std::string &proto, StringRef typestr) {
1196  bool define = UseMacro(proto);
1197  char arg = 'a';
1198
1199  std::string s;
1200  s += "(";
1201
1202  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1203    if (define) {
1204      // Some macro arguments are used directly instead of being assigned
1205      // to local temporaries; prepend an underscore prefix to make their
1206      // names consistent with the local temporaries.
1207      if (MacroArgUsedDirectly(proto, i))
1208        s += "__";
1209    } else {
1210      s += TypeString(proto[i], typestr) + " __";
1211    }
1212    s.push_back(arg);
1213    if ((i + 1) < e)
1214      s += ", ";
1215  }
1216
1217  s += ")";
1218  return s;
1219}
1220
1221// Macro arguments are not type-checked like inline function arguments, so
1222// assign them to local temporaries to get the right type checking.
1223static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1224  char arg = 'a';
1225  std::string s;
1226  bool generatedLocal = false;
1227
1228  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1229    // Do not create a temporary for an immediate argument.
1230    // That would defeat the whole point of using a macro!
1231    if (MacroArgUsedDirectly(proto, i))
1232      continue;
1233    generatedLocal = true;
1234
1235    s += TypeString(proto[i], typestr) + " __";
1236    s.push_back(arg);
1237    s += " = (";
1238    s.push_back(arg);
1239    s += "); ";
1240  }
1241
1242  if (generatedLocal)
1243    s += "\\\n  ";
1244  return s;
1245}
1246
1247// Use the vmovl builtin to sign-extend or zero-extend a vector.
1248static std::string Extend(StringRef typestr, const std::string &a) {
1249  std::string s;
1250  s = MangleName("vmovl", typestr, ClassS);
1251  s += "(" + a + ")";
1252  return s;
1253}
1254
1255static std::string Duplicate(unsigned nElts, StringRef typestr,
1256                             const std::string &a) {
1257  std::string s;
1258
1259  s = "(" + TypeString('d', typestr) + "){ ";
1260  for (unsigned i = 0; i != nElts; ++i) {
1261    s += a;
1262    if ((i + 1) < nElts)
1263      s += ", ";
1264  }
1265  s += " }";
1266
1267  return s;
1268}
1269
// Broadcast a single lane of `vec` across all elements by emitting a
// __builtin_shufflevector whose index list repeats `lane` nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string shuffle = "__builtin_shufflevector(" + vec + ", " + vec;
  for (unsigned elt = 0; elt != nElts; ++elt) {
    shuffle += ", ";
    shuffle += lane;
  }
  shuffle += ")";
  return shuffle;
}
1278
1279static unsigned GetNumElements(StringRef typestr, bool &quad) {
1280  quad = false;
1281  bool dummy = false;
1282  char type = ClassifyType(typestr, quad, dummy, dummy);
1283  unsigned nElts = 0;
1284  switch (type) {
1285  case 'c': nElts = 8; break;
1286  case 's': nElts = 4; break;
1287  case 'i': nElts = 2; break;
1288  case 'l': nElts = 1; break;
1289  case 'h': nElts = 4; break;
1290  case 'f': nElts = 2; break;
1291  case 'd':
1292    nElts = 1;
1293    break;
1294  default:
1295    PrintFatalError("unhandled type!");
1296  }
1297  if (quad) nElts <<= 1;
1298  return nElts;
1299}
1300
1301// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
static std::string GenOpString(OpKind op, const std::string &proto,
                               StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  // Inline functions return the expression explicitly; macro bodies omit the
  // "return" (the statement-expression wrapper yields the value instead).
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpMulN:
    // "_n" forms duplicate the scalar operand into a full vector first.
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    // "_lane" forms splat the selected lane across a full vector first.
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Vector comparisons yield a vector of -1/0; cast to the declared result
  // type since the builtin comparison's type may differ in signedness.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    // Concatenate two 64-bit vectors into one 128-bit vector by shuffling
    // them as int64x1_t values and casting the result back.
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    // Select the low half of the source vector (indices 0..nElts-1).
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Emit shuffle indices 1,0,3,2,... swapping each adjacent pair of
    // elements.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Reverse the elements within each group of WordElts elements.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Reverse the elements within each group of DblWordElts elements.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal: {
    s += "__a + ";
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpDiv:
    s += "__a / __b;";
    break;
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1538
1539static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1540  unsigned mod = proto[0];
1541
1542  if (mod == 'v' || mod == 'f')
1543    mod = proto[1];
1544
1545  bool quad = false;
1546  bool poly = false;
1547  bool usgn = false;
1548  bool scal = false;
1549  bool cnst = false;
1550  bool pntr = false;
1551
1552  // Base type to get the type string for.
1553  char type = ClassifyType(typestr, quad, poly, usgn);
1554
1555  // Based on the modifying character, change the type and width if necessary.
1556  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1557
1558  NeonTypeFlags::EltType ET;
1559  switch (type) {
1560    case 'c':
1561      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1562      break;
1563    case 's':
1564      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1565      break;
1566    case 'i':
1567      ET = NeonTypeFlags::Int32;
1568      break;
1569    case 'l':
1570      ET = NeonTypeFlags::Int64;
1571      break;
1572    case 'h':
1573      ET = NeonTypeFlags::Float16;
1574      break;
1575    case 'f':
1576      ET = NeonTypeFlags::Float32;
1577      break;
1578    case 'd':
1579      ET = NeonTypeFlags::Float64;
1580      break;
1581    default:
1582      PrintFatalError("unhandled type!");
1583  }
1584  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1585  return Flags.getFlags();
1586}
1587
1588// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  // Emit the result handling: sret builtins declare a local "r" to pass by
  // address below; otherwise the builtin's return value is cast to the
  // user-visible type (with an explicit "return" only in inline functions).
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' in the prototype marks an "_n" splat variant.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // For "_n" variants, duplicate the final scalar argument into a vector
    // before passing it to the (non-splat) builtin.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // Yield the sret result: inline functions return it; macros leave "r" as
  // the final expression of the statement expression.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
1704
1705static std::string GenBuiltinDef(const std::string &name,
1706                                 const std::string &proto,
1707                                 StringRef typestr, ClassKind ck) {
1708  std::string s("BUILTIN(__builtin_neon_");
1709
1710  // If all types are the same size, bitcasting the args will take care
1711  // of arg checking.  The actual signedness etc. will be taken care of with
1712  // special enums.
1713  if (proto.find('s') == std::string::npos)
1714    ck = ClassB;
1715
1716  s += MangleName(name, typestr, ck);
1717  s += ", \"";
1718
1719  for (unsigned i = 0, e = proto.size(); i != e; ++i)
1720    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1721
1722  // Extra constant integer to hold type class enum for this function, e.g. s8
1723  if (ck == ClassB)
1724    s += "i";
1725
1726  s += "\", \"n\")";
1727  return s;
1728}
1729
// Generate the complete header definition for one intrinsic: either a
// "#define name(...) __extension__ ({...})" macro or an "__ai" inline
// function, whose body comes from GenOpString (generic ops) or GenBuiltin
// (__builtin_neon_* calls).
static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  // Unavailable intrinsics are always emitted as (never-expanded) function
  // declarations, even when the prototype would otherwise need a macro.
  bool define = UseMacro(proto) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  Strip off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr);

  // Definition.
  if (define) {
    // Open a statement expression and declare typed local temporaries for
    // the macro arguments.
    s += " __extension__ ({ \\\n  ";
    s += GenMacroLocals(proto, inTypeStr);
  } else if (kind == OpUnavailable) {
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n  ";

  if (kind != OpNone)
    s += GenOpString(kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}
1780
1781/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
1782/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the MIT-style license header for the generated header.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): "__AARCH_FEATURE_ADVSIMD" does not match the documented
  // ACLE macro spelling (__ARM_FEATURE_* family) -- confirm this is the
  // intended AArch64 feature-test macro.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t only exists on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  // Each letter below encodes one element type; a 'Q' prefix marks the
  // 128-bit (quad) variant of the type that follows it.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool isA64 = false;

    // Quad double vectors (float64x2_t) are AArch64-only, so guard them.
    if (type == 'd' && quad)
      isA64 = true;

    if (isA64)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit element counts so the typedefs line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

    if (isA64)
      OS << "#endif\n";
  }
  OS << "\n";

  // Emit struct typedefs (the multi-vector xxx_t_{2,3,4} container types).
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool isA64 = false;

      if (type == 'd' && quad)
        isA64 = true;

      if (isA64)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";

      if (isA64)
        OS << "#endif\n";

      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VMOVL/VMULL/VABD were already emitted above.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
1962
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
/// Definitions already present in EmittedMap are not emitted again, so an
/// intrinsic shared by the ARM and AArch64 sections appears only once.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind comes from the record's second superclass; a builtin must
  // carry either a class kind or a non-trivial operation.
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // For vreinterpret, emit a cast from every other type of the same
      // register width (double vs. quad) to this output type.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // Skip definitions that were already emitted.
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      // Skip definitions that were already emitted.
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}
2011
2012static unsigned RangeFromType(const char mod, StringRef typestr) {
2013  // base type to get the type string for.
2014  bool quad = false, dummy = false;
2015  char type = ClassifyType(typestr, quad, dummy, dummy);
2016  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2017
2018  switch (type) {
2019    case 'c':
2020      return (8 << (int)quad) - 1;
2021    case 'h':
2022    case 's':
2023      return (4 << (int)quad) - 1;
2024    case 'f':
2025    case 'i':
2026      return (2 << (int)quad) - 1;
2027    case 'l':
2028      return (1 << (int)quad) - 1;
2029    default:
2030      PrintFatalError("unhandled type!");
2031  }
2032}
2033
/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
///
/// Emits "case <builtin>: i = <index>; <range>; break;" fragments intended
/// for SemaChecking.cpp, guarded by GET_NEON_IMMEDIATE_CHECK (ARM) or
/// GET_NEON_AARCH64_IMMEDIATE_CHECK (AArch64).
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  // Uniques the emitted case labels so each appears at most once.
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only builtins (OpNone operands) are range checked; intrinsics
    // implemented in terms of other operations are skipped.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(name)) {
      ClassKind &A64CK = A64IntrinsicMap[name];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range 1 to 32.
        ck = ClassB;
        rangestr = "l = 1; u = 31"; // upper bound = l + u
      } else if (Proto.find('s') == std::string::npos) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        // RFT maps the overload type constant to the element range at
        // Sema time.
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
          immidx += 2;
          break;
        case '3':
          immidx += 3;
          break;
        case '4':
          immidx += 4;
          break;
        // Stop scanning once the immediate operand has been reached.
        case 'i':
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2163
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
///
/// For each overloaded builtin this emits a case setting 'mask' (a bitmask
/// of the valid NEON type-code enum values), and optionally 'PtrArgNum'
/// and 'HasConstPtr' for builtins taking pointer arguments.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only builtins (OpNone operands) need overload checking.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Proto.find('s') != std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM  type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(name)) {
      ClassKind &A64CK = A64IntrinsicMap[name];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // 'mask' accumulates the valid type-code enum bits for the 64-bit
    // variant and 'qmask' for the 128-bit (quad) variant; si/qi remember a
    // representative type index of each for name mangling below.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    // Prototype code 'c' denotes a const pointer and 'p' a plain pointer.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case for the 64-bit variant (if any) ...
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    // ... and one for the quad variant (if any).
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2295
2296/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2297/// declaration of builtins, checking for unique builtin declarations.
2298void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2299                                 StringMap<ClassKind> &A64IntrinsicMap,
2300                                 bool isA64GenBuiltinDef) {
2301  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2302  StringMap<OpKind> EmittedMap;
2303
2304  // Generate BuiltinsARM.def and BuiltinsAArch64.def
2305  if (isA64GenBuiltinDef)
2306    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2307  else
2308    OS << "#ifdef GET_NEON_BUILTINS\n";
2309
2310  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2311    Record *R = RV[i];
2312    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2313    if (k != OpNone)
2314      continue;
2315
2316    std::string Proto = R->getValueAsString("Prototype");
2317    std::string name = R->getValueAsString("Name");
2318
2319    // Functions with 'a' (the splat code) in the type prototype should not get
2320    // their own builtin as they use the non-splat variant.
2321    if (Proto.find('a') != std::string::npos)
2322      continue;
2323
2324    std::string Types = R->getValueAsString("Types");
2325    SmallVector<StringRef, 16> TypeVec;
2326    ParseTypes(R, Types, TypeVec);
2327
2328    if (R->getSuperClasses().size() < 2)
2329      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2330
2331    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2332
2333    // Do not include AArch64 BUILTIN() macros if not generating
2334    // code for AArch64
2335    bool isA64 = R->getValueAsBit("isA64");
2336    if (!isA64GenBuiltinDef && isA64)
2337      continue;
2338
2339    // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
2340    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2341    // redefined in AArch64 to handle an additional 2 x f64 type.
2342    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(name)) {
2343      ClassKind &A64CK = A64IntrinsicMap[name];
2344      if (A64CK == ck && ck != ClassNone)
2345        continue;
2346    }
2347
2348    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2349      // Generate the declaration for this builtin, ensuring
2350      // that each unique BUILTIN() macro appears only once in the output
2351      // stream.
2352      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
2353      if (EmittedMap.count(bd))
2354        continue;
2355
2356      EmittedMap[bd] = OpNone;
2357      OS << bd << "\n";
2358    }
2359  }
2360  OS << "#endif\n\n";
2361}
2362
2363/// runHeader - Emit a file with sections defining:
2364/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2365/// 2. the SemaChecking code for the type overload checking.
2366/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2367void NeonEmitter::runHeader(raw_ostream &OS) {
2368  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2369
2370  // build a map of AArch64 intriniscs to be used in uniqueness checks.
2371  StringMap<ClassKind> A64IntrinsicMap;
2372  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2373    Record *R = RV[i];
2374
2375    bool isA64 = R->getValueAsBit("isA64");
2376    if (!isA64)
2377      continue;
2378
2379    ClassKind CK = ClassNone;
2380    if (R->getSuperClasses().size() >= 2)
2381      CK = ClassMap[R->getSuperClasses()[1]];
2382
2383    std::string Name = R->getValueAsString("Name");
2384    if (A64IntrinsicMap.count(Name))
2385      continue;
2386    A64IntrinsicMap[Name] = CK;
2387  }
2388
2389  // Generate BuiltinsARM.def for ARM
2390  genBuiltinsDef(OS, A64IntrinsicMap, false);
2391
2392  // Generate BuiltinsAArch64.def for AArch64
2393  genBuiltinsDef(OS, A64IntrinsicMap, true);
2394
2395  // Generate ARM overloaded type checking code for SemaChecking.cpp
2396  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2397
2398  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2399  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2400
2401  // Generate ARM range checking code for shift/lane immediates.
2402  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2403
2404  // Generate the AArch64 range checking code for shift/lane immediates.
2405  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2406}
2407
2408/// GenTest - Write out a test for the intrinsic specified by the name and
2409/// type strings, including the embedded patterns for FileCheck to match.
2410static std::string GenTest(const std::string &name,
2411                           const std::string &proto,
2412                           StringRef outTypeStr, StringRef inTypeStr,
2413                           bool isShift, bool isHiddenLOp,
2414                           ClassKind ck, const std::string &InstName,
2415						   bool isA64,
2416						   std::string & testFuncProto) {
2417  assert(!proto.empty() && "");
2418  std::string s;
2419
2420  // Function name with type suffix
2421  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2422  if (outTypeStr != inTypeStr) {
2423    // If the input type is different (e.g., for vreinterpret), append a suffix
2424    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2425    // does not insert another "q" in the name.
2426    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2427    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2428    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2429  }
2430
2431  // todo: GenerateChecksForIntrinsic does not generate CHECK
2432  // for aarch64 instructions yet
2433  std::vector<std::string> FileCheckPatterns;
2434  if (!isA64) {
2435	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2436							   isHiddenLOp, FileCheckPatterns);
2437	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2438  }
2439  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2440
2441  // Emit the FileCheck patterns.
2442  // If for any reason we do not want to emit a check, mangledInst
2443  // will be the empty string.
2444  if (FileCheckPatterns.size()) {
2445    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2446                                                  e = FileCheckPatterns.end();
2447         i != e;
2448         ++i) {
2449      s += "// CHECK_ARM: " + *i + "\n";
2450    }
2451  }
2452
2453  // Emit the start of the test function.
2454
2455  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2456  char arg = 'a';
2457  std::string comma;
2458  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2459    // Do not create arguments for values that must be immediate constants.
2460    if (proto[i] == 'i')
2461      continue;
2462    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2463    testFuncProto.push_back(arg);
2464    comma = ", ";
2465  }
2466  testFuncProto += ")";
2467
2468  s+= testFuncProto;
2469  s+= " {\n  ";
2470
2471  if (proto[0] != 'v')
2472    s += "return ";
2473  s += mangledName + "(";
2474  arg = 'a';
2475  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2476    if (proto[i] == 'i') {
2477      // For immediate operands, test the maximum value.
2478      if (isShift)
2479        s += "1"; // FIXME
2480      else
2481        // The immediate generally refers to a lane in the preceding argument.
2482        s += utostr(RangeFromType(proto[i-1], inTypeStr));
2483    } else {
2484      s.push_back(arg);
2485    }
2486    if ((i + 1) < e)
2487      s += ", ";
2488  }
2489  s += ");\n}\n\n";
2490  return s;
2491}
2492
2493/// Write out all intrinsic tests for the specified target, checking
2494/// for intrinsic test uniqueness.
2495void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2496                                bool isA64GenTest) {
2497  if (isA64GenTest)
2498	OS << "#ifdef __aarch64__\n";
2499
2500  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2501  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2502    Record *R = RV[i];
2503    std::string name = R->getValueAsString("Name");
2504    std::string Proto = R->getValueAsString("Prototype");
2505    std::string Types = R->getValueAsString("Types");
2506    bool isShift = R->getValueAsBit("isShift");
2507    std::string InstName = R->getValueAsString("InstName");
2508    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2509    bool isA64 = R->getValueAsBit("isA64");
2510
2511    // do not include AArch64 intrinsic test if not generating
2512    // code for AArch64
2513    if (!isA64GenTest && isA64)
2514      continue;
2515
2516    SmallVector<StringRef, 16> TypeVec;
2517    ParseTypes(R, Types, TypeVec);
2518
2519    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2520    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2521    if (kind == OpUnavailable)
2522      continue;
2523    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2524      if (kind == OpReinterpret) {
2525        bool outQuad = false;
2526        bool dummy = false;
2527        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2528        for (unsigned srcti = 0, srcte = TypeVec.size();
2529             srcti != srcte; ++srcti) {
2530          bool inQuad = false;
2531          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2532          if (srcti == ti || inQuad != outQuad)
2533            continue;
2534		  std::string testFuncProto;
2535          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2536                                  isShift, isHiddenLOp, ck, InstName, isA64,
2537								  testFuncProto);
2538          if (EmittedMap.count(testFuncProto))
2539            continue;
2540          EmittedMap[testFuncProto] = kind;
2541          OS << s << "\n";
2542        }
2543      } else {
2544		std::string testFuncProto;
2545        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2546                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
2547        if (EmittedMap.count(testFuncProto))
2548          continue;
2549        EmittedMap[testFuncProto] = kind;
2550        OS << s << "\n";
2551      }
2552    }
2553  }
2554
2555  if (isA64GenTest)
2556	OS << "#endif\n";
2557}
2558/// runTests - Write out a complete set of tests for all of the Neon
2559/// intrinsics.
2560void NeonEmitter::runTests(raw_ostream &OS) {
2561  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2562        "apcs-gnu\\\n"
2563        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2564        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
2565		"\n"
2566	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2567	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
2568	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2569        "\n"
2570        "// REQUIRES: long_tests\n"
2571        "\n"
2572        "#include <arm_neon.h>\n"
2573        "\n";
2574
2575  // ARM tests must be emitted before AArch64 tests to ensure
2576  // tests for intrinsics that are common to ARM and AArch64
2577  // appear only once in the output stream.
2578  // The check for uniqueness is done in genTargetTest.
2579  StringMap<OpKind> EmittedMap;
2580
2581  genTargetTest(OS, EmittedMap, false);
2582
2583  genTargetTest(OS, EmittedMap, true);
2584}
2585
namespace clang {
// Entry points invoked by clang's TableGen driver.

// Emit the arm_neon.h header.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}
// Emit the builtin definitions and Sema checking code fragments.
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
// Emit a test file exercising all of the Neon intrinsics.
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runTests(OS);
}
} // End namespace clang
2597