NeonEmitter.cpp revision 66981c7ca1fcef529d7d5e5c53b07020ff23d8e3
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
/// OpKind - Identifies how the body of an intrinsic is produced.  The
/// enumerators are declared without explicit values, so their numeric values
/// follow declaration order; OpNone is therefore zero.
enum OpKind {
  // No operation / not available.
  OpNone, OpUnavailable,

  // Addition and subtraction, including the long (l) and wide (w) variants.
  OpAdd, OpAddl, OpAddw,
  OpSub, OpSubl, OpSubw,

  // Multiply, multiply-accumulate and multiply-subtract.
  OpMul, OpMla, OpMlal, OpMls, OpMlsl,

  // The same operations with a scalar (_n) final operand.
  OpMulN, OpMlaN, OpMlsN, OpMlalN, OpMlslN,

  // The same operations with a vector lane (_lane) final operand.
  OpMulLane, OpMullLane, OpMlaLane, OpMlsLane, OpMlalLane, OpMlslLane,

  // Saturating doubling multiplies with a vector lane final operand.
  OpQDMullLane, OpQDMlalLane, OpQDMlslLane, OpQDMulhLane, OpQRDMulhLane,

  // Comparisons.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Negation and bitwise operations.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Casts, concatenation, duplication and half extraction.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo,

  // Bitwise select.
  OpSelect,

  // Element reversal within 16, 32 or 64 bit groups.
  OpRev16, OpRev32, OpRev64,

  OpReinterpret,

  // Absolute difference (long), and absolute difference with accumulate.
  OpAbdl, OpAba, OpAbal
};
95
/// ClassKind - Selects which kind of type suffix is appended to an intrinsic
/// name and how its builtin arguments are typed.
enum ClassKind {
  ClassNone,
  // Generic integer instruction, e.g., "i8" suffix.
  ClassI,
  // Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,
  // Width-specific instruction, e.g., "8" suffix.
  ClassW,
  // Bitcast arguments with enum argument to specify type.
  ClassB
};
103
/// NeonTypeFlags - Packs the element type of an overloaded Neon builtin
/// together with its "unsigned" and "quad" bits into one integer.  The layout
/// must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout: element type in the low four bits, then one bit each for
  // unsigned and quad.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  // Element type codes; the enumerator value is stored directly in the low
  // bits of the flags word.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32
  };

  /// Wrap an already-encoded flags value.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode an element type plus the unsigned and quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the encoded flags word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
139
140namespace {
141class NeonEmitter {
142  RecordKeeper &Records;
143  StringMap<OpKind> OpMap;
144  DenseMap<Record*, ClassKind> ClassMap;
145
146public:
147  NeonEmitter(RecordKeeper &R) : Records(R) {
148    OpMap["OP_NONE"]  = OpNone;
149    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
150    OpMap["OP_ADD"]   = OpAdd;
151    OpMap["OP_ADDL"]  = OpAddl;
152    OpMap["OP_ADDW"]  = OpAddw;
153    OpMap["OP_SUB"]   = OpSub;
154    OpMap["OP_SUBL"]  = OpSubl;
155    OpMap["OP_SUBW"]  = OpSubw;
156    OpMap["OP_MUL"]   = OpMul;
157    OpMap["OP_MLA"]   = OpMla;
158    OpMap["OP_MLAL"]  = OpMlal;
159    OpMap["OP_MLS"]   = OpMls;
160    OpMap["OP_MLSL"]  = OpMlsl;
161    OpMap["OP_MUL_N"] = OpMulN;
162    OpMap["OP_MLA_N"] = OpMlaN;
163    OpMap["OP_MLS_N"] = OpMlsN;
164    OpMap["OP_MLAL_N"] = OpMlalN;
165    OpMap["OP_MLSL_N"] = OpMlslN;
166    OpMap["OP_MUL_LN"]= OpMulLane;
167    OpMap["OP_MULL_LN"] = OpMullLane;
168    OpMap["OP_MLA_LN"]= OpMlaLane;
169    OpMap["OP_MLS_LN"]= OpMlsLane;
170    OpMap["OP_MLAL_LN"] = OpMlalLane;
171    OpMap["OP_MLSL_LN"] = OpMlslLane;
172    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
173    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
174    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
175    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
176    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
177    OpMap["OP_EQ"]    = OpEq;
178    OpMap["OP_GE"]    = OpGe;
179    OpMap["OP_LE"]    = OpLe;
180    OpMap["OP_GT"]    = OpGt;
181    OpMap["OP_LT"]    = OpLt;
182    OpMap["OP_NEG"]   = OpNeg;
183    OpMap["OP_NOT"]   = OpNot;
184    OpMap["OP_AND"]   = OpAnd;
185    OpMap["OP_OR"]    = OpOr;
186    OpMap["OP_XOR"]   = OpXor;
187    OpMap["OP_ANDN"]  = OpAndNot;
188    OpMap["OP_ORN"]   = OpOrNot;
189    OpMap["OP_CAST"]  = OpCast;
190    OpMap["OP_CONC"]  = OpConcat;
191    OpMap["OP_HI"]    = OpHi;
192    OpMap["OP_LO"]    = OpLo;
193    OpMap["OP_DUP"]   = OpDup;
194    OpMap["OP_DUP_LN"] = OpDupLane;
195    OpMap["OP_SEL"]   = OpSelect;
196    OpMap["OP_REV16"] = OpRev16;
197    OpMap["OP_REV32"] = OpRev32;
198    OpMap["OP_REV64"] = OpRev64;
199    OpMap["OP_REINT"] = OpReinterpret;
200    OpMap["OP_ABDL"]  = OpAbdl;
201    OpMap["OP_ABA"]   = OpAba;
202    OpMap["OP_ABAL"]  = OpAbal;
203
204    Record *SI = R.getClass("SInst");
205    Record *II = R.getClass("IInst");
206    Record *WI = R.getClass("WInst");
207    ClassMap[SI] = ClassS;
208    ClassMap[II] = ClassI;
209    ClassMap[WI] = ClassW;
210  }
211
212  // run - Emit arm_neon.h.inc
213  void run(raw_ostream &o);
214
215  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
216  void runHeader(raw_ostream &o);
217
218  // runTests - Emit tests for all the Neon intrinsics.
219  void runTests(raw_ostream &o);
220
221private:
222  void emitIntrinsic(raw_ostream &OS, Record *R);
223};
224} // end anonymous namespace
225
226/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
227/// which each StringRef representing a single type declared in the string.
228/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
229/// 2xfloat and 4xfloat respectively.
230static void ParseTypes(Record *r, std::string &s,
231                       SmallVectorImpl<StringRef> &TV) {
232  const char *data = s.data();
233  int len = 0;
234
235  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
236    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
237      continue;
238
239    switch (data[len]) {
240      case 'c':
241      case 's':
242      case 'i':
243      case 'l':
244      case 'h':
245      case 'f':
246        break;
247      default:
248        throw TGError(r->getLoc(),
249                      "Unexpected letter: " + std::string(data + len, 1));
250    }
251    TV.push_back(StringRef(data, len + 1));
252    data += len + 1;
253    len = -1;
254  }
255}
256
/// Widen - Map a base type code to the code of the next wider type:
/// 'c' -> 's', 's' -> 'i', 'i' -> 'l' and 'h' -> 'f'.  Any other code throws
/// a string literal.
static char Widen(const char t) {
  if (t == 'c')
    return 's';
  if (t == 's')
    return 'i';
  if (t == 'i')
    return 'l';
  if (t == 'h')
    return 'f';
  throw "unhandled type in widen!";
}
272
/// Narrow - Map a base type code to the code of the next smaller type:
/// 's' -> 'c', 'i' -> 's', 'l' -> 'i' and 'f' -> 'h'.  Any other code throws
/// a string literal.
static char Narrow(const char t) {
  if (t == 's')
    return 'c';
  if (t == 'i')
    return 's';
  if (t == 'l')
    return 'i';
  if (t == 'f')
    return 'h';
  throw "unhandled type in narrow!";
}
288
289/// For a particular StringRef, return the base type code, and whether it has
290/// the quad-vector, polynomial, or unsigned modifiers set.
291static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
292  unsigned off = 0;
293
294  // remember quad.
295  if (ty[off] == 'Q') {
296    quad = true;
297    ++off;
298  }
299
300  // remember poly.
301  if (ty[off] == 'P') {
302    poly = true;
303    ++off;
304  }
305
306  // remember unsigned.
307  if (ty[off] == 'U') {
308    usgn = true;
309    ++off;
310  }
311
312  // base type to get the type string for.
313  return ty[off];
314}
315
316/// ModType - Transform a type code and its modifiers based on a mod code. The
317/// mod code definitions may be found at the top of arm_neon.td.
318static char ModType(const char mod, char type, bool &quad, bool &poly,
319                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
320  switch (mod) {
321    case 't':
322      if (poly) {
323        poly = false;
324        usgn = true;
325      }
326      break;
327    case 'u':
328      usgn = true;
329      poly = false;
330      if (type == 'f')
331        type = 'i';
332      break;
333    case 'x':
334      usgn = false;
335      poly = false;
336      if (type == 'f')
337        type = 'i';
338      break;
339    case 'f':
340      if (type == 'h')
341        quad = true;
342      type = 'f';
343      usgn = false;
344      break;
345    case 'g':
346      quad = false;
347      break;
348    case 'w':
349      type = Widen(type);
350      quad = true;
351      break;
352    case 'n':
353      type = Widen(type);
354      break;
355    case 'i':
356      type = 'i';
357      scal = true;
358      break;
359    case 'l':
360      type = 'l';
361      scal = true;
362      usgn = true;
363      break;
364    case 's':
365    case 'a':
366      scal = true;
367      break;
368    case 'k':
369      quad = true;
370      break;
371    case 'c':
372      cnst = true;
373    case 'p':
374      pntr = true;
375      scal = true;
376      break;
377    case 'h':
378      type = Narrow(type);
379      if (type == 'h')
380        quad = false;
381      break;
382    case 'e':
383      type = Narrow(type);
384      usgn = true;
385      break;
386    default:
387      break;
388  }
389  return type;
390}
391
392/// TypeString - for a modifier and type, generate the name of the typedef for
393/// that type.  QUc -> uint8x8_t.
394static std::string TypeString(const char mod, StringRef typestr) {
395  bool quad = false;
396  bool poly = false;
397  bool usgn = false;
398  bool scal = false;
399  bool cnst = false;
400  bool pntr = false;
401
402  if (mod == 'v')
403    return "void";
404  if (mod == 'i')
405    return "int";
406
407  // base type to get the type string for.
408  char type = ClassifyType(typestr, quad, poly, usgn);
409
410  // Based on the modifying character, change the type and width if necessary.
411  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
412
413  SmallString<128> s;
414
415  if (usgn)
416    s.push_back('u');
417
418  switch (type) {
419    case 'c':
420      s += poly ? "poly8" : "int8";
421      if (scal)
422        break;
423      s += quad ? "x16" : "x8";
424      break;
425    case 's':
426      s += poly ? "poly16" : "int16";
427      if (scal)
428        break;
429      s += quad ? "x8" : "x4";
430      break;
431    case 'i':
432      s += "int32";
433      if (scal)
434        break;
435      s += quad ? "x4" : "x2";
436      break;
437    case 'l':
438      s += "int64";
439      if (scal)
440        break;
441      s += quad ? "x2" : "x1";
442      break;
443    case 'h':
444      s += "float16";
445      if (scal)
446        break;
447      s += quad ? "x8" : "x4";
448      break;
449    case 'f':
450      s += "float32";
451      if (scal)
452        break;
453      s += quad ? "x4" : "x2";
454      break;
455    default:
456      throw "unhandled type!";
457  }
458
459  if (mod == '2')
460    s += "x2";
461  if (mod == '3')
462    s += "x3";
463  if (mod == '4')
464    s += "x4";
465
466  // Append _t, finishing the type string typedef type.
467  s += "_t";
468
469  if (cnst)
470    s += " const";
471
472  if (pntr)
473    s += " *";
474
475  return s.str();
476}
477
478/// BuiltinTypeString - for a modifier and type, generate the clang
479/// BuiltinsARM.def prototype code for the function.  See the top of clang's
480/// Builtins.def for a description of the type strings.
481static std::string BuiltinTypeString(const char mod, StringRef typestr,
482                                     ClassKind ck, bool ret) {
483  bool quad = false;
484  bool poly = false;
485  bool usgn = false;
486  bool scal = false;
487  bool cnst = false;
488  bool pntr = false;
489
490  if (mod == 'v')
491    return "v"; // void
492  if (mod == 'i')
493    return "i"; // int
494
495  // base type to get the type string for.
496  char type = ClassifyType(typestr, quad, poly, usgn);
497
498  // Based on the modifying character, change the type and width if necessary.
499  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
500
501  // All pointers are void* pointers.  Change type to 'v' now.
502  if (pntr) {
503    usgn = false;
504    poly = false;
505    type = 'v';
506  }
507  // Treat half-float ('h') types as unsigned short ('s') types.
508  if (type == 'h') {
509    type = 's';
510    usgn = true;
511  }
512  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
513
514  if (scal) {
515    SmallString<128> s;
516
517    if (usgn)
518      s.push_back('U');
519    else if (type == 'c')
520      s.push_back('S'); // make chars explicitly signed
521
522    if (type == 'l') // 64-bit long
523      s += "LLi";
524    else
525      s.push_back(type);
526
527    if (cnst)
528      s.push_back('C');
529    if (pntr)
530      s.push_back('*');
531    return s.str();
532  }
533
534  // Since the return value must be one type, return a vector type of the
535  // appropriate width which we will bitcast.  An exception is made for
536  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
537  // fashion, storing them to a pointer arg.
538  if (ret) {
539    if (mod >= '2' && mod <= '4')
540      return "vv*"; // void result with void* first argument
541    if (mod == 'f' || (ck != ClassB && type == 'f'))
542      return quad ? "V4f" : "V2f";
543    if (ck != ClassB && type == 's')
544      return quad ? "V8s" : "V4s";
545    if (ck != ClassB && type == 'i')
546      return quad ? "V4i" : "V2i";
547    if (ck != ClassB && type == 'l')
548      return quad ? "V2LLi" : "V1LLi";
549
550    return quad ? "V16Sc" : "V8Sc";
551  }
552
553  // Non-return array types are passed as individual vectors.
554  if (mod == '2')
555    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
556  if (mod == '3')
557    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
558  if (mod == '4')
559    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
560
561  if (mod == 'f' || (ck != ClassB && type == 'f'))
562    return quad ? "V4f" : "V2f";
563  if (ck != ClassB && type == 's')
564    return quad ? "V8s" : "V4s";
565  if (ck != ClassB && type == 'i')
566    return quad ? "V4i" : "V2i";
567  if (ck != ClassB && type == 'l')
568    return quad ? "V2LLi" : "V1LLi";
569
570  return quad ? "V16Sc" : "V8Sc";
571}
572
573/// MangleName - Append a type or width suffix to a base neon function name,
574/// and insert a 'q' in the appropriate location if the operation works on
575/// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
576static std::string MangleName(const std::string &name, StringRef typestr,
577                              ClassKind ck) {
578  if (name == "vcvt_f32_f16")
579    return name;
580
581  bool quad = false;
582  bool poly = false;
583  bool usgn = false;
584  char type = ClassifyType(typestr, quad, poly, usgn);
585
586  std::string s = name;
587
588  switch (type) {
589  case 'c':
590    switch (ck) {
591    case ClassS: s += poly ? "_p8" : usgn ? "_u8" : "_s8"; break;
592    case ClassI: s += "_i8"; break;
593    case ClassW: s += "_8"; break;
594    default: break;
595    }
596    break;
597  case 's':
598    switch (ck) {
599    case ClassS: s += poly ? "_p16" : usgn ? "_u16" : "_s16"; break;
600    case ClassI: s += "_i16"; break;
601    case ClassW: s += "_16"; break;
602    default: break;
603    }
604    break;
605  case 'i':
606    switch (ck) {
607    case ClassS: s += usgn ? "_u32" : "_s32"; break;
608    case ClassI: s += "_i32"; break;
609    case ClassW: s += "_32"; break;
610    default: break;
611    }
612    break;
613  case 'l':
614    switch (ck) {
615    case ClassS: s += usgn ? "_u64" : "_s64"; break;
616    case ClassI: s += "_i64"; break;
617    case ClassW: s += "_64"; break;
618    default: break;
619    }
620    break;
621  case 'h':
622    switch (ck) {
623    case ClassS:
624    case ClassI: s += "_f16"; break;
625    case ClassW: s += "_16"; break;
626    default: break;
627    }
628    break;
629  case 'f':
630    switch (ck) {
631    case ClassS:
632    case ClassI: s += "_f32"; break;
633    case ClassW: s += "_32"; break;
634    default: break;
635    }
636    break;
637  default:
638    throw "unhandled type!";
639  }
640  if (ck == ClassB)
641    s += "_v";
642
643  // Insert a 'q' before the first '_' character so that it ends up before
644  // _lane or _n on vector-scalar operations.
645  if (quad) {
646    size_t pos = s.find('_');
647    s = s.insert(pos, "q");
648  }
649  return s;
650}
651
/// UseMacro - Decide from the prototype string whether an intrinsic must be
/// defined as a preprocessor macro instead of an inline function.  That is
/// the case when the prototype contains:
///   'i'        - an immediate argument, so that SemaChecking can range check
///                the immediate passed by the user;
///   'p' or 'c' - a pointer argument, to avoid hiding aligned attributes from
///                the pointer type.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
669
/// MacroArgUsedDirectly - Return true if argument i of an intrinsic that is
/// defined as a macro should be referenced directly rather than first being
/// copied into a local temporary.  This holds for constant ints ('i'),
/// pointers ('p') and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
677
678// Generate the string "(argtype a, argtype b, ...)"
679static std::string GenArgs(const std::string &proto, StringRef typestr) {
680  bool define = UseMacro(proto);
681  char arg = 'a';
682
683  std::string s;
684  s += "(";
685
686  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
687    if (define) {
688      // Some macro arguments are used directly instead of being assigned
689      // to local temporaries; prepend an underscore prefix to make their
690      // names consistent with the local temporaries.
691      if (MacroArgUsedDirectly(proto, i))
692        s += "__";
693    } else {
694      s += TypeString(proto[i], typestr) + " __";
695    }
696    s.push_back(arg);
697    if ((i + 1) < e)
698      s += ", ";
699  }
700
701  s += ")";
702  return s;
703}
704
705// Macro arguments are not type-checked like inline function arguments, so
706// assign them to local temporaries to get the right type checking.
707static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
708  char arg = 'a';
709  std::string s;
710  bool generatedLocal = false;
711
712  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
713    // Do not create a temporary for an immediate argument.
714    // That would defeat the whole point of using a macro!
715    if (MacroArgUsedDirectly(proto, i))
716      continue;
717    generatedLocal = true;
718
719    s += TypeString(proto[i], typestr) + " __";
720    s.push_back(arg);
721    s += " = (";
722    s.push_back(arg);
723    s += "); ";
724  }
725
726  if (generatedLocal)
727    s += "\\\n  ";
728  return s;
729}
730
731// Use the vmovl builtin to sign-extend or zero-extend a vector.
732static std::string Extend(StringRef typestr, const std::string &a) {
733  std::string s;
734  s = MangleName("vmovl", typestr, ClassS);
735  s += "(" + a + ")";
736  return s;
737}
738
739static std::string Duplicate(unsigned nElts, StringRef typestr,
740                             const std::string &a) {
741  std::string s;
742
743  s = "(" + TypeString('d', typestr) + "){ ";
744  for (unsigned i = 0; i != nElts; ++i) {
745    s += a;
746    if ((i + 1) < nElts)
747      s += ", ";
748  }
749  s += " }";
750
751  return s;
752}
753
// SplatLane - Generate a __builtin_shufflevector call that takes vec as both
// inputs and selects index lane for each of the nElts result elements.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string s = "__builtin_shufflevector(";
  s += vec;
  s += ", ";
  s += vec;

  for (unsigned remaining = nElts; remaining != 0; --remaining) {
    s += ", ";
    s += lane;
  }

  s += ")";
  return s;
}
762
763static unsigned GetNumElements(StringRef typestr, bool &quad) {
764  quad = false;
765  bool dummy = false;
766  char type = ClassifyType(typestr, quad, dummy, dummy);
767  unsigned nElts = 0;
768  switch (type) {
769  case 'c': nElts = 8; break;
770  case 's': nElts = 4; break;
771  case 'i': nElts = 2; break;
772  case 'l': nElts = 1; break;
773  case 'h': nElts = 4; break;
774  case 'f': nElts = 2; break;
775  default:
776    throw "unhandled type!";
777  }
778  if (quad) nElts <<= 1;
779  return nElts;
780}
781
782// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
783static std::string GenOpString(OpKind op, const std::string &proto,
784                               StringRef typestr) {
785  bool quad;
786  unsigned nElts = GetNumElements(typestr, quad);
787  bool define = UseMacro(proto);
788
789  std::string ts = TypeString(proto[0], typestr);
790  std::string s;
791  if (!define) {
792    s = "return ";
793  }
794
795  switch(op) {
796  case OpAdd:
797    s += "__a + __b;";
798    break;
799  case OpAddl:
800    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
801    break;
802  case OpAddw:
803    s += "__a + " + Extend(typestr, "__b") + ";";
804    break;
805  case OpSub:
806    s += "__a - __b;";
807    break;
808  case OpSubl:
809    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
810    break;
811  case OpSubw:
812    s += "__a - " + Extend(typestr, "__b") + ";";
813    break;
814  case OpMulN:
815    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
816    break;
817  case OpMulLane:
818    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
819    break;
820  case OpMul:
821    s += "__a * __b;";
822    break;
823  case OpMullLane:
824    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
825      SplatLane(nElts, "__b", "__c") + ");";
826    break;
827  case OpMlaN:
828    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
829    break;
830  case OpMlaLane:
831    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
832    break;
833  case OpMla:
834    s += "__a + (__b * __c);";
835    break;
836  case OpMlalN:
837    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
838      Duplicate(nElts, typestr, "__c") + ");";
839    break;
840  case OpMlalLane:
841    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
842      SplatLane(nElts, "__c", "__d") + ");";
843    break;
844  case OpMlal:
845    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
846    break;
847  case OpMlsN:
848    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
849    break;
850  case OpMlsLane:
851    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
852    break;
853  case OpMls:
854    s += "__a - (__b * __c);";
855    break;
856  case OpMlslN:
857    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
858      Duplicate(nElts, typestr, "__c") + ");";
859    break;
860  case OpMlslLane:
861    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
862      SplatLane(nElts, "__c", "__d") + ");";
863    break;
864  case OpMlsl:
865    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
866    break;
867  case OpQDMullLane:
868    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
869      SplatLane(nElts, "__b", "__c") + ");";
870    break;
871  case OpQDMlalLane:
872    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
873      SplatLane(nElts, "__c", "__d") + ");";
874    break;
875  case OpQDMlslLane:
876    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
877      SplatLane(nElts, "__c", "__d") + ");";
878    break;
879  case OpQDMulhLane:
880    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
881      SplatLane(nElts, "__b", "__c") + ");";
882    break;
883  case OpQRDMulhLane:
884    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
885      SplatLane(nElts, "__b", "__c") + ");";
886    break;
887  case OpEq:
888    s += "(" + ts + ")(__a == __b);";
889    break;
890  case OpGe:
891    s += "(" + ts + ")(__a >= __b);";
892    break;
893  case OpLe:
894    s += "(" + ts + ")(__a <= __b);";
895    break;
896  case OpGt:
897    s += "(" + ts + ")(__a > __b);";
898    break;
899  case OpLt:
900    s += "(" + ts + ")(__a < __b);";
901    break;
902  case OpNeg:
903    s += " -__a;";
904    break;
905  case OpNot:
906    s += " ~__a;";
907    break;
908  case OpAnd:
909    s += "__a & __b;";
910    break;
911  case OpOr:
912    s += "__a | __b;";
913    break;
914  case OpXor:
915    s += "__a ^ __b;";
916    break;
917  case OpAndNot:
918    s += "__a & ~__b;";
919    break;
920  case OpOrNot:
921    s += "__a | ~__b;";
922    break;
923  case OpCast:
924    s += "(" + ts + ")__a;";
925    break;
926  case OpConcat:
927    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
928    s += ", (int64x1_t)__b, 0, 1);";
929    break;
930  case OpHi:
931    s += "(" + ts +
932      ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
933    break;
934  case OpLo:
935    s += "(" + ts +
936      ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
937    break;
938  case OpDup:
939    s += Duplicate(nElts, typestr, "__a") + ";";
940    break;
941  case OpDupLane:
942    s += SplatLane(nElts, "__a", "__b") + ";";
943    break;
944  case OpSelect:
945    // ((0 & 1) | (~0 & 2))
946    s += "(" + ts + ")";
947    ts = TypeString(proto[1], typestr);
948    s += "((__a & (" + ts + ")__b) | ";
949    s += "(~__a & (" + ts + ")__c));";
950    break;
951  case OpRev16:
952    s += "__builtin_shufflevector(__a, __a";
953    for (unsigned i = 2; i <= nElts; i += 2)
954      for (unsigned j = 0; j != 2; ++j)
955        s += ", " + utostr(i - j - 1);
956    s += ");";
957    break;
958  case OpRev32: {
959    unsigned WordElts = nElts >> (1 + (int)quad);
960    s += "__builtin_shufflevector(__a, __a";
961    for (unsigned i = WordElts; i <= nElts; i += WordElts)
962      for (unsigned j = 0; j != WordElts; ++j)
963        s += ", " + utostr(i - j - 1);
964    s += ");";
965    break;
966  }
967  case OpRev64: {
968    unsigned DblWordElts = nElts >> (int)quad;
969    s += "__builtin_shufflevector(__a, __a";
970    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
971      for (unsigned j = 0; j != DblWordElts; ++j)
972        s += ", " + utostr(i - j - 1);
973    s += ");";
974    break;
975  }
976  case OpAbdl: {
977    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
978    if (typestr[0] != 'U') {
979      // vabd results are always unsigned and must be zero-extended.
980      std::string utype = "U" + typestr.str();
981      s += "(" + TypeString(proto[0], typestr) + ")";
982      abd = "(" + TypeString('d', utype) + ")" + abd;
983      s += Extend(utype, abd) + ";";
984    } else {
985      s += Extend(typestr, abd) + ";";
986    }
987    break;
988  }
989  case OpAba:
990    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
991    break;
992  case OpAbal: {
993    s += "__a + ";
994    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
995    if (typestr[0] != 'U') {
996      // vabd results are always unsigned and must be zero-extended.
997      std::string utype = "U" + typestr.str();
998      s += "(" + TypeString(proto[0], typestr) + ")";
999      abd = "(" + TypeString('d', utype) + ")" + abd;
1000      s += Extend(utype, abd) + ";";
1001    } else {
1002      s += Extend(typestr, abd) + ";";
1003    }
1004    break;
1005  }
1006  default:
1007    throw "unknown OpKind!";
1008  }
1009  return s;
1010}
1011
1012static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1013  unsigned mod = proto[0];
1014
1015  if (mod == 'v' || mod == 'f')
1016    mod = proto[1];
1017
1018  bool quad = false;
1019  bool poly = false;
1020  bool usgn = false;
1021  bool scal = false;
1022  bool cnst = false;
1023  bool pntr = false;
1024
1025  // Base type to get the type string for.
1026  char type = ClassifyType(typestr, quad, poly, usgn);
1027
1028  // Based on the modifying character, change the type and width if necessary.
1029  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1030
1031  NeonTypeFlags::EltType ET;
1032  switch (type) {
1033    case 'c':
1034      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1035      break;
1036    case 's':
1037      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1038      break;
1039    case 'i':
1040      ET = NeonTypeFlags::Int32;
1041      break;
1042    case 'l':
1043      ET = NeonTypeFlags::Int64;
1044      break;
1045    case 'h':
1046      ET = NeonTypeFlags::Float16;
1047      break;
1048    case 'f':
1049      ET = NeonTypeFlags::Float32;
1050      break;
1051    default:
1052      throw "unhandled type!";
1053  }
1054  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1055  return Flags.getFlags();
1056}
1057
1058// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
1059static std::string GenBuiltin(const std::string &name, const std::string &proto,
1060                              StringRef typestr, ClassKind ck) {
1061  std::string s;
1062
1063  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1064  // sret-like argument.
1065  bool sret = (proto[0] >= '2' && proto[0] <= '4');
1066
1067  bool define = UseMacro(proto);
1068
1069  // Check if the prototype has a scalar operand with the type of the vector
1070  // elements.  If not, bitcasting the args will take care of arg checking.
1071  // The actual signedness etc. will be taken care of with special enums.
1072  if (proto.find('s') == std::string::npos)
1073    ck = ClassB;
1074
1075  if (proto[0] != 'v') {
1076    std::string ts = TypeString(proto[0], typestr);
1077
1078    if (define) {
1079      if (sret)
1080        s += ts + " r; ";
1081      else
1082        s += "(" + ts + ")";
1083    } else if (sret) {
1084      s += ts + " r; ";
1085    } else {
1086      s += "return (" + ts + ")";
1087    }
1088  }
1089
1090  bool splat = proto.find('a') != std::string::npos;
1091
1092  s += "__builtin_neon_";
1093  if (splat) {
1094    // Call the non-splat builtin: chop off the "_n" suffix from the name.
1095    std::string vname(name, 0, name.size()-2);
1096    s += MangleName(vname, typestr, ck);
1097  } else {
1098    s += MangleName(name, typestr, ck);
1099  }
1100  s += "(";
1101
1102  // Pass the address of the return variable as the first argument to sret-like
1103  // builtins.
1104  if (sret)
1105    s += "&r, ";
1106
1107  char arg = 'a';
1108  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1109    std::string args = std::string(&arg, 1);
1110
1111    // Use the local temporaries instead of the macro arguments.
1112    args = "__" + args;
1113
1114    bool argQuad = false;
1115    bool argPoly = false;
1116    bool argUsgn = false;
1117    bool argScalar = false;
1118    bool dummy = false;
1119    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
1120    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
1121                      dummy, dummy);
1122
1123    // Handle multiple-vector values specially, emitting each subvector as an
1124    // argument to the __builtin.
1125    if (proto[i] >= '2' && proto[i] <= '4') {
1126      // Check if an explicit cast is needed.
1127      if (argType != 'c' || argPoly || argUsgn)
1128        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
1129
1130      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
1131        s += args + ".val[" + utostr(vi) + "]";
1132        if ((vi + 1) < ve)
1133          s += ", ";
1134      }
1135      if ((i + 1) < e)
1136        s += ", ";
1137
1138      continue;
1139    }
1140
1141    if (splat && (i + 1) == e)
1142      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
1143
1144    // Check if an explicit cast is needed.
1145    if ((splat || !argScalar) &&
1146        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
1147      std::string argTypeStr = "c";
1148      if (ck != ClassB)
1149        argTypeStr = argType;
1150      if (argQuad)
1151        argTypeStr = "Q" + argTypeStr;
1152      args = "(" + TypeString('d', argTypeStr) + ")" + args;
1153    }
1154
1155    s += args;
1156    if ((i + 1) < e)
1157      s += ", ";
1158  }
1159
1160  // Extra constant integer to hold type class enum for this function, e.g. s8
1161  if (ck == ClassB)
1162    s += ", " + utostr(GetNeonEnum(proto, typestr));
1163
1164  s += ");";
1165
1166  if (proto[0] != 'v' && sret) {
1167    if (define)
1168      s += " r;";
1169    else
1170      s += " return r;";
1171  }
1172  return s;
1173}
1174
1175static std::string GenBuiltinDef(const std::string &name,
1176                                 const std::string &proto,
1177                                 StringRef typestr, ClassKind ck) {
1178  std::string s("BUILTIN(__builtin_neon_");
1179
1180  // If all types are the same size, bitcasting the args will take care
1181  // of arg checking.  The actual signedness etc. will be taken care of with
1182  // special enums.
1183  if (proto.find('s') == std::string::npos)
1184    ck = ClassB;
1185
1186  s += MangleName(name, typestr, ck);
1187  s += ", \"";
1188
1189  for (unsigned i = 0, e = proto.size(); i != e; ++i)
1190    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1191
1192  // Extra constant integer to hold type class enum for this function, e.g. s8
1193  if (ck == ClassB)
1194    s += "i";
1195
1196  s += "\", \"n\")";
1197  return s;
1198}
1199
1200static std::string GenIntrinsic(const std::string &name,
1201                                const std::string &proto,
1202                                StringRef outTypeStr, StringRef inTypeStr,
1203                                OpKind kind, ClassKind classKind) {
1204  assert(!proto.empty() && "");
1205  bool define = UseMacro(proto) && kind != OpUnavailable;
1206  std::string s;
1207
1208  // static always inline + return type
1209  if (define)
1210    s += "#define ";
1211  else
1212    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1213
1214  // Function name with type suffix
1215  std::string mangledName = MangleName(name, outTypeStr, ClassS);
1216  if (outTypeStr != inTypeStr) {
1217    // If the input type is different (e.g., for vreinterpret), append a suffix
1218    // for the input type.  String off a "Q" (quad) prefix so that MangleName
1219    // does not insert another "q" in the name.
1220    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1221    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1222    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1223  }
1224  s += mangledName;
1225
1226  // Function arguments
1227  s += GenArgs(proto, inTypeStr);
1228
1229  // Definition.
1230  if (define) {
1231    s += " __extension__ ({ \\\n  ";
1232    s += GenMacroLocals(proto, inTypeStr);
1233  } else if (kind == OpUnavailable) {
1234    s += " __attribute__((unavailable));\n";
1235    return s;
1236  } else
1237    s += " {\n  ";
1238
1239  if (kind != OpNone)
1240    s += GenOpString(kind, proto, outTypeStr);
1241  else
1242    s += GenBuiltin(name, proto, outTypeStr, classKind);
1243  if (define)
1244    s += " })";
1245  else
1246    s += " }";
1247  s += "\n";
1248  return s;
1249}
1250
1251/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
1252/// is comprised of type definitions and function declarations.
1253void NeonEmitter::run(raw_ostream &OS) {
1254  OS <<
1255    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
1256    "---===\n"
1257    " *\n"
1258    " * Permission is hereby granted, free of charge, to any person obtaining "
1259    "a copy\n"
1260    " * of this software and associated documentation files (the \"Software\"),"
1261    " to deal\n"
1262    " * in the Software without restriction, including without limitation the "
1263    "rights\n"
1264    " * to use, copy, modify, merge, publish, distribute, sublicense, "
1265    "and/or sell\n"
1266    " * copies of the Software, and to permit persons to whom the Software is\n"
1267    " * furnished to do so, subject to the following conditions:\n"
1268    " *\n"
1269    " * The above copyright notice and this permission notice shall be "
1270    "included in\n"
1271    " * all copies or substantial portions of the Software.\n"
1272    " *\n"
1273    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
1274    "EXPRESS OR\n"
1275    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
1276    "MERCHANTABILITY,\n"
1277    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
1278    "SHALL THE\n"
1279    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
1280    "OTHER\n"
1281    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
1282    "ARISING FROM,\n"
1283    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
1284    "DEALINGS IN\n"
1285    " * THE SOFTWARE.\n"
1286    " *\n"
1287    " *===--------------------------------------------------------------------"
1288    "---===\n"
1289    " */\n\n";
1290
1291  OS << "#ifndef __ARM_NEON_H\n";
1292  OS << "#define __ARM_NEON_H\n\n";
1293
1294  OS << "#ifndef __ARM_NEON__\n";
1295  OS << "#error \"NEON support not enabled\"\n";
1296  OS << "#endif\n\n";
1297
1298  OS << "#include <stdint.h>\n\n";
1299
1300  // Emit NEON-specific scalar typedefs.
1301  OS << "typedef float float32_t;\n";
1302  OS << "typedef int8_t poly8_t;\n";
1303  OS << "typedef int16_t poly16_t;\n";
1304  OS << "typedef uint16_t float16_t;\n";
1305
1306  // Emit Neon vector typedefs.
1307  std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
1308  SmallVector<StringRef, 24> TDTypeVec;
1309  ParseTypes(0, TypedefTypes, TDTypeVec);
1310
1311  // Emit vector typedefs.
1312  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1313    bool dummy, quad = false, poly = false;
1314    (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
1315    if (poly)
1316      OS << "typedef __attribute__((neon_polyvector_type(";
1317    else
1318      OS << "typedef __attribute__((neon_vector_type(";
1319
1320    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
1321    OS << utostr(nElts) << "))) ";
1322    if (nElts < 10)
1323      OS << " ";
1324
1325    OS << TypeString('s', TDTypeVec[i]);
1326    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
1327  }
1328  OS << "\n";
1329
1330  // Emit struct typedefs.
1331  for (unsigned vi = 2; vi != 5; ++vi) {
1332    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1333      std::string ts = TypeString('d', TDTypeVec[i]);
1334      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
1335      OS << "typedef struct " << vs << " {\n";
1336      OS << "  " << ts << " val";
1337      OS << "[" << utostr(vi) << "]";
1338      OS << ";\n} ";
1339      OS << vs << ";\n\n";
1340    }
1341  }
1342
1343  OS<<"#define __ai static __attribute__((__always_inline__, __nodebug__))\n\n";
1344
1345  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1346
1347  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
1348  // intrinsics.  (Some of the saturating multiply instructions are also
1349  // used to implement the corresponding "_lane" variants, but tablegen
1350  // sorts the records into alphabetical order so that the "_lane" variants
1351  // come after the intrinsics they use.)
1352  emitIntrinsic(OS, Records.getDef("VMOVL"));
1353  emitIntrinsic(OS, Records.getDef("VMULL"));
1354  emitIntrinsic(OS, Records.getDef("VABD"));
1355
1356  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1357    Record *R = RV[i];
1358    if (R->getName() != "VMOVL" &&
1359        R->getName() != "VMULL" &&
1360        R->getName() != "VABD")
1361      emitIntrinsic(OS, R);
1362  }
1363
1364  OS << "#undef __ai\n\n";
1365  OS << "#endif /* __ARM_NEON_H */\n";
1366}
1367
1368/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
1369/// intrinsics specified by record R.
1370void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
1371  std::string name = R->getValueAsString("Name");
1372  std::string Proto = R->getValueAsString("Prototype");
1373  std::string Types = R->getValueAsString("Types");
1374
1375  SmallVector<StringRef, 16> TypeVec;
1376  ParseTypes(R, Types, TypeVec);
1377
1378  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1379
1380  ClassKind classKind = ClassNone;
1381  if (R->getSuperClasses().size() >= 2)
1382    classKind = ClassMap[R->getSuperClasses()[1]];
1383  if (classKind == ClassNone && kind == OpNone)
1384    throw TGError(R->getLoc(), "Builtin has no class kind");
1385
1386  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1387    if (kind == OpReinterpret) {
1388      bool outQuad = false;
1389      bool dummy = false;
1390      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1391      for (unsigned srcti = 0, srcte = TypeVec.size();
1392           srcti != srcte; ++srcti) {
1393        bool inQuad = false;
1394        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1395        if (srcti == ti || inQuad != outQuad)
1396          continue;
1397        OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
1398                           OpCast, ClassS);
1399      }
1400    } else {
1401      OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
1402                         kind, classKind);
1403    }
1404  }
1405  OS << "\n";
1406}
1407
1408static unsigned RangeFromType(const char mod, StringRef typestr) {
1409  // base type to get the type string for.
1410  bool quad = false, dummy = false;
1411  char type = ClassifyType(typestr, quad, dummy, dummy);
1412  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
1413
1414  switch (type) {
1415    case 'c':
1416      return (8 << (int)quad) - 1;
1417    case 'h':
1418    case 's':
1419      return (4 << (int)quad) - 1;
1420    case 'f':
1421    case 'i':
1422      return (2 << (int)quad) - 1;
1423    case 'l':
1424      return (1 << (int)quad) - 1;
1425    default:
1426      throw "unhandled type!";
1427  }
1428}
1429
1430/// runHeader - Emit a file with sections defining:
1431/// 1. the NEON section of BuiltinsARM.def.
1432/// 2. the SemaChecking code for the type overload checking.
1433/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
1434void NeonEmitter::runHeader(raw_ostream &OS) {
1435  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1436
1437  StringMap<OpKind> EmittedMap;
1438
1439  // Generate BuiltinsARM.def for NEON
1440  OS << "#ifdef GET_NEON_BUILTINS\n";
1441  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1442    Record *R = RV[i];
1443    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1444    if (k != OpNone)
1445      continue;
1446
1447    std::string Proto = R->getValueAsString("Prototype");
1448
1449    // Functions with 'a' (the splat code) in the type prototype should not get
1450    // their own builtin as they use the non-splat variant.
1451    if (Proto.find('a') != std::string::npos)
1452      continue;
1453
1454    std::string Types = R->getValueAsString("Types");
1455    SmallVector<StringRef, 16> TypeVec;
1456    ParseTypes(R, Types, TypeVec);
1457
1458    if (R->getSuperClasses().size() < 2)
1459      throw TGError(R->getLoc(), "Builtin has no class kind");
1460
1461    std::string name = R->getValueAsString("Name");
1462    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1463
1464    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1465      // Generate the BuiltinsARM.def declaration for this builtin, ensuring
1466      // that each unique BUILTIN() macro appears only once in the output
1467      // stream.
1468      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
1469      if (EmittedMap.count(bd))
1470        continue;
1471
1472      EmittedMap[bd] = OpNone;
1473      OS << bd << "\n";
1474    }
1475  }
1476  OS << "#endif\n\n";
1477
1478  // Generate the overloaded type checking code for SemaChecking.cpp
1479  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
1480  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1481    Record *R = RV[i];
1482    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1483    if (k != OpNone)
1484      continue;
1485
1486    std::string Proto = R->getValueAsString("Prototype");
1487    std::string Types = R->getValueAsString("Types");
1488    std::string name = R->getValueAsString("Name");
1489
1490    // Functions with 'a' (the splat code) in the type prototype should not get
1491    // their own builtin as they use the non-splat variant.
1492    if (Proto.find('a') != std::string::npos)
1493      continue;
1494
1495    // Functions which have a scalar argument cannot be overloaded, no need to
1496    // check them if we are emitting the type checking code.
1497    if (Proto.find('s') != std::string::npos)
1498      continue;
1499
1500    SmallVector<StringRef, 16> TypeVec;
1501    ParseTypes(R, Types, TypeVec);
1502
1503    if (R->getSuperClasses().size() < 2)
1504      throw TGError(R->getLoc(), "Builtin has no class kind");
1505
1506    int si = -1, qi = -1;
1507    unsigned mask = 0, qmask = 0;
1508    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1509      // Generate the switch case(s) for this builtin for the type validation.
1510      bool quad = false, poly = false, usgn = false;
1511      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
1512
1513      if (quad) {
1514        qi = ti;
1515        qmask |= 1 << GetNeonEnum(Proto, TypeVec[ti]);
1516      } else {
1517        si = ti;
1518        mask |= 1 << GetNeonEnum(Proto, TypeVec[ti]);
1519      }
1520    }
1521
1522    // Check if the builtin function has a pointer or const pointer argument.
1523    int PtrArgNum = -1;
1524    bool HasConstPtr = false;
1525    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
1526      char ArgType = Proto[arg];
1527      if (ArgType == 'c') {
1528        HasConstPtr = true;
1529        PtrArgNum = arg - 1;
1530        break;
1531      }
1532      if (ArgType == 'p') {
1533        PtrArgNum = arg - 1;
1534        break;
1535      }
1536    }
1537    // For sret builtins, adjust the pointer argument index.
1538    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
1539      PtrArgNum += 1;
1540
1541    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
1542    // and vst1_lane intrinsics.  Using a pointer to the vector element
1543    // type with one of those operations causes codegen to select an aligned
1544    // load/store instruction.  If you want an unaligned operation,
1545    // the pointer argument needs to have less alignment than element type,
1546    // so just accept any pointer type.
1547    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
1548      PtrArgNum = -1;
1549      HasConstPtr = false;
1550    }
1551
1552    if (mask) {
1553      OS << "case ARM::BI__builtin_neon_"
1554         << MangleName(name, TypeVec[si], ClassB)
1555         << ": mask = " << "0x" << utohexstr(mask);
1556      if (PtrArgNum >= 0)
1557        OS << "; PtrArgNum = " << PtrArgNum;
1558      if (HasConstPtr)
1559        OS << "; HasConstPtr = true";
1560      OS << "; break;\n";
1561    }
1562    if (qmask) {
1563      OS << "case ARM::BI__builtin_neon_"
1564         << MangleName(name, TypeVec[qi], ClassB)
1565         << ": mask = " << "0x" << utohexstr(qmask);
1566      if (PtrArgNum >= 0)
1567        OS << "; PtrArgNum = " << PtrArgNum;
1568      if (HasConstPtr)
1569        OS << "; HasConstPtr = true";
1570      OS << "; break;\n";
1571    }
1572  }
1573  OS << "#endif\n\n";
1574
1575  // Generate the intrinsic range checking code for shift/lane immediates.
1576  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
1577  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1578    Record *R = RV[i];
1579
1580    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1581    if (k != OpNone)
1582      continue;
1583
1584    std::string name = R->getValueAsString("Name");
1585    std::string Proto = R->getValueAsString("Prototype");
1586    std::string Types = R->getValueAsString("Types");
1587
1588    // Functions with 'a' (the splat code) in the type prototype should not get
1589    // their own builtin as they use the non-splat variant.
1590    if (Proto.find('a') != std::string::npos)
1591      continue;
1592
1593    // Functions which do not have an immediate do not need to have range
1594    // checking code emitted.
1595    size_t immPos = Proto.find('i');
1596    if (immPos == std::string::npos)
1597      continue;
1598
1599    SmallVector<StringRef, 16> TypeVec;
1600    ParseTypes(R, Types, TypeVec);
1601
1602    if (R->getSuperClasses().size() < 2)
1603      throw TGError(R->getLoc(), "Builtin has no class kind");
1604
1605    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1606
1607    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1608      std::string namestr, shiftstr, rangestr;
1609
1610      if (R->getValueAsBit("isVCVT_N")) {
1611        // VCVT between floating- and fixed-point values takes an immediate
1612        // in the range 1 to 32.
1613        ck = ClassB;
1614        rangestr = "l = 1; u = 31"; // upper bound = l + u
1615      } else if (Proto.find('s') == std::string::npos) {
1616        // Builtins which are overloaded by type will need to have their upper
1617        // bound computed at Sema time based on the type constant.
1618        ck = ClassB;
1619        if (R->getValueAsBit("isShift")) {
1620          shiftstr = ", true";
1621
1622          // Right shifts have an 'r' in the name, left shifts do not.
1623          if (name.find('r') != std::string::npos)
1624            rangestr = "l = 1; ";
1625        }
1626        rangestr += "u = RFT(TV" + shiftstr + ")";
1627      } else {
1628        // The immediate generally refers to a lane in the preceding argument.
1629        assert(immPos > 0 && "unexpected immediate operand");
1630        rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
1631      }
1632      // Make sure cases appear only once by uniquing them in a string map.
1633      namestr = MangleName(name, TypeVec[ti], ck);
1634      if (EmittedMap.count(namestr))
1635        continue;
1636      EmittedMap[namestr] = OpNone;
1637
1638      // Calculate the index of the immediate that should be range checked.
1639      unsigned immidx = 0;
1640
1641      // Builtins that return a struct of multiple vectors have an extra
1642      // leading arg for the struct return.
1643      if (Proto[0] >= '2' && Proto[0] <= '4')
1644        ++immidx;
1645
1646      // Add one to the index for each argument until we reach the immediate
1647      // to be checked.  Structs of vectors are passed as multiple arguments.
1648      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
1649        switch (Proto[ii]) {
1650          default:  immidx += 1; break;
1651          case '2': immidx += 2; break;
1652          case '3': immidx += 3; break;
1653          case '4': immidx += 4; break;
1654          case 'i': ie = ii + 1; break;
1655        }
1656      }
1657      OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
1658         << ": i = " << immidx << "; " << rangestr << "; break;\n";
1659    }
1660  }
1661  OS << "#endif\n\n";
1662}
1663
1664/// GenTest - Write out a test for the intrinsic specified by the name and
1665/// type strings, including the embedded patterns for FileCheck to match.
1666static std::string GenTest(const std::string &name,
1667                           const std::string &proto,
1668                           StringRef outTypeStr, StringRef inTypeStr,
1669                           bool isShift) {
1670  assert(!proto.empty() && "");
1671  std::string s;
1672
1673  // Function name with type suffix
1674  std::string mangledName = MangleName(name, outTypeStr, ClassS);
1675  if (outTypeStr != inTypeStr) {
1676    // If the input type is different (e.g., for vreinterpret), append a suffix
1677    // for the input type.  String off a "Q" (quad) prefix so that MangleName
1678    // does not insert another "q" in the name.
1679    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1680    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1681    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1682  }
1683
1684  // Emit the FileCheck patterns.
1685  s += "// CHECK: test_" + mangledName + "\n";
1686  // s += "// CHECK: \n"; // FIXME: + expected instruction opcode.
1687
1688  // Emit the start of the test function.
1689  s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
1690  char arg = 'a';
1691  std::string comma;
1692  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1693    // Do not create arguments for values that must be immediate constants.
1694    if (proto[i] == 'i')
1695      continue;
1696    s += comma + TypeString(proto[i], inTypeStr) + " ";
1697    s.push_back(arg);
1698    comma = ", ";
1699  }
1700  s += ") {\n  ";
1701
1702  if (proto[0] != 'v')
1703    s += "return ";
1704  s += mangledName + "(";
1705  arg = 'a';
1706  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1707    if (proto[i] == 'i') {
1708      // For immediate operands, test the maximum value.
1709      if (isShift)
1710        s += "1"; // FIXME
1711      else
1712        // The immediate generally refers to a lane in the preceding argument.
1713        s += utostr(RangeFromType(proto[i-1], inTypeStr));
1714    } else {
1715      s.push_back(arg);
1716    }
1717    if ((i + 1) < e)
1718      s += ", ";
1719  }
1720  s += ");\n}\n\n";
1721  return s;
1722}
1723
1724/// runTests - Write out a complete set of tests for all of the Neon
1725/// intrinsics.
1726void NeonEmitter::runTests(raw_ostream &OS) {
1727  OS <<
1728    "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n"
1729    "// RUN:  -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n"
1730    "\n"
1731    "#include <arm_neon.h>\n"
1732    "\n";
1733
1734  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1735  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1736    Record *R = RV[i];
1737    std::string name = R->getValueAsString("Name");
1738    std::string Proto = R->getValueAsString("Prototype");
1739    std::string Types = R->getValueAsString("Types");
1740    bool isShift = R->getValueAsBit("isShift");
1741
1742    SmallVector<StringRef, 16> TypeVec;
1743    ParseTypes(R, Types, TypeVec);
1744
1745    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1746    if (kind == OpUnavailable)
1747      continue;
1748    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1749      if (kind == OpReinterpret) {
1750        bool outQuad = false;
1751        bool dummy = false;
1752        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1753        for (unsigned srcti = 0, srcte = TypeVec.size();
1754             srcti != srcte; ++srcti) {
1755          bool inQuad = false;
1756          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1757          if (srcti == ti || inQuad != outQuad)
1758            continue;
1759          OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift);
1760        }
1761      } else {
1762        OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift);
1763      }
1764    }
1765    OS << "\n";
1766  }
1767}
1768
1769namespace clang {
1770void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
1771  NeonEmitter(Records).run(OS);
1772}
1773void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
1774  NeonEmitter(Records).runHeader(OS);
1775}
1776void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
1777  NeonEmitter(Records).runTests(OS);
1778}
1779} // End namespace clang
1780