NeonEmitter.cpp revision 2e22f29b92768ea65ac5c26d354226ecc7509311
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting arm_neon.h, which includes
11// a declaration and definition of each function specified by the ARM NEON
12// compiler interface.  See ARM document DUI0348B.
13//
14// Each NEON instruction is implemented in terms of 1 or more functions which
15// are suffixed with the element type of the input vectors.  Functions may be
16// implemented in terms of generic vector operations such as +, *, -, etc. or
17// by calling a __builtin_-prefixed function which will be handled by clang's
18// CodeGen library.
19//
20// Additional validation code can be generated by this file when runHeader() is
21// called, rather than the normal run() entry point.  A complete set of tests
22// for Neon intrinsics can be generated by calling the runTests() entry point.
23//
24//===----------------------------------------------------------------------===//
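//
// For illustration, a definition emitted by run() for a simple OP_ADD
// intrinsic such as "vadd" looks roughly like this (illustrative sketch; the
// exact attributes and formatting are determined by the code below):
//   __ai uint8x8_t vadd_u8(uint8x8_t __a, uint8x8_t __b) { return __a + __b; }
//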
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
38enum OpKind {
39  OpNone,
40  OpUnavailable,
41  OpAdd,
42  OpAddl,
43  OpAddlHi,
44  OpAddw,
45  OpAddwHi,
46  OpSub,
47  OpSubl,
48  OpSublHi,
49  OpSubw,
50  OpSubwHi,
51  OpMul,
52  OpMla,
53  OpMlal,
54  OpMullHi,
55  OpMlalHi,
56  OpMls,
57  OpMlsl,
58  OpMlslHi,
59  OpMulN,
60  OpMlaN,
61  OpMlsN,
62  OpMlalN,
63  OpMlslN,
64  OpMulLane,
65  OpMulXLane,
66  OpMullLane,
67  OpMullHiLane,
68  OpMlaLane,
69  OpMlsLane,
70  OpMlalLane,
71  OpMlalHiLane,
72  OpMlslLane,
73  OpMlslHiLane,
74  OpQDMullLane,
75  OpQDMullHiLane,
76  OpQDMlalLane,
77  OpQDMlalHiLane,
78  OpQDMlslLane,
79  OpQDMlslHiLane,
80  OpQDMulhLane,
81  OpQRDMulhLane,
82  OpFMSLane,
83  OpFMSLaneQ,
84  OpTrn1,
85  OpZip1,
86  OpUzp1,
87  OpTrn2,
88  OpZip2,
89  OpUzp2,
90  OpEq,
91  OpGe,
92  OpLe,
93  OpGt,
94  OpLt,
95  OpNeg,
96  OpNot,
97  OpAnd,
98  OpOr,
99  OpXor,
100  OpAndNot,
101  OpOrNot,
102  OpCast,
103  OpConcat,
104  OpDup,
105  OpDupLane,
106  OpHi,
107  OpLo,
108  OpSelect,
109  OpRev16,
110  OpRev32,
111  OpRev64,
112  OpReinterpret,
113  OpAddhnHi,
114  OpRAddhnHi,
115  OpSubhnHi,
116  OpRSubhnHi,
117  OpAbdl,
118  OpAbdlHi,
119  OpAba,
120  OpAbal,
121  OpAbalHi,
122  OpQDMullHi,
123  OpQDMlalHi,
124  OpQDMlslHi,
125  OpDiv,
126  OpLongHi,
127  OpNarrowHi,
128  OpMovlHi,
129  OpCopyLane,
130  OpCopyQLane,
131  OpCopyLaneQ
132};
133
134enum ClassKind {
135  ClassNone,
136  ClassI,           // generic integer instruction, e.g., "i8" suffix
137  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
138  ClassW,           // width-specific instruction, e.g., "8" suffix
139  ClassB,           // bitcast arguments with enum argument to specify type
140  ClassL,           // Logical instructions which are op instructions
141                    // but for which we do not emit any suffix in our
142                    // tests.
143  ClassNoTest       // Instructions which we do not test since they are
144                    // not TRUE instructions.
145};
146
147/// NeonTypeFlags - Flags to identify the types for overloaded Neon
148/// builtins.  These must be kept in sync with the flags in
149/// include/clang/Basic/TargetBuiltins.h.
150namespace {
151class NeonTypeFlags {
152  enum {
153    EltTypeMask = 0xf,
154    UnsignedFlag = 0x10,
155    QuadFlag = 0x20
156  };
157  uint32_t Flags;
158
159public:
160  enum EltType {
161    Int8,
162    Int16,
163    Int32,
164    Int64,
165    Poly8,
166    Poly16,
167    Float16,
168    Float32,
169    Float64
170  };
171
172  NeonTypeFlags(unsigned F) : Flags(F) {}
173  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
174    if (IsUnsigned)
175      Flags |= UnsignedFlag;
176    if (IsQuad)
177      Flags |= QuadFlag;
178  }
179
180  uint32_t getFlags() const { return Flags; }
181};
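// For illustration: NeonTypeFlags(NeonTypeFlags::Int8, /*IsUnsigned=*/true,
// /*IsQuad=*/true) encodes an unsigned 8-bit quad type (e.g. uint8x16_t) as
// 0x30, i.e. Int8 | UnsignedFlag | QuadFlag.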
182} // end anonymous namespace
183
184namespace {
185class NeonEmitter {
186  RecordKeeper &Records;
187  StringMap<OpKind> OpMap;
188  DenseMap<Record*, ClassKind> ClassMap;
189
190public:
191  NeonEmitter(RecordKeeper &R) : Records(R) {
192    OpMap["OP_NONE"]  = OpNone;
193    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
194    OpMap["OP_ADD"]   = OpAdd;
195    OpMap["OP_ADDL"]  = OpAddl;
196    OpMap["OP_ADDLHi"] = OpAddlHi;
197    OpMap["OP_ADDW"]  = OpAddw;
198    OpMap["OP_ADDWHi"] = OpAddwHi;
199    OpMap["OP_SUB"]   = OpSub;
200    OpMap["OP_SUBL"]  = OpSubl;
201    OpMap["OP_SUBLHi"] = OpSublHi;
202    OpMap["OP_SUBW"]  = OpSubw;
203    OpMap["OP_SUBWHi"] = OpSubwHi;
204    OpMap["OP_MUL"]   = OpMul;
205    OpMap["OP_MLA"]   = OpMla;
206    OpMap["OP_MLAL"]  = OpMlal;
207    OpMap["OP_MULLHi"]  = OpMullHi;
208    OpMap["OP_MLALHi"]  = OpMlalHi;
209    OpMap["OP_MLS"]   = OpMls;
210    OpMap["OP_MLSL"]  = OpMlsl;
211    OpMap["OP_MLSLHi"] = OpMlslHi;
212    OpMap["OP_MUL_N"] = OpMulN;
213    OpMap["OP_MLA_N"] = OpMlaN;
214    OpMap["OP_MLS_N"] = OpMlsN;
215    OpMap["OP_MLAL_N"] = OpMlalN;
216    OpMap["OP_MLSL_N"] = OpMlslN;
217    OpMap["OP_MUL_LN"]= OpMulLane;
218    OpMap["OP_MULX_LN"]= OpMulXLane;
219    OpMap["OP_MULL_LN"] = OpMullLane;
220    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
221    OpMap["OP_MLA_LN"]= OpMlaLane;
222    OpMap["OP_MLS_LN"]= OpMlsLane;
223    OpMap["OP_MLAL_LN"] = OpMlalLane;
224    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
225    OpMap["OP_MLSL_LN"] = OpMlslLane;
226    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
227    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
228    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
229    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
230    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
231    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
232    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
233    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
234    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
235    OpMap["OP_FMS_LN"] = OpFMSLane;
236    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
237    OpMap["OP_TRN1"]  = OpTrn1;
238    OpMap["OP_ZIP1"]  = OpZip1;
239    OpMap["OP_UZP1"]  = OpUzp1;
240    OpMap["OP_TRN2"]  = OpTrn2;
241    OpMap["OP_ZIP2"]  = OpZip2;
242    OpMap["OP_UZP2"]  = OpUzp2;
243    OpMap["OP_EQ"]    = OpEq;
244    OpMap["OP_GE"]    = OpGe;
245    OpMap["OP_LE"]    = OpLe;
246    OpMap["OP_GT"]    = OpGt;
247    OpMap["OP_LT"]    = OpLt;
248    OpMap["OP_NEG"]   = OpNeg;
249    OpMap["OP_NOT"]   = OpNot;
250    OpMap["OP_AND"]   = OpAnd;
251    OpMap["OP_OR"]    = OpOr;
252    OpMap["OP_XOR"]   = OpXor;
253    OpMap["OP_ANDN"]  = OpAndNot;
254    OpMap["OP_ORN"]   = OpOrNot;
255    OpMap["OP_CAST"]  = OpCast;
256    OpMap["OP_CONC"]  = OpConcat;
257    OpMap["OP_HI"]    = OpHi;
258    OpMap["OP_LO"]    = OpLo;
259    OpMap["OP_DUP"]   = OpDup;
260    OpMap["OP_DUP_LN"] = OpDupLane;
261    OpMap["OP_SEL"]   = OpSelect;
262    OpMap["OP_REV16"] = OpRev16;
263    OpMap["OP_REV32"] = OpRev32;
264    OpMap["OP_REV64"] = OpRev64;
265    OpMap["OP_REINT"] = OpReinterpret;
266    OpMap["OP_ADDHNHi"] = OpAddhnHi;
267    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
268    OpMap["OP_SUBHNHi"] = OpSubhnHi;
269    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
270    OpMap["OP_ABDL"]  = OpAbdl;
271    OpMap["OP_ABDLHi"] = OpAbdlHi;
272    OpMap["OP_ABA"]   = OpAba;
273    OpMap["OP_ABAL"]  = OpAbal;
274    OpMap["OP_ABALHi"] = OpAbalHi;
275    OpMap["OP_QDMULLHi"] = OpQDMullHi;
276    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
277    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
278    OpMap["OP_DIV"] = OpDiv;
279    OpMap["OP_LONG_HI"] = OpLongHi;
280    OpMap["OP_NARROW_HI"] = OpNarrowHi;
281    OpMap["OP_MOVL_HI"] = OpMovlHi;
282    OpMap["OP_COPY_LN"] = OpCopyLane;
283    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
284    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
285
286    Record *SI = R.getClass("SInst");
287    Record *II = R.getClass("IInst");
288    Record *WI = R.getClass("WInst");
289    Record *SOpI = R.getClass("SOpInst");
290    Record *IOpI = R.getClass("IOpInst");
291    Record *WOpI = R.getClass("WOpInst");
292    Record *LOpI = R.getClass("LOpInst");
293    Record *NoTestOpI = R.getClass("NoTestOpInst");
294
295    ClassMap[SI] = ClassS;
296    ClassMap[II] = ClassI;
297    ClassMap[WI] = ClassW;
298    ClassMap[SOpI] = ClassS;
299    ClassMap[IOpI] = ClassI;
300    ClassMap[WOpI] = ClassW;
301    ClassMap[LOpI] = ClassL;
302    ClassMap[NoTestOpI] = ClassNoTest;
303  }
304
305  // run - Emit arm_neon.h.inc
306  void run(raw_ostream &o);
307
308  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
309  void runHeader(raw_ostream &o);
310
311  // runTests - Emit tests for all the Neon intrinsics.
312  void runTests(raw_ostream &o);
313
314private:
315  void emitIntrinsic(raw_ostream &OS, Record *R,
316                     StringMap<ClassKind> &EmittedMap);
317  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
318                      bool isA64GenBuiltinDef);
319  void genOverloadTypeCheckCode(raw_ostream &OS,
320                                StringMap<ClassKind> &A64IntrinsicMap,
321                                bool isA64TypeCheck);
322  void genIntrinsicRangeCheckCode(raw_ostream &OS,
323                                  StringMap<ClassKind> &A64IntrinsicMap,
324                                  bool isA64RangeCheck);
325  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
326                     bool isA64TestGen);
327};
328} // end anonymous namespace
329
330/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
331/// with each StringRef representing a single type declared in the string.
332/// For "fQf" we would end up with 2 StringRefs, "f" and "Qf", representing
333/// 2xfloat and 4xfloat respectively.
334static void ParseTypes(Record *r, std::string &s,
335                       SmallVectorImpl<StringRef> &TV) {
336  const char *data = s.data();
337  int len = 0;
338
339  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
340    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
341                         || data[len] == 'H' || data[len] == 'S')
342      continue;
343
344    switch (data[len]) {
345      case 'c':
346      case 's':
347      case 'i':
348      case 'l':
349      case 'h':
350      case 'f':
351      case 'd':
352        break;
353      default:
354        PrintFatalError(r->getLoc(),
355                      "Unexpected letter: " + std::string(data + len, 1));
356    }
357    TV.push_back(StringRef(data, len + 1));
358    data += len + 1;
359    len = -1;
360  }
361}
362
363/// Widen - Convert a type code into the next wider type.  char -> short,
364/// short -> int, etc.
365static char Widen(const char t) {
366  switch (t) {
367    case 'c':
368      return 's';
369    case 's':
370      return 'i';
371    case 'i':
372      return 'l';
373    case 'h':
374      return 'f';
375    default:
376      PrintFatalError("unhandled type in widen!");
377  }
378}
379
380/// Narrow - Convert a type code into the next smaller type.  short -> char,
381/// float -> half float, etc.
382static char Narrow(const char t) {
383  switch (t) {
384    case 's':
385      return 'c';
386    case 'i':
387      return 's';
388    case 'l':
389      return 'i';
390    case 'f':
391      return 'h';
392    default:
393      PrintFatalError("unhandled type in narrow!");
394  }
395}
396
397static std::string GetNarrowTypestr(StringRef ty)
398{
399  std::string s;
400  for (size_t i = 0, end = ty.size(); i < end; i++) {
401    switch (ty[i]) {
402      case 's':
403        s += 'c';
404        break;
405      case 'i':
406        s += 's';
407        break;
408      case 'l':
409        s += 'i';
410        break;
411      default:
412        s += ty[i];
413        break;
414    }
415  }
416
417  return s;
418}
419
420/// For a particular StringRef, return the base type code, and whether it has
421/// the quad-vector, polynomial, or unsigned modifiers set.
422static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
423  unsigned off = 0;
424  // ignore scalar.
425  if (ty[off] == 'S') {
426    ++off;
427  }
428  // remember quad.
429  if (ty[off] == 'Q' || ty[off] == 'H') {
430    quad = true;
431    ++off;
432  }
433
434  // remember poly.
435  if (ty[off] == 'P') {
436    poly = true;
437    ++off;
438  }
439
440  // remember unsigned.
441  if (ty[off] == 'U') {
442    usgn = true;
443    ++off;
444  }
445
446  // base type to get the type string for.
447  return ty[off];
448}
449
450/// ModType - Transform a type code and its modifiers based on a mod code. The
451/// mod code definitions may be found at the top of arm_neon.td.
452static char ModType(const char mod, char type, bool &quad, bool &poly,
453                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
454  switch (mod) {
455    case 't':
456      if (poly) {
457        poly = false;
458        usgn = true;
459      }
460      break;
461    case 'b':
462      scal = true;
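      // Fall through: 'b' also applies the 'u' transformations below.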
463    case 'u':
464      usgn = true;
465      poly = false;
466      if (type == 'f')
467        type = 'i';
468      if (type == 'd')
469        type = 'l';
470      break;
471    case '$':
472      scal = true;
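      // Fall through: '$' also applies the 'x' transformations below.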
473    case 'x':
474      usgn = false;
475      poly = false;
476      if (type == 'f')
477        type = 'i';
478      if (type == 'd')
479        type = 'l';
480      break;
481    case 'o':
482      scal = true;
483      type = 'd';
484      usgn = false;
485      break;
486    case 'y':
487      scal = true;
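      // Fall through: 'y' also applies the 'f' transformations below.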
488    case 'f':
489      if (type == 'h')
490        quad = true;
491      type = 'f';
492      usgn = false;
493      break;
494    case 'g':
495      quad = false;
496      break;
497    case 'B':
498    case 'C':
499    case 'D':
500    case 'j':
501      quad = true;
502      break;
503    case 'w':
504      type = Widen(type);
505      quad = true;
506      break;
507    case 'n':
508      type = Widen(type);
509      break;
510    case 'i':
511      type = 'i';
512      scal = true;
513      break;
514    case 'l':
515      type = 'l';
516      scal = true;
517      usgn = true;
518      break;
519    case 'z':
520      type = Narrow(type);
521      scal = true;
522      break;
523    case 'r':
524      type = Widen(type);
525      scal = true;
526      break;
527    case 's':
528    case 'a':
529      scal = true;
530      break;
531    case 'k':
532      quad = true;
533      break;
534    case 'c':
535      cnst = true;
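      // Fall through: 'c' also applies the 'p' transformations below.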
536    case 'p':
537      pntr = true;
538      scal = true;
539      break;
540    case 'h':
541      type = Narrow(type);
542      if (type == 'h')
543        quad = false;
544      break;
545    case 'q':
546      type = Narrow(type);
547      quad = true;
548      break;
549    case 'e':
550      type = Narrow(type);
551      usgn = true;
552      break;
553    case 'm':
554      type = Narrow(type);
555      quad = false;
556      break;
557    default:
558      break;
559  }
560  return type;
561}
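// For illustration: mod 'w' widens the element type and forces a quad result
// (e.g. type 'c' becomes 's' with quad set), while mod 'n' widens the element
// type without touching the quad flag.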
562
563static bool IsMultiVecProto(const char p) {
564  return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
565}
566
567/// TypeString - for a modifier and type, generate the name of the typedef for
568/// that type.  QUc -> uint8x16_t.
569static std::string TypeString(const char mod, StringRef typestr) {
570  bool quad = false;
571  bool poly = false;
572  bool usgn = false;
573  bool scal = false;
574  bool cnst = false;
575  bool pntr = false;
576
577  if (mod == 'v')
578    return "void";
579  if (mod == 'i')
580    return "int";
581
582  // base type to get the type string for.
583  char type = ClassifyType(typestr, quad, poly, usgn);
584
585  // Based on the modifying character, change the type and width if necessary.
586  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
587
588  SmallString<128> s;
589
590  if (usgn)
591    s.push_back('u');
592
593  switch (type) {
594    case 'c':
595      s += poly ? "poly8" : "int8";
596      if (scal)
597        break;
598      s += quad ? "x16" : "x8";
599      break;
600    case 's':
601      s += poly ? "poly16" : "int16";
602      if (scal)
603        break;
604      s += quad ? "x8" : "x4";
605      break;
606    case 'i':
607      s += "int32";
608      if (scal)
609        break;
610      s += quad ? "x4" : "x2";
611      break;
612    case 'l':
613      s += "int64";
614      if (scal)
615        break;
616      s += quad ? "x2" : "x1";
617      break;
618    case 'h':
619      s += "float16";
620      if (scal)
621        break;
622      s += quad ? "x8" : "x4";
623      break;
624    case 'f':
625      s += "float32";
626      if (scal)
627        break;
628      s += quad ? "x4" : "x2";
629      break;
630    case 'd':
631      s += "float64";
632      if (scal)
633        break;
634      s += quad ? "x2" : "x1";
635      break;
636
637    default:
638      PrintFatalError("unhandled type!");
639  }
640
641  if (mod == '2' || mod == 'B')
642    s += "x2";
643  if (mod == '3' || mod == 'C')
644    s += "x3";
645  if (mod == '4' || mod == 'D')
646    s += "x4";
647
648  // Append _t, finishing the type string typedef type.
649  s += "_t";
650
651  if (cnst)
652    s += " const";
653
654  if (pntr)
655    s += " *";
656
657  return s.str();
658}
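// For illustration, a couple of expansions of the function above:
//   TypeString('d', "Qf") -> "float32x4_t"
//   TypeString('c', "Ui") -> "uint32_t const *"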
659
660/// BuiltinTypeString - for a modifier and type, generate the clang
661/// BuiltinsARM.def prototype code for the function.  See the top of clang's
662/// Builtins.def for a description of the type strings.
663static std::string BuiltinTypeString(const char mod, StringRef typestr,
664                                     ClassKind ck, bool ret) {
665  bool quad = false;
666  bool poly = false;
667  bool usgn = false;
668  bool scal = false;
669  bool cnst = false;
670  bool pntr = false;
671
672  if (mod == 'v')
673    return "v"; // void
674  if (mod == 'i')
675    return "i"; // int
676
677  // base type to get the type string for.
678  char type = ClassifyType(typestr, quad, poly, usgn);
679
680  // Based on the modifying character, change the type and width if necessary.
681  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
682
683  // All pointers are void* pointers.  Change type to 'v' now.
684  if (pntr) {
685    usgn = false;
686    poly = false;
687    type = 'v';
688  }
689  // Treat half-float ('h') types as unsigned short ('s') types.
690  if (type == 'h') {
691    type = 's';
692    usgn = true;
693  }
694  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
695                         scal && type != 'f' && type != 'd');
696
697  if (scal) {
698    SmallString<128> s;
699
700    if (usgn)
701      s.push_back('U');
702    else if (type == 'c')
703      s.push_back('S'); // make chars explicitly signed
704
705    if (type == 'l') // 64-bit long
706      s += "LLi";
707    else
708      s.push_back(type);
709
710    if (cnst)
711      s.push_back('C');
712    if (pntr)
713      s.push_back('*');
714    return s.str();
715  }
716
717  // Since the return value must be one type, return a vector type of the
718  // appropriate width which we will bitcast.  An exception is made for
719  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
720  // fashion, storing them to a pointer arg.
721  if (ret) {
722    if (IsMultiVecProto(mod))
723      return "vv*"; // void result with void* first argument
724    if (mod == 'f' || (ck != ClassB && type == 'f'))
725      return quad ? "V4f" : "V2f";
726    if (ck != ClassB && type == 'd')
727      return quad ? "V2d" : "V1d";
728    if (ck != ClassB && type == 's')
729      return quad ? "V8s" : "V4s";
730    if (ck != ClassB && type == 'i')
731      return quad ? "V4i" : "V2i";
732    if (ck != ClassB && type == 'l')
733      return quad ? "V2LLi" : "V1LLi";
734
735    return quad ? "V16Sc" : "V8Sc";
736  }
737
738  // Non-return array types are passed as individual vectors.
739  if (mod == '2' || mod == 'B')
740    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
741  if (mod == '3' || mod == 'C')
742    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
743  if (mod == '4' || mod == 'D')
744    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
745
746  if (mod == 'f' || (ck != ClassB && type == 'f'))
747    return quad ? "V4f" : "V2f";
748  if (ck != ClassB && type == 'd')
749    return quad ? "V2d" : "V1d";
750  if (ck != ClassB && type == 's')
751    return quad ? "V8s" : "V4s";
752  if (ck != ClassB && type == 'i')
753    return quad ? "V4i" : "V2i";
754  if (ck != ClassB && type == 'l')
755    return quad ? "V2LLi" : "V1LLi";
756
757  return quad ? "V16Sc" : "V8Sc";
758}
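// For illustration: BuiltinTypeString('d', "Qf", ClassS, true) yields "V4f",
// while a multi-vector return type such as mod '2' yields "vv*" (sret-style
// void result with a void* first argument).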
759
760/// InstructionTypeCode - Computes the ARM argument character code and
761/// quad status for a specific type string and ClassKind.
762static void InstructionTypeCode(const StringRef &typeStr,
763                                const ClassKind ck,
764                                bool &quad,
765                                std::string &typeCode) {
766  bool poly = false;
767  bool usgn = false;
768  char type = ClassifyType(typeStr, quad, poly, usgn);
769
770  switch (type) {
771  case 'c':
772    switch (ck) {
773    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
774    case ClassI: typeCode = "i8"; break;
775    case ClassW: typeCode = "8"; break;
776    default: break;
777    }
778    break;
779  case 's':
780    switch (ck) {
781    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
782    case ClassI: typeCode = "i16"; break;
783    case ClassW: typeCode = "16"; break;
784    default: break;
785    }
786    break;
787  case 'i':
788    switch (ck) {
789    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
790    case ClassI: typeCode = "i32"; break;
791    case ClassW: typeCode = "32"; break;
792    default: break;
793    }
794    break;
795  case 'l':
796    switch (ck) {
797    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
798    case ClassI: typeCode = "i64"; break;
799    case ClassW: typeCode = "64"; break;
800    default: break;
801    }
802    break;
803  case 'h':
804    switch (ck) {
805    case ClassS:
806    case ClassI: typeCode = "f16"; break;
807    case ClassW: typeCode = "16"; break;
808    default: break;
809    }
810    break;
811  case 'f':
812    switch (ck) {
813    case ClassS:
814    case ClassI: typeCode = "f32"; break;
815    case ClassW: typeCode = "32"; break;
816    default: break;
817    }
818    break;
819  case 'd':
820    switch (ck) {
821    case ClassS:
822    case ClassI:
823      typeCode += "f64";
824      break;
825    case ClassW:
826      PrintFatalError("unhandled type!");
827    default:
828      break;
829    }
830    break;
831  default:
832    PrintFatalError("unhandled type!");
833  }
834}
835
836static char Insert_BHSD_Suffix(StringRef typestr) {
837  unsigned off = 0;
838  if (typestr[off++] == 'S') {
839    while (typestr[off] == 'Q' || typestr[off] == 'H' ||
840           typestr[off] == 'P' || typestr[off] == 'U')
841      ++off;
842    switch (typestr[off]) {
843    default: break;
844    case 'c': return 'b';
845    case 's': return 'h';
846    case 'i':
847    case 'f': return 's';
848    case 'l':
849    case 'd': return 'd';
850    }
851  }
852  return 0;
853}
854
855/// MangleName - Append a type or width suffix to a base neon function name,
856/// and insert a 'q' in the appropriate location if the type string contains 'Q'.
857/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
858/// Insert the proper 'b', 'h', 's' or 'd' suffix if the scalar 'S' prefix is used.
859static std::string MangleName(const std::string &name, StringRef typestr,
860                              ClassKind ck) {
861  if (name == "vcvt_f32_f16")
862    return name;
863
864  bool quad = false;
865  std::string typeCode = "";
866
867  InstructionTypeCode(typestr, ck, quad, typeCode);
868
869  std::string s = name;
870
871  if (typeCode.size() > 0) {
872    s += "_" + typeCode;
873  }
874
875  if (ck == ClassB)
876    s += "_v";
877
878  // Insert a 'q' before the first '_' character so that it ends up before
879  // _lane or _n on vector-scalar operations.
880  if (typestr.find("Q") != StringRef::npos) {
881      size_t pos = s.find('_');
882      s = s.insert(pos, "q");
883  }
884  char ins = Insert_BHSD_Suffix(typestr);
885  if (ins) {
886    size_t pos = s.find('_');
887    s = s.insert(pos, &ins, 1);
888  }
889
890  return s;
891}
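// For illustration: MangleName("vadd", "Qf", ClassS) -> "vaddq_f32", and the
// scalar form MangleName("vadd", "Sf", ClassS) -> "vadds_f32".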
892
893static void PreprocessInstruction(const StringRef &Name,
894                                  const std::string &InstName,
895                                  std::string &Prefix,
896                                  bool &HasNPostfix,
897                                  bool &HasLanePostfix,
898                                  bool &HasDupPostfix,
899                                  bool &IsSpecialVCvt,
900                                  size_t &TBNumber) {
901  // All of our instruction name fields from arm_neon.td are of the form
902  //   <instructionname>_...
903  // Thus we grab our instruction name by computing that Prefix.
904  const size_t PrefixEnd = Name.find_first_of('_');
905  // If InstName is passed in, we use that instead of our name Prefix.
906  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
907
908  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
909
910  HasNPostfix = Postfix.count("_n");
911  HasLanePostfix = Postfix.count("_lane");
912  HasDupPostfix = Postfix.count("_dup");
913  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
914
915  if (InstName.compare("vtbl") == 0 ||
916      InstName.compare("vtbx") == 0) {
917    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
918    // encoding to get its true value.
919    TBNumber = Name[Name.size()-1] - 48;
920  }
921}
922
923/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
924/// extracted, generate a FileCheck pattern for a Load Or Store
925static void
926GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
927                                          const std::string& OutTypeCode,
928                                          const bool &IsQuad,
929                                          const bool &HasDupPostfix,
930                                          const bool &HasLanePostfix,
931                                          const size_t Count,
932                                          std::string &RegisterSuffix) {
933  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
934  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
935  // will output a series of v{ld,st}1s, so we have to handle it specially.
936  if ((Count == 3 || Count == 4) && IsQuad) {
937    RegisterSuffix += "{";
938    for (size_t i = 0; i < Count; i++) {
939      RegisterSuffix += "d{{[0-9]+}}";
940      if (HasDupPostfix) {
941        RegisterSuffix += "[]";
942      }
943      if (HasLanePostfix) {
944        RegisterSuffix += "[{{[0-9]+}}]";
945      }
946      if (i < Count-1) {
947        RegisterSuffix += ", ";
948      }
949    }
950    RegisterSuffix += "}";
951  } else {
952
953    // Handle normal loads and stores.
954    RegisterSuffix += "{";
955    for (size_t i = 0; i < Count; i++) {
956      RegisterSuffix += "d{{[0-9]+}}";
957      if (HasDupPostfix) {
958        RegisterSuffix += "[]";
959      }
960      if (HasLanePostfix) {
961        RegisterSuffix += "[{{[0-9]+}}]";
962      }
963      if (IsQuad && !HasLanePostfix) {
964        RegisterSuffix += ", d{{[0-9]+}}";
965        if (HasDupPostfix) {
966          RegisterSuffix += "[]";
967        }
968      }
969      if (i < Count-1) {
970        RegisterSuffix += ", ";
971      }
972    }
973    RegisterSuffix += "}, [r{{[0-9]+}}";
974
975    // We only include the alignment hint for v{ld,st}1 instructions that
976    // have a dup/lane postfix and a non-8-bit element type.
977    if (IsLDSTOne) {
978      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
979        RegisterSuffix += ":" + OutTypeCode;
980      }
981    }
982
983    RegisterSuffix += "]";
984  }
985}
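// For illustration, a quad two-register load such as vld2q_u8 (Count == 2,
// IsQuad, no dup/lane postfix) produces:
//   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]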
986
987static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
988                                     const bool &HasNPostfix) {
989  return (NameRef.count("vmla") ||
990          NameRef.count("vmlal") ||
991          NameRef.count("vmlsl") ||
992          NameRef.count("vmull") ||
993          NameRef.count("vqdmlal") ||
994          NameRef.count("vqdmlsl") ||
995          NameRef.count("vqdmulh") ||
996          NameRef.count("vqdmull") ||
997          NameRef.count("vqrdmulh")) && HasNPostfix;
998}
999
1000static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1001                                         const bool &HasLanePostfix) {
1002  return (NameRef.count("vmla") ||
1003          NameRef.count("vmls") ||
1004          NameRef.count("vmlal") ||
1005          NameRef.count("vmlsl") ||
1006          (NameRef.count("vmul") && NameRef.size() == 3)||
1007          NameRef.count("vqdmlal") ||
1008          NameRef.count("vqdmlsl") ||
1009          NameRef.count("vqdmulh") ||
1010          NameRef.count("vqrdmulh")) && HasLanePostfix;
1011}
1012
1013static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1014                                  const bool &HasLanePostfix,
1015                                  const bool &IsQuad) {
1016  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1017                               && IsQuad;
1018  const bool IsVMull = NameRef.count("mull") && !IsQuad;
1019  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1020}
1021
1022static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1023                                                     const std::string &Proto,
1024                                                     const bool &HasNPostfix,
1025                                                     const bool &IsQuad,
1026                                                     const bool &HasLanePostfix,
1027                                                     const bool &HasDupPostfix,
1028                                                     std::string &NormedProto) {
1029  // Handle generic case.
1030  const StringRef NameRef(Name);
1031  for (size_t i = 0, end = Proto.size(); i < end; i++) {
1032    switch (Proto[i]) {
1033    case 'u':
1034    case 'f':
1035    case 'd':
1036    case 's':
1037    case 'x':
1038    case 't':
1039    case 'n':
1040      NormedProto += IsQuad? 'q' : 'd';
1041      break;
1042    case 'w':
1043    case 'k':
1044      NormedProto += 'q';
1045      break;
1046    case 'g':
1047    case 'j':
1048    case 'h':
1049    case 'e':
1050      NormedProto += 'd';
1051      break;
1052    case 'i':
1053      NormedProto += HasLanePostfix? 'a' : 'i';
1054      break;
1055    case 'a':
1056      if (HasLanePostfix) {
1057        NormedProto += 'a';
1058      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1059        NormedProto += IsQuad? 'q' : 'd';
1060      } else {
1061        NormedProto += 'i';
1062      }
1063      break;
1064    }
1065  }
1066
1067  // Handle Special Cases.
1068  const bool IsNotVExt = !NameRef.count("vext");
1069  const bool IsVPADAL = NameRef.count("vpadal");
1070  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1071                                                           HasLanePostfix);
1072  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1073                                                      IsQuad);
1074
1075  if (IsSpecialLaneMul) {
1076    // Drop the third proto character, keeping characters 0, 1 and 3.
1077    NormedProto[2] = NormedProto[3];
1078    NormedProto.erase(3);
1079  } else if (NormedProto.size() == 4 &&
1080             NormedProto[0] == NormedProto[1] &&
1081             IsNotVExt) {
1082    // If NormedProto.size() == 4 and the first two proto characters are the
1083    // same, ignore the first.
1084    NormedProto = NormedProto.substr(1, 3);
1085  } else if (Is5OpLaneAccum) {
1086    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1087    std::string tmp = NormedProto.substr(1,2);
1088    tmp += NormedProto[4];
1089    NormedProto = tmp;
1090  } else if (IsVPADAL) {
1091    // If we have VPADAL, keep only the first two proto characters.
1092    NormedProto = NormedProto.substr(0, 2);
1093  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1094    // If our instruction is a dup instruction, keep only the first and
1095    // last characters.
1096    std::string tmp = "";
1097    tmp += NormedProto[0];
1098    tmp += NormedProto[NormedProto.size()-1];
1099    NormedProto = tmp;
1100  }
1101}
1102
1103/// GenerateRegisterCheckPattern - Given a bunch of data we have
1104/// extracted, generate a FileCheck pattern to check that an
1105/// instruction's arguments are correct.
1106static void GenerateRegisterCheckPattern(const std::string &Name,
1107                                         const std::string &Proto,
1108                                         const std::string &OutTypeCode,
1109                                         const bool &HasNPostfix,
1110                                         const bool &IsQuad,
1111                                         const bool &HasLanePostfix,
1112                                         const bool &HasDupPostfix,
1113                                         const size_t &TBNumber,
1114                                         std::string &RegisterSuffix) {
1115
1116  RegisterSuffix = "";
1117
1118  const StringRef NameRef(Name);
1119  const StringRef ProtoRef(Proto);
1120
1121  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1122    return;
1123  }
1124
1125  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1126  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1127
1128  if (IsLoadStore) {
1129    // Grab the N value from v{ld,st}N using its ASCII representation.
1130    const size_t Count = NameRef[3] - 48;
1131
1132    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1133                                              HasDupPostfix, HasLanePostfix,
1134                                              Count, RegisterSuffix);
1135  } else if (IsTBXOrTBL) {
1136    RegisterSuffix += "d{{[0-9]+}}, {";
1137    for (size_t i = 0; i < TBNumber-1; i++) {
1138      RegisterSuffix += "d{{[0-9]+}}, ";
1139    }
1140    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1141  } else {
1142    // Handle a normal instruction.
1143    if (NameRef.count("vget") || NameRef.count("vset"))
1144      return;
1145
1146    // We first normalize our proto, since we only need to emit 4
1147    // different types of checks, yet have more than 4 proto types
1148    // that map onto those 4 patterns.
1149    std::string NormalizedProto("");
1150    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1151                                             HasLanePostfix, HasDupPostfix,
1152                                             NormalizedProto);
1153
1154    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1155      const char &c = NormalizedProto[i];
1156      switch (c) {
1157      case 'q':
1158        RegisterSuffix += "q{{[0-9]+}}, ";
1159        break;
1160
1161      case 'd':
1162        RegisterSuffix += "d{{[0-9]+}}, ";
1163        break;
1164
1165      case 'i':
1166        RegisterSuffix += "#{{[0-9]+}}, ";
1167        break;
1168
1169      case 'a':
1170        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1171        break;
1172      }
1173    }
1174
1175    // Remove extra ", ".
1176    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1177  }
1178}
1179
1180/// GenerateChecksForIntrinsic - Given a specific instruction name +
1181/// typestr + class kind, generate the proper set of FileCheck
1182/// Patterns to check for. We could just return a string, but instead
1183/// use a vector since it provides us with the extra flexibility of
1184/// emitting multiple checks, which comes in handy for certain cases
1185/// like mla where we want to check for 2 different instructions.
1186static void GenerateChecksForIntrinsic(const std::string &Name,
1187                                       const std::string &Proto,
1188                                       StringRef &OutTypeStr,
1189                                       StringRef &InTypeStr,
1190                                       ClassKind Ck,
1191                                       const std::string &InstName,
1192                                       bool IsHiddenLOp,
1193                                       std::vector<std::string>& Result) {
1194
1195  // If Ck is a ClassNoTest instruction, just return so no test is
1196  // emitted.
1197  if(Ck == ClassNoTest)
1198    return;
1199
1200  if (Name == "vcvt_f32_f16") {
1201    Result.push_back("vcvt.f32.f16");
1202    return;
1203  }
1204
1205
1206  // Now we preprocess our instruction given the data we have to get the
1207  // data that we need.
1208  // Create a StringRef for String Manipulation of our Name.
1209  const StringRef NameRef(Name);
1210  // Instruction Prefix.
1211  std::string Prefix;
1212  // The type code for our out type string.
1213  std::string OutTypeCode;
1214  // To handle our different cases, we need to check for different postfixes.
1215  // Is our instruction a quad instruction.
1216  bool IsQuad = false;
1217  // Our instruction is of the form <instructionname>_n.
1218  bool HasNPostfix = false;
1219  // Our instruction is of the form <instructionname>_lane.
1220  bool HasLanePostfix = false;
1221  // Our instruction is of the form <instructionname>_dup.
1222  bool HasDupPostfix  = false;
1223  // Our instruction is a vcvt instruction which requires special handling.
1224  bool IsSpecialVCvt = false;
1225  // If we have a vtbxN or vtblN instruction, this is set to N.
1226  size_t TBNumber = -1;
1227  // Register Suffix
1228  std::string RegisterSuffix;
1229
1230  PreprocessInstruction(NameRef, InstName, Prefix,
1231                        HasNPostfix, HasLanePostfix, HasDupPostfix,
1232                        IsSpecialVCvt, TBNumber);
1233
1234  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1235  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1236                               HasLanePostfix, HasDupPostfix, TBNumber,
1237                               RegisterSuffix);
1238
1239  // In the following section, we handle a bunch of special cases. You can tell
1240  // a special case by the fact we are returning early.
1241
1242  // If our instruction is a logical instruction without postfix or a
1243  // hidden LOp just return the current Prefix.
1244  if (Ck == ClassL || IsHiddenLOp) {
1245    Result.push_back(Prefix + " " + RegisterSuffix);
1246    return;
1247  }
1248
1249  // If we have a vmov, due to the many different cases, some of which
1250  // vary within the different intrinsics generated for a single
1251  // instruction type, just output a vmov. (e.g. given an instruction
1252  // A, A.u32 might be vmov and A.u8 might be vmov.8).
1253  //
1254  // FIXME: Maybe something can be done about this. The two cases that we care
1255  // about are vmov as an LType and vmov as a WType.
1256  if (Prefix == "vmov") {
1257    Result.push_back(Prefix + " " + RegisterSuffix);
1258    return;
1259  }
1260
1261  // In the following section, we handle special cases.
1262
1263  if (OutTypeCode == "64") {
1264    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1265    // type, the intrinsic will be optimized away, so just return
1266    // nothing.  On the other hand if we are handling an uint64x2_t
1267    // (i.e. quad instruction), vdup/vmov instructions should be
1268    // emitted.
1269    if (Prefix == "vdup" || Prefix == "vext") {
1270      if (IsQuad) {
1271        Result.push_back("{{vmov|vdup}}");
1272      }
1273      return;
1274    }
1275
1276    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1277    // multiple register operands.
1278    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1279                            || Prefix == "vld4";
1280    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1281                            || Prefix == "vst4";
1282    if (MultiLoadPrefix || MultiStorePrefix) {
1283      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1284      return;
1285    }
1286
1287    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1288    // emitting said instructions. So return a check for
1289    // vldr/vstr/vmov/str instead.
1290    if (HasLanePostfix || HasDupPostfix) {
1291      if (Prefix == "vst1") {
1292        Result.push_back("{{str|vstr|vmov}}");
1293        return;
1294      } else if (Prefix == "vld1") {
1295        Result.push_back("{{ldr|vldr|vmov}}");
1296        return;
1297      }
1298    }
1299  }
1300
1301  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1302  // sometimes disassembled as vtrn.32. We use a regex to handle both
1303  // cases.
1304  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1305    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1306    return;
1307  }
1308
1309  // Currently on most ARM processors, we do not use vmla/vmls for
1310  // quad floating point operations. Instead we output vmul + vadd. So
1311  // check if we have one of those instructions and just output a
1312  // check for vmul.
1313  if (OutTypeCode == "f32") {
1314    if (Prefix == "vmls") {
1315      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1316      Result.push_back("vsub." + OutTypeCode);
1317      return;
1318    } else if (Prefix == "vmla") {
1319      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1320      Result.push_back("vadd." + OutTypeCode);
1321      return;
1322    }
1323  }
1324
1325  // If we have vcvt, get the input type from the instruction name
1326  // (which should be of the form instname_inputtype) and append it
1327  // before the output type.
1328  if (Prefix == "vcvt") {
1329    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1330    Prefix += "." + inTypeCode;
1331  }
1332
1333  // Append output type code to get our final mangled instruction.
1334  Prefix += "." + OutTypeCode;
1335
1336  Result.push_back(Prefix + " " + RegisterSuffix);
1337}
1338
1339/// UseMacro - Examine the prototype string to determine if the intrinsic
1340/// should be defined as a preprocessor macro instead of an inline function.
1341static bool UseMacro(const std::string &proto) {
1342  // If this builtin takes an immediate argument, we need to #define it rather
1343  // than use a standard declaration, so that SemaChecking can range check
1344  // the immediate passed by the user.
1345  if (proto.find('i') != std::string::npos)
1346    return true;
1347
1348  // Pointer arguments need to use macros to avoid hiding aligned attributes
1349  // from the pointer type.
1350  if (proto.find('p') != std::string::npos ||
1351      proto.find('c') != std::string::npos)
1352    return true;
1353
1354  return false;
1355}
1356
1357/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
1358/// defined as a macro should be accessed directly instead of being first
1359/// assigned to a local temporary.
1360static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
1361  // True for constant ints (i), pointers (p) and const pointers (c).
1362  return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c');
1363}
1364
1365// Generate the string "(argtype a, argtype b, ...)"
1366static std::string GenArgs(const std::string &proto, StringRef typestr,
1367                           const std::string &name) {
1368  bool define = UseMacro(proto);
1369  char arg = 'a';
1370
1371  std::string s;
1372  s += "(";
1373
1374  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1375    if (define) {
1376      // Some macro arguments are used directly instead of being assigned
1377      // to local temporaries; prepend an underscore prefix to make their
1378      // names consistent with the local temporaries.
1379      if (MacroArgUsedDirectly(proto, i))
1380        s += "__";
1381    } else {
1382      s += TypeString(proto[i], typestr) + " __";
1383    }
1384    s.push_back(arg);
1385    // To avoid the argument being defined multiple times, append an extra digit for renaming.
1386    if (name == "vcopy_lane" || name == "vcopy_laneq")
1387      s.push_back('1');
1388    if ((i + 1) < e)
1389      s += ", ";
1390  }
1391
1392  s += ")";
1393  return s;
1394}
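// For illustration: GenArgs("ddd", "Qf", name) for an ordinary (non-macro,
// non-vcopy) intrinsic yields "(float32x4_t __a, float32x4_t __b)".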
1395
1396// Macro arguments are not type-checked like inline function arguments, so
1397// assign them to local temporaries to get the right type checking.
1398static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1399                                  const std::string &name) {
1400  char arg = 'a';
1401  std::string s;
1402  bool generatedLocal = false;
1403
1404  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1405    // Do not create a temporary for an immediate argument.
1406    // That would defeat the whole point of using a macro!
1407    if (MacroArgUsedDirectly(proto, i))
1408      continue;
1409    generatedLocal = true;
1410    bool extranumber = false;
1411    if (name == "vcopy_lane" || name == "vcopy_laneq")
1412      extranumber = true;
1413
1414    s += TypeString(proto[i], typestr) + " __";
1415    s.push_back(arg);
1416    if(extranumber)
1417      s.push_back('1');
1418    s += " = (";
1419    s.push_back(arg);
1420    if(extranumber)
1421      s.push_back('1');
1422    s += "); ";
1423  }
1424
1425  if (generatedLocal)
1426    s += "\\\n  ";
1427  return s;
1428}
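// For illustration: GenMacroLocals("ddi", "Qf", name) emits
// "float32x4_t __a = (a); float32x4_t __b = (b); " plus a line continuation,
// leaving the immediate argument untouched so it can be range checked.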
1429
1430// Use the vmovl builtin to sign-extend or zero-extend a vector.
1431static std::string Extend(StringRef typestr, const std::string &a, bool h = false) {
1432  std::string s, high;
1433  high = h ? "_high" : "";
1434  s = MangleName("vmovl" + high, typestr, ClassS);
1435  s += "(" + a + ")";
1436  return s;
1437}
1438
1439// Get the high 64-bit part of a vector
1440static std::string GetHigh(const std::string &a, StringRef typestr) {
1441  std::string s;
1442  s = MangleName("vget_high", typestr, ClassS);
1443  s += "(" + a + ")";
1444  return s;
1445}
1446
1447// Generate an operation with two operands, taking the high 64 bits of both.
1448static std::string Gen2OpWith2High(StringRef typestr,
1449                                   const std::string &op,
1450                                   const std::string &a,
1451                                   const std::string &b) {
1452  std::string s;
1453  std::string Op1 = GetHigh(a, typestr);
1454  std::string Op2 = GetHigh(b, typestr);
1455  s = MangleName(op, typestr, ClassS);
1456  s += "(" + Op1 + ", " + Op2 + ");";
1457  return s;
1458}
1459
1460// Generate an operation with three operands, taking the high 64 bits of the
1461// latter two operands.
1462static std::string Gen3OpWith2High(StringRef typestr,
1463                                   const std::string &op,
1464                                   const std::string &a,
1465                                   const std::string &b,
1466                                   const std::string &c) {
1467  std::string s;
1468  std::string Op1 = GetHigh(b, typestr);
1469  std::string Op2 = GetHigh(c, typestr);
1470  s = MangleName(op, typestr, ClassS);
1471  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1472  return s;
1473}
1474
1475// Generate a combine operation, placing a in the low 64 bits and b in the high 64 bits.
1476static std::string GenCombine(std::string typestr,
1477                              const std::string &a,
1478                              const std::string &b) {
1479  std::string s;
1480  s = MangleName("vcombine", typestr, ClassS);
1481  s += "(" + a + ", " + b + ")";
1482  return s;
1483}
1484
1485static std::string Duplicate(unsigned nElts, StringRef typestr,
1486                             const std::string &a) {
1487  std::string s;
1488
1489  s = "(" + TypeString('d', typestr) + "){ ";
1490  for (unsigned i = 0; i != nElts; ++i) {
1491    s += a;
1492    if ((i + 1) < nElts)
1493      s += ", ";
1494  }
1495  s += " }";
1496
1497  return s;
1498}
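// For illustration: Duplicate(2, "f", "__b") -> "(float32x2_t){ __b, __b }".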
1499
1500static std::string SplatLane(unsigned nElts, const std::string &vec,
1501                             const std::string &lane) {
1502  std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
1503  for (unsigned i = 0; i < nElts; ++i)
1504    s += ", " + lane;
1505  s += ")";
1506  return s;
1507}
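// For illustration: SplatLane(4, "__b", "__c") ->
// "__builtin_shufflevector(__b, __b, __c, __c, __c, __c)".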
1508
1509static std::string RemoveHigh(const std::string &name) {
1510  std::string s = name;
1511  std::size_t found = s.find("_high_");
1512  if (found == std::string::npos)
1513    PrintFatalError("name should contain \"_high_\" for high intrinsics");
1514  s.replace(found, 5, "");
1515  return s;
1516}
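// For illustration: RemoveHigh("vmull_high_n") -> "vmull_n".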
1517
1518static unsigned GetNumElements(StringRef typestr, bool &quad) {
1519  quad = false;
1520  bool dummy = false;
1521  char type = ClassifyType(typestr, quad, dummy, dummy);
1522  unsigned nElts = 0;
1523  switch (type) {
1524  case 'c': nElts = 8; break;
1525  case 's': nElts = 4; break;
1526  case 'i': nElts = 2; break;
1527  case 'l': nElts = 1; break;
1528  case 'h': nElts = 4; break;
1529  case 'f': nElts = 2; break;
1530  case 'd':
1531    nElts = 1;
1532    break;
1533  default:
1534    PrintFatalError("unhandled type!");
1535  }
1536  if (quad) nElts <<= 1;
1537  return nElts;
1538}
1539
1540// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1541static std::string GenOpString(const std::string &name, OpKind op,
1542                               const std::string &proto, StringRef typestr) {
1543  bool quad;
1544  unsigned nElts = GetNumElements(typestr, quad);
1545  bool define = UseMacro(proto);
1546
1547  std::string ts = TypeString(proto[0], typestr);
1548  std::string s;
1549  if (!define) {
1550    s = "return ";
1551  }
1552
1553  switch(op) {
1554  case OpAdd:
1555    s += "__a + __b;";
1556    break;
1557  case OpAddl:
1558    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1559    break;
1560  case OpAddlHi:
1561    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1562    break;
1563  case OpAddw:
1564    s += "__a + " + Extend(typestr, "__b") + ";";
1565    break;
1566  case OpAddwHi:
1567    s += "__a + " + Extend(typestr, "__b", 1) + ";";
1568    break;
1569  case OpSub:
1570    s += "__a - __b;";
1571    break;
1572  case OpSubl:
1573    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1574    break;
1575  case OpSublHi:
1576    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1577    break;
1578  case OpSubw:
1579    s += "__a - " + Extend(typestr, "__b") + ";";
1580    break;
1581  case OpSubwHi:
1582    s += "__a - " + Extend(typestr, "__b", 1) + ";";
1583    break;
1584  case OpMulN:
1585    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1586    break;
1587  case OpMulLane:
1588    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1589    break;
1590  case OpMulXLane:
1591    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1592      SplatLane(nElts, "__b", "__c") + ");";
1593    break;
1594  case OpMul:
1595    s += "__a * __b;";
1596    break;
1597  case OpMullLane:
1598    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1599      SplatLane(nElts, "__b", "__c") + ");";
1600    break;
1601  case OpMullHiLane:
1602    s += MangleName("vmull", typestr, ClassS) + "(" +
1603      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1604    break;
1605  case OpMlaN:
1606    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1607    break;
1608  case OpMlaLane:
1609    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1610    break;
1611  case OpMla:
1612    s += "__a + (__b * __c);";
1613    break;
1614  case OpMlalN:
1615    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1616      Duplicate(nElts, typestr, "__c") + ");";
1617    break;
1618  case OpMlalLane:
1619    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1620      SplatLane(nElts, "__c", "__d") + ");";
1621    break;
1622  case OpMlalHiLane:
1623    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1624      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1625    break;
1626  case OpMlal:
1627    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1628    break;
1629  case OpMullHi:
1630    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1631    break;
1632  case OpMlalHi:
1633    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1634    break;
1635  case OpMlsN:
1636    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1637    break;
1638  case OpMlsLane:
1639    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1640    break;
1641  case OpFMSLane:
1642    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1643    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1644    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1645    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1646    break;
1647  case OpFMSLaneQ:
1648    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1649    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1650    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1651    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1652    break;
1653  case OpMls:
1654    s += "__a - (__b * __c);";
1655    break;
1656  case OpMlslN:
1657    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1658      Duplicate(nElts, typestr, "__c") + ");";
1659    break;
1660  case OpMlslLane:
1661    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1662      SplatLane(nElts, "__c", "__d") + ");";
1663    break;
1664  case OpMlslHiLane:
1665    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1666      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1667    break;
1668  case OpMlsl:
1669    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1670    break;
1671  case OpMlslHi:
1672    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1673    break;
1674  case OpQDMullLane:
1675    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1676      SplatLane(nElts, "__b", "__c") + ");";
1677    break;
1678  case OpQDMullHiLane:
1679    s += MangleName("vqdmull", typestr, ClassS) + "(" +
1680      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1681    break;
1682  case OpQDMlalLane:
1683    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1684      SplatLane(nElts, "__c", "__d") + ");";
1685    break;
1686  case OpQDMlalHiLane:
1687    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1688      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1689    break;
1690  case OpQDMlslLane:
1691    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1692      SplatLane(nElts, "__c", "__d") + ");";
1693    break;
1694  case OpQDMlslHiLane:
1695    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1696      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1697    break;
1698  case OpQDMulhLane:
1699    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1700      SplatLane(nElts, "__b", "__c") + ");";
1701    break;
1702  case OpQRDMulhLane:
1703    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1704      SplatLane(nElts, "__b", "__c") + ");";
1705    break;
1706  case OpEq:
1707    s += "(" + ts + ")(__a == __b);";
1708    break;
1709  case OpGe:
1710    s += "(" + ts + ")(__a >= __b);";
1711    break;
1712  case OpLe:
1713    s += "(" + ts + ")(__a <= __b);";
1714    break;
1715  case OpGt:
1716    s += "(" + ts + ")(__a > __b);";
1717    break;
1718  case OpLt:
1719    s += "(" + ts + ")(__a < __b);";
1720    break;
1721  case OpNeg:
1722    s += " -__a;";
1723    break;
1724  case OpNot:
1725    s += " ~__a;";
1726    break;
1727  case OpAnd:
1728    s += "__a & __b;";
1729    break;
1730  case OpOr:
1731    s += "__a | __b;";
1732    break;
1733  case OpXor:
1734    s += "__a ^ __b;";
1735    break;
1736  case OpAndNot:
1737    s += "__a & ~__b;";
1738    break;
1739  case OpOrNot:
1740    s += "__a | ~__b;";
1741    break;
1742  case OpCast:
1743    s += "(" + ts + ")__a;";
1744    break;
1745  case OpConcat:
1746    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1747    s += ", (int64x1_t)__b, 0, 1);";
1748    break;
1749  case OpHi:
1750    // nElts is for the result vector, so the source is twice that number.
1751    s += "__builtin_shufflevector(__a, __a";
1752    for (unsigned i = nElts; i < nElts * 2; ++i)
1753      s += ", " + utostr(i);
1754    s += ");";
1755    break;
1756  case OpLo:
1757    s += "__builtin_shufflevector(__a, __a";
1758    for (unsigned i = 0; i < nElts; ++i)
1759      s += ", " + utostr(i);
1760    s += ");";
1761    break;
1762  case OpDup:
1763    s += Duplicate(nElts, typestr, "__a") + ";";
1764    break;
1765  case OpDupLane:
1766    s += SplatLane(nElts, "__a", "__b") + ";";
1767    break;
1768  case OpSelect:
1769    // Bitwise select: (__a & __b) | (~__a & __c).
1770    s += "(" + ts + ")";
1771    ts = TypeString(proto[1], typestr);
1772    s += "((__a & (" + ts + ")__b) | ";
1773    s += "(~__a & (" + ts + ")__c));";
1774    break;
1775  case OpRev16:
1776    s += "__builtin_shufflevector(__a, __a";
1777    for (unsigned i = 2; i <= nElts; i += 2)
1778      for (unsigned j = 0; j != 2; ++j)
1779        s += ", " + utostr(i - j - 1);
1780    s += ");";
1781    break;
1782  case OpRev32: {
1783    unsigned WordElts = nElts >> (1 + (int)quad);
1784    s += "__builtin_shufflevector(__a, __a";
1785    for (unsigned i = WordElts; i <= nElts; i += WordElts)
1786      for (unsigned j = 0; j != WordElts; ++j)
1787        s += ", " + utostr(i - j - 1);
1788    s += ");";
1789    break;
1790  }
1791  case OpRev64: {
1792    unsigned DblWordElts = nElts >> (int)quad;
1793    s += "__builtin_shufflevector(__a, __a";
1794    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1795      for (unsigned j = 0; j != DblWordElts; ++j)
1796        s += ", " + utostr(i - j - 1);
1797    s += ");";
1798    break;
1799  }
1800  case OpUzp1:
1801    s += "__builtin_shufflevector(__a, __b";
1802    for (unsigned i = 0; i < nElts; i++)
1803      s += ", " + utostr(2*i);
1804    s += ");";
1805    break;
1806  case OpUzp2:
1807    s += "__builtin_shufflevector(__a, __b";
1808    for (unsigned i = 0; i < nElts; i++)
1809      s += ", " + utostr(2*i+1);
1810    s += ");";
1811    break;
1812  case OpZip1:
1813    s += "__builtin_shufflevector(__a, __b";
1814    for (unsigned i = 0; i < (nElts/2); i++)
1815       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1816    s += ");";
1817    break;
1818  case OpZip2:
1819    s += "__builtin_shufflevector(__a, __b";
1820    for (unsigned i = nElts/2; i < nElts; i++)
1821       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1822    s += ");";
1823    break;
1824  case OpTrn1:
1825    s += "__builtin_shufflevector(__a, __b";
1826    for (unsigned i = 0; i < (nElts/2); i++)
1827       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1828    s += ");";
1829    break;
1830  case OpTrn2:
1831    s += "__builtin_shufflevector(__a, __b";
1832    for (unsigned i = 0; i < (nElts/2); i++)
1833       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
1834    s += ");";
1835    break;
1836  case OpAbdl: {
1837    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1838    if (typestr[0] != 'U') {
1839      // vabd results are always unsigned and must be zero-extended.
1840      std::string utype = "U" + typestr.str();
1841      s += "(" + TypeString(proto[0], typestr) + ")";
1842      abd = "(" + TypeString('d', utype) + ")" + abd;
1843      s += Extend(utype, abd) + ";";
1844    } else {
1845      s += Extend(typestr, abd) + ";";
1846    }
1847    break;
1848  }
1849  case OpAbdlHi:
1850    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
1851    break;
1852  case OpAddhnHi: {
1853    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
1854    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
1855    s += ";";
1856    break;
1857  }
1858  case OpRAddhnHi: {
1859    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
1860    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
1861    s += ";";
1862    break;
1863  }
1864  case OpSubhnHi: {
1865    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
1866    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
1867    s += ";";
1868    break;
1869  }
1870  case OpRSubhnHi: {
1871    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
1872    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
1873    s += ";";
1874    break;
1875  }
1876  case OpAba:
1877    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
1878    break;
1879  case OpAbal:
1880    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
1881    break;
1882  case OpAbalHi:
1883    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
1884    break;
1885  case OpQDMullHi:
1886    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
1887    break;
1888  case OpQDMlalHi:
1889    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
1890    break;
1891  case OpQDMlslHi:
1892    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
1893    break;
1894  case OpDiv:
1895    s += "__a / __b;";
1896    break;
1897  case OpMovlHi: {
1898    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
1899        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
1900    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
1901    s += "(__a1, 0);";
1902    break;
1903  }
1904  case OpLongHi: {
1905    // A local variable __a1 is needed because this may expand as a macro;
1906    // using __a directly would clash with the macro argument of that name.
1907    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
1908         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
1909    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
1910         "(__a1, __b);";
1911    break;
1912  }
1913  case OpNarrowHi: {
1914    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
1915         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
1916    break;
1917  }
1918  case OpCopyLane: {
1919    s += TypeString('s', typestr) + " __c2 = " +
1920         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
1921         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
1922    break;
1923  }
1924  case OpCopyQLane: {
1925    std::string typeCode = "";
1926    InstructionTypeCode(typestr, ClassS, quad, typeCode);
1927    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
1928         "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
1929    break;
1930  }
1931  case OpCopyLaneQ: {
1932    std::string typeCode = "";
1933    InstructionTypeCode(typestr, ClassS, quad, typeCode);
1934    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
1935         "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
1936    break;
1937  }
1938  default:
1939    PrintFatalError("unknown OpKind!");
1940  }
1941  return s;
1942}
1943
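// GetNeonEnum packs the element type, signedness, and vector width implied by
// the prototype and type string into a NeonTypeFlags word.  The resulting
// constant is appended as the trailing argument of ClassB builtin calls so
// that Sema and CodeGen can recover the concrete type; e.g. (illustrative,
// assuming the NeonTypeFlags constructor used below) a quad unsigned 16-bit
// operand yields NeonTypeFlags(NeonTypeFlags::Int16, true, true).getFlags().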
1944static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1945  unsigned mod = proto[0];
1946
1947  if (mod == 'v' || mod == 'f')
1948    mod = proto[1];
1949
1950  bool quad = false;
1951  bool poly = false;
1952  bool usgn = false;
1953  bool scal = false;
1954  bool cnst = false;
1955  bool pntr = false;
1956
1957  // Base type to get the type string for.
1958  char type = ClassifyType(typestr, quad, poly, usgn);
1959
1960  // Based on the modifying character, change the type and width if necessary.
1961  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1962
1963  NeonTypeFlags::EltType ET;
1964  switch (type) {
1965    case 'c':
1966      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1967      break;
1968    case 's':
1969      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1970      break;
1971    case 'i':
1972      ET = NeonTypeFlags::Int32;
1973      break;
1974    case 'l':
1975      ET = NeonTypeFlags::Int64;
1976      break;
1977    case 'h':
1978      ET = NeonTypeFlags::Float16;
1979      break;
1980    case 'f':
1981      ET = NeonTypeFlags::Float32;
1982      break;
1983    case 'd':
1984      ET = NeonTypeFlags::Float64;
1985      break;
1986    default:
1987      PrintFatalError("unhandled type!");
1988  }
1989  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1990  return Flags.getFlags();
1991}
1992
1993static bool ProtoHasScalar(const std::string &proto)
1994{
1995  return (proto.find('s') != std::string::npos
1996          || proto.find('r') != std::string::npos);
1997}
1998
1999// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
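// For a simple ClassB intrinsic such as vcls_s8, the emitted body looks
// roughly like (illustrative only; the trailing constant comes from
// GetNeonEnum):
//   return (int8x8_t)__builtin_neon_vcls_v(__a, 0);
// Intrinsics returning a struct of 2-4 vectors instead declare a local "r",
// pass "&r" as the first builtin argument, and return it afterwards.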
2000static std::string GenBuiltin(const std::string &name, const std::string &proto,
2001                              StringRef typestr, ClassKind ck) {
2002  std::string s;
2003
2004  // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an
2005  // implicit sret-like argument.
2006  bool sret = IsMultiVecProto(proto[0]);
2007
2008  bool define = UseMacro(proto);
2009
2010  // Check if the prototype has a scalar operand with the type of the vector
2011  // elements.  If not, bitcasting the args will take care of arg checking.
2012  // The actual signedness etc. will be taken care of with special enums.
2013  if (!ProtoHasScalar(proto))
2014    ck = ClassB;
2015
2016  if (proto[0] != 'v') {
2017    std::string ts = TypeString(proto[0], typestr);
2018
2019    if (define) {
2020      if (sret)
2021        s += ts + " r; ";
2022      else
2023        s += "(" + ts + ")";
2024    } else if (sret) {
2025      s += ts + " r; ";
2026    } else {
2027      s += "return (" + ts + ")";
2028    }
2029  }
2030
2031  bool splat = proto.find('a') != std::string::npos;
2032
2033  s += "__builtin_neon_";
2034  if (splat) {
2035    // Call the non-splat builtin: chop off the "_n" suffix from the name.
2036    std::string vname(name, 0, name.size()-2);
2037    s += MangleName(vname, typestr, ck);
2038  } else {
2039    s += MangleName(name, typestr, ck);
2040  }
2041  s += "(";
2042
2043  // Pass the address of the return variable as the first argument to sret-like
2044  // builtins.
2045  if (sret)
2046    s += "&r, ";
2047
2048  char arg = 'a';
2049  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2050    std::string args = std::string(&arg, 1);
2051
2052    // Use the local temporaries instead of the macro arguments.
2053    args = "__" + args;
2054
2055    bool argQuad = false;
2056    bool argPoly = false;
2057    bool argUsgn = false;
2058    bool argScalar = false;
2059    bool dummy = false;
2060    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2061    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2062                      dummy, dummy);
2063
2064    // Handle multiple-vector values specially, emitting each subvector as an
2065    // argument to the __builtin.
2066    unsigned NumOfVec = 0;
2067    if (proto[i] >= '2' && proto[i] <= '4') {
2068      NumOfVec = proto[i] - '0';
2069    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2070      NumOfVec = proto[i] - 'A' + 1;
2071    }
2072
2073    if (NumOfVec > 0) {
2074      // Check if an explicit cast is needed.
2075      if (argType != 'c' || argPoly || argUsgn)
2076        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2077
2078      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2079        s += args + ".val[" + utostr(vi) + "]";
2080        if ((vi + 1) < ve)
2081          s += ", ";
2082      }
2083      if ((i + 1) < e)
2084        s += ", ";
2085
2086      continue;
2087    }
2088
2089    if (splat && (i + 1) == e)
2090      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2091
2092    // Check if an explicit cast is needed.
2093    if ((splat || !argScalar) &&
2094        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2095      std::string argTypeStr = "c";
2096      if (ck != ClassB)
2097        argTypeStr = argType;
2098      if (argQuad)
2099        argTypeStr = "Q" + argTypeStr;
2100      args = "(" + TypeString('d', argTypeStr) + ")" + args;
2101    }
2102
2103    s += args;
2104    if ((i + 1) < e)
2105      s += ", ";
2106  }
2107
2108  // Extra constant integer to hold type class enum for this function, e.g. s8
2109  if (ck == ClassB)
2110    s += ", " + utostr(GetNeonEnum(proto, typestr));
2111
2112  s += ");";
2113
2114  if (proto[0] != 'v' && sret) {
2115    if (define)
2116      s += " r;";
2117    else
2118      s += " return r;";
2119  }
2120  return s;
2121}
2122
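// GenBuiltinDef emits one BUILTIN() record per unique builtin, e.g. roughly
// (illustrative; the exact type encoding is produced by BuiltinTypeString):
//   BUILTIN(__builtin_neon_vabd_v, "V8ScV8ScV8Sci", "n")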
2123static std::string GenBuiltinDef(const std::string &name,
2124                                 const std::string &proto,
2125                                 StringRef typestr, ClassKind ck) {
2126  std::string s("BUILTIN(__builtin_neon_");
2127
2128  // If all types are the same size, bitcasting the args will take care
2129  // of arg checking.  The actual signedness etc. will be taken care of with
2130  // special enums.
2131  if (!ProtoHasScalar(proto))
2132    ck = ClassB;
2133
2134  s += MangleName(name, typestr, ck);
2135  s += ", \"";
2136
2137  for (unsigned i = 0, e = proto.size(); i != e; ++i)
2138    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2139
2140  // Extra constant integer to hold type class enum for this function, e.g. s8
2141  if (ck == ClassB)
2142    s += "i";
2143
2144  s += "\", \"n\")";
2145  return s;
2146}
2147
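// GenIntrinsic produces one complete arm_neon.h entry: either a "__ai"
// always-inline function or, when UseMacro() requires it, a #define wrapping a
// GNU statement expression.  A sketch of the non-macro form (illustrative):
//   __ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) {
//     return __a + __b; }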
2148static std::string GenIntrinsic(const std::string &name,
2149                                const std::string &proto,
2150                                StringRef outTypeStr, StringRef inTypeStr,
2151                                OpKind kind, ClassKind classKind) {
2152  assert(!proto.empty() && "unexpected empty prototype");
2153  bool define = UseMacro(proto) && kind != OpUnavailable;
2154  std::string s;
2155
2156  // static always inline + return type
2157  if (define)
2158    s += "#define ";
2159  else
2160    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2161
2162  // Function name with type suffix
2163  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2164  if (outTypeStr != inTypeStr) {
2165    // If the input type is different (e.g., for vreinterpret), append a suffix
2166    // for the input type.  Strip off a "Q" (quad) prefix so that MangleName
2167    // does not insert another "q" in the name.
2168    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2169    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2170    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2171  }
2172  s += mangledName;
2173
2174  // Function arguments
2175  s += GenArgs(proto, inTypeStr, name);
2176
2177  // Definition.
2178  if (define) {
2179    s += " __extension__ ({ \\\n  ";
2180    s += GenMacroLocals(proto, inTypeStr, name);
2181  } else if (kind == OpUnavailable) {
2182    s += " __attribute__((unavailable));\n";
2183    return s;
2184  } else
2185    s += " {\n  ";
2186
2187  if (kind != OpNone)
2188    s += GenOpString(name, kind, proto, outTypeStr);
2189  else
2190    s += GenBuiltin(name, proto, outTypeStr, classKind);
2191  if (define)
2192    s += " })";
2193  else
2194    s += " }";
2195  s += "\n";
2196  return s;
2197}
2198
2199/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2200/// is comprised of type definitions and function declarations.
2201void NeonEmitter::run(raw_ostream &OS) {
2202  OS <<
2203    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2204    "---===\n"
2205    " *\n"
2206    " * Permission is hereby granted, free of charge, to any person obtaining "
2207    "a copy\n"
2208    " * of this software and associated documentation files (the \"Software\"),"
2209    " to deal\n"
2210    " * in the Software without restriction, including without limitation the "
2211    "rights\n"
2212    " * to use, copy, modify, merge, publish, distribute, sublicense, "
2213    "and/or sell\n"
2214    " * copies of the Software, and to permit persons to whom the Software is\n"
2215    " * furnished to do so, subject to the following conditions:\n"
2216    " *\n"
2217    " * The above copyright notice and this permission notice shall be "
2218    "included in\n"
2219    " * all copies or substantial portions of the Software.\n"
2220    " *\n"
2221    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2222    "EXPRESS OR\n"
2223    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2224    "MERCHANTABILITY,\n"
2225    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2226    "SHALL THE\n"
2227    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2228    "OTHER\n"
2229    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2230    "ARISING FROM,\n"
2231    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2232    "DEALINGS IN\n"
2233    " * THE SOFTWARE.\n"
2234    " *\n"
2235    " *===--------------------------------------------------------------------"
2236    "---===\n"
2237    " */\n\n";
2238
2239  OS << "#ifndef __ARM_NEON_H\n";
2240  OS << "#define __ARM_NEON_H\n\n";
2241
2242  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
2243  OS << "#error \"NEON support not enabled\"\n";
2244  OS << "#endif\n\n";
2245
2246  OS << "#include <stdint.h>\n\n";
2247
2248  // Emit NEON-specific scalar typedefs.
2249  OS << "typedef float float32_t;\n";
2250  OS << "typedef __fp16 float16_t;\n";
2251
2252  OS << "#ifdef __aarch64__\n";
2253  OS << "typedef double float64_t;\n";
2254  OS << "#endif\n\n";
2255
2256  // For now, signedness of polynomial types depends on target
2257  OS << "#ifdef __aarch64__\n";
2258  OS << "typedef uint8_t poly8_t;\n";
2259  OS << "typedef uint16_t poly16_t;\n";
2260  OS << "#else\n";
2261  OS << "typedef int8_t poly8_t;\n";
2262  OS << "typedef int16_t poly16_t;\n";
2263  OS << "#endif\n";
2264
2265  // Emit Neon vector typedefs.
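  // Each entry expands to a typedef of the form (illustrative):
  //   typedef __attribute__((neon_vector_type(8)))  int8_t int8x8_t;
  // with AArch64-only element types wrapped in #ifdef __aarch64__.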
2266  std::string TypedefTypes(
2267      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
2268  SmallVector<StringRef, 24> TDTypeVec;
2269  ParseTypes(0, TypedefTypes, TDTypeVec);
2270
2271  // Emit vector typedefs.
2272  bool isA64 = false;
2273  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2274    bool dummy, quad = false, poly = false;
2275    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2276    bool preinsert = false;
2277    bool postinsert = false;
2278
2279    if (type == 'd') {
2280      preinsert = !isA64;
2281      isA64 = true;
2282    } else {
2283      postinsert = isA64;
2284      isA64 = false;
2285    }
2286    if (postinsert)
2287      OS << "#endif\n";
2288    if (preinsert)
2289      OS << "#ifdef __aarch64__\n";
2290
2291    if (poly)
2292      OS << "typedef __attribute__((neon_polyvector_type(";
2293    else
2294      OS << "typedef __attribute__((neon_vector_type(";
2295
2296    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2297    OS << utostr(nElts) << "))) ";
2298    if (nElts < 10)
2299      OS << " ";
2300
2301    OS << TypeString('s', TDTypeVec[i]);
2302    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2303
2304  }
2305  OS << "\n";
2306
2307  // Emit struct typedefs.
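  // Each combination expands to an aggregate of 2-4 vectors, e.g.
  // (illustrative):
  //   typedef struct int8x8x2_t {
  //     int8x8_t val[2];
  //   } int8x8x2_t;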
2308  isA64 = false;
2309  for (unsigned vi = 2; vi != 5; ++vi) {
2310    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2311      bool dummy, quad = false, poly = false;
2312      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2313      bool preinsert = false;
2314      bool postinsert = false;
2315
2316      if (type == 'd') {
2317        preinsert = !isA64;
2318        isA64 = true;
2319      } else {
2320        postinsert = isA64;
2321        isA64 = false;
2322      }
2323      if (postinsert)
2324        OS << "#endif\n";
2325      if (preinsert)
2326        OS << "#ifdef __aarch64__\n";
2327
2328      std::string ts = TypeString('d', TDTypeVec[i]);
2329      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2330      OS << "typedef struct " << vs << " {\n";
2331      OS << "  " << ts << " val";
2332      OS << "[" << utostr(vi) << "]";
2333      OS << ";\n} ";
2334      OS << vs << ";\n";
2335      OS << "\n";
2336    }
2337  }
2338
2339  OS << "#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2340
2341  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2342
2343  StringMap<ClassKind> EmittedMap;
2344
2345  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
2346  // intrinsics.  (Some of the saturating multiply instructions are also
2347  // used to implement the corresponding "_lane" variants, but tablegen
2348  // sorts the records into alphabetical order so that the "_lane" variants
2349  // come after the intrinsics they use.)
2350  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
2351  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
2352  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
2353  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
2354
2355  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
2356  // common intrinsics appear only once in the output stream.
2357  // The check for uniqueness is done in emitIntrinsic.
2358  // Emit ARM intrinsics.
2359  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2360    Record *R = RV[i];
2361
2362    // Skip AArch64 intrinsics; they will be emitted at the end.
2363    bool isA64 = R->getValueAsBit("isA64");
2364    if (isA64)
2365      continue;
2366
2367    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
2368        R->getName() != "VABD")
2369      emitIntrinsic(OS, R, EmittedMap);
2370  }
2371
2372  // Emit AArch64-specific intrinsics.
2373  OS << "#ifdef __aarch64__\n";
2374
2375  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
2376  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
2377  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
2378
2379  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2380    Record *R = RV[i];
2381
2382    // Skip ARM intrinsics already included above.
2383    bool isA64 = R->getValueAsBit("isA64");
2384    if (!isA64)
2385      continue;
2386
2387    emitIntrinsic(OS, R, EmittedMap);
2388  }
2389
2390  OS << "#endif\n\n";
2391
2392  OS << "#undef __ai\n\n";
2393  OS << "#endif /* __ARM_NEON_H */\n";
2394}
2395
2396/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2397/// intrinsics specified by record R checking for intrinsic uniqueness.
2398void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2399                                StringMap<ClassKind> &EmittedMap) {
2400  std::string name = R->getValueAsString("Name");
2401  std::string Proto = R->getValueAsString("Prototype");
2402  std::string Types = R->getValueAsString("Types");
2403
2404  SmallVector<StringRef, 16> TypeVec;
2405  ParseTypes(R, Types, TypeVec);
2406
2407  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2408
2409  ClassKind classKind = ClassNone;
2410  if (R->getSuperClasses().size() >= 2)
2411    classKind = ClassMap[R->getSuperClasses()[1]];
2412  if (classKind == ClassNone && kind == OpNone)
2413    PrintFatalError(R->getLoc(), "Builtin has no class kind");
2414
2415  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2416    if (kind == OpReinterpret) {
2417      bool outQuad = false;
2418      bool dummy = false;
2419      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2420      for (unsigned srcti = 0, srcte = TypeVec.size();
2421           srcti != srcte; ++srcti) {
2422        bool inQuad = false;
2423        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2424        if (srcti == ti || inQuad != outQuad)
2425          continue;
2426        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2427                                     OpCast, ClassS);
2428        if (EmittedMap.count(s))
2429          continue;
2430        EmittedMap[s] = ClassS;
2431        OS << s;
2432      }
2433    } else {
2434      std::string s =
2435          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2436      if (EmittedMap.count(s))
2437        continue;
2438      EmittedMap[s] = classKind;
2439      OS << s;
2440    }
2441  }
2442  OS << "\n";
2443}
2444
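/// RangeFromType - Return the largest valid lane index (the element count
/// minus one) for the vector type selected by the modifier 'mod'.  It bounds
/// lane-number immediates both in the Sema range checks and in the generated
/// tests.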
2445static unsigned RangeFromType(const char mod, StringRef typestr) {
2446  // base type to get the type string for.
2447  bool quad = false, dummy = false;
2448  char type = ClassifyType(typestr, quad, dummy, dummy);
2449  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2450
2451  switch (type) {
2452    case 'c':
2453      return (8 << (int)quad) - 1;
2454    case 'h':
2455    case 's':
2456      return (4 << (int)quad) - 1;
2457    case 'f':
2458    case 'i':
2459      return (2 << (int)quad) - 1;
2460    case 'd':
2461    case 'l':
2462      return (1 << (int)quad) - 1;
2463    default:
2464      PrintFatalError("unhandled type!");
2465  }
2466}
2467
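/// RangeScalarShiftImm - Return the largest shift amount (the element width in
/// bits minus one) for scalar shift intrinsics; the caller adds a lower bound
/// of 1 for right shifts and conversions.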
2468static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2469  // base type to get the type string for.
2470  bool dummy = false;
2471  char type = ClassifyType(typestr, dummy, dummy, dummy);
2472  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2473
2474  switch (type) {
2475    case 'c':
2476      return 7;
2477    case 'h':
2478    case 's':
2479      return 15;
2480    case 'f':
2481    case 'i':
2482      return 31;
2483    case 'd':
2484    case 'l':
2485      return 63;
2486    default:
2487      PrintFatalError("unhandled type!");
2488  }
2489}
2490
2491/// Generate the ARM and AArch64 intrinsic range checking code for
2492/// shift/lane immediates, checking for unique declarations.
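/// Each emitted case has the shape (illustrative):
///   case ARM::BI__builtin_neon_vget_lane_i8: i = 1; u = 7; break;
/// where 'i' is the index of the immediate argument and 'l'/'u' bound its
/// value.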
2493void
2494NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2495                                        StringMap<ClassKind> &A64IntrinsicMap,
2496                                        bool isA64RangeCheck) {
2497  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2498  StringMap<OpKind> EmittedMap;
2499
2500  // Generate the intrinsic range checking code for shift/lane immediates.
2501  if (isA64RangeCheck)
2502    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2503  else
2504    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2505
2506  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2507    Record *R = RV[i];
2508
2509    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2510    if (k != OpNone)
2511      continue;
2512
2513    std::string name = R->getValueAsString("Name");
2514    std::string Proto = R->getValueAsString("Prototype");
2515    std::string Types = R->getValueAsString("Types");
2516    std::string Rename = name + "@" + Proto;
2517
2518    // Functions with 'a' (the splat code) in the type prototype should not get
2519    // their own builtin as they use the non-splat variant.
2520    if (Proto.find('a') != std::string::npos)
2521      continue;
2522
2523    // Functions which do not have an immediate do not need to have range
2524    // checking code emitted.
2525    size_t immPos = Proto.find('i');
2526    if (immPos == std::string::npos)
2527      continue;
2528
2529    SmallVector<StringRef, 16> TypeVec;
2530    ParseTypes(R, Types, TypeVec);
2531
2532    if (R->getSuperClasses().size() < 2)
2533      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2534
2535    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2536
2537    // Do not include AArch64 range checks if not generating code for AArch64.
2538    bool isA64 = R->getValueAsBit("isA64");
2539    if (!isA64RangeCheck && isA64)
2540      continue;
2541
2542    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2543    // redefined by AArch64 to handle new types.
2544    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2545      ClassKind &A64CK = A64IntrinsicMap[Rename];
2546      if (A64CK == ck && ck != ClassNone)
2547        continue;
2548    }
2549
2550    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2551      std::string namestr, shiftstr, rangestr;
2552
2553      if (R->getValueAsBit("isVCVT_N")) {
2554        // VCVT between floating- and fixed-point values takes an immediate
2555        // in the range [1, 32] for f32, or [1, 64] for f64.
2556        ck = ClassB;
2557        if (name.find("32") != std::string::npos)
2558          rangestr = "l = 1; u = 31"; // upper bound = l + u
2559        else if (name.find("64") != std::string::npos)
2560          rangestr = "l = 1; u = 63";
2561        else
2562          PrintFatalError(R->getLoc(),
2563              "Fixed point convert name should contain \"32\" or \"64\"");
2564
2565      } else if (R->getValueAsBit("isScalarShift")) {
2566        // Right shifts have an 'r' in the name, left shifts do not.  Convert
2567        // instructions have the same bounds as right shifts.
2568        if (name.find('r') != std::string::npos ||
2569            name.find("cvt") != std::string::npos)
2570          rangestr = "l = 1; ";
2571
2572        rangestr += "u = " +
2573          utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
2574      } else if (!ProtoHasScalar(Proto)) {
2575        // Builtins which are overloaded by type will need to have their upper
2576        // bound computed at Sema time based on the type constant.
2577        ck = ClassB;
2578        if (R->getValueAsBit("isShift")) {
2579          shiftstr = ", true";
2580
2581          // Right shifts have an 'r' in the name, left shifts do not.
2582          if (name.find('r') != std::string::npos)
2583            rangestr = "l = 1; ";
2584        }
2585        rangestr += "u = RFT(TV" + shiftstr + ")";
2586      } else {
2587        // The immediate generally refers to a lane in the preceding argument.
2588        assert(immPos > 0 && "unexpected immediate operand");
2589        rangestr =
2590            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2591      }
2592      // Make sure cases appear only once by uniquing them in a string map.
2593      namestr = MangleName(name, TypeVec[ti], ck);
2594      if (EmittedMap.count(namestr))
2595        continue;
2596      EmittedMap[namestr] = OpNone;
2597
2598      // Calculate the index of the immediate that should be range checked.
2599      unsigned immidx = 0;
2600
2601      // Builtins that return a struct of multiple vectors have an extra
2602      // leading arg for the struct return.
2603      if (IsMultiVecProto(Proto[0]))
2604        ++immidx;
2605
2606      // Add one to the index for each argument until we reach the immediate
2607      // to be checked.  Structs of vectors are passed as multiple arguments.
2608      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2609        switch (Proto[ii]) {
2610        default:
2611          immidx += 1;
2612          break;
2613        case '2':
2614        case 'B':
2615          immidx += 2;
2616          break;
2617        case '3':
2618        case 'C':
2619          immidx += 3;
2620          break;
2621        case '4':
2622        case 'D':
2623          immidx += 4;
2624          break;
2625        case 'i':
2626          ie = ii + 1;
2627          break;
2628        }
2629      }
2630      if (isA64RangeCheck)
2631        OS << "case AArch64::BI__builtin_neon_";
2632      else
2633        OS << "case ARM::BI__builtin_neon_";
2634      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2635         << rangestr << "; break;\n";
2636    }
2637  }
2638  OS << "#endif\n\n";
2639}
2640
2641/// Generate the ARM and AArch64 overloaded type checking code for
2642/// SemaChecking.cpp, checking for unique builtin declarations.
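/// Each emitted case has the shape (illustrative; the mask bits are the
/// NeonTypeFlags values accepted by the overloaded builtin):
///   case ARM::BI__builtin_neon_vabd_v: mask = 0x7ULL; break;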
2643void
2644NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2645                                      StringMap<ClassKind> &A64IntrinsicMap,
2646                                      bool isA64TypeCheck) {
2647  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2648  StringMap<OpKind> EmittedMap;
2649
2650  // Generate the overloaded type checking code for SemaChecking.cpp
2651  if (isA64TypeCheck)
2652    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2653  else
2654    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2655
2656  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2657    Record *R = RV[i];
2658    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2659    if (k != OpNone)
2660      continue;
2661
2662    std::string Proto = R->getValueAsString("Prototype");
2663    std::string Types = R->getValueAsString("Types");
2664    std::string name = R->getValueAsString("Name");
2665    std::string Rename = name + "@" + Proto;
2666
2667    // Functions with 'a' (the splat code) in the type prototype should not get
2668    // their own builtin as they use the non-splat variant.
2669    if (Proto.find('a') != std::string::npos)
2670      continue;
2671
2672    // Functions which have a scalar argument cannot be overloaded, no need to
2673    // check them if we are emitting the type checking code.
2674    if (ProtoHasScalar(Proto))
2675      continue;
2676
2677    SmallVector<StringRef, 16> TypeVec;
2678    ParseTypes(R, Types, TypeVec);
2679
2680    if (R->getSuperClasses().size() < 2)
2681      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2682
2683    // Do not include AArch64 type checks if not generating code for AArch64.
2684    bool isA64 = R->getValueAsBit("isA64");
2685    if (!isA64TypeCheck && isA64)
2686      continue;
2687
2688    // Include ARM type checks in AArch64, but only if the ARM intrinsic is
2689    // not redefined in AArch64 to handle new types; e.g., "vabd" is an SIntr
2690    // redefined in AArch64 to handle an additional 2 x f64 type.
2691    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2692    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2693      ClassKind &A64CK = A64IntrinsicMap[Rename];
2694      if (A64CK == ck && ck != ClassNone)
2695        continue;
2696    }
2697
2698    int si = -1, qi = -1;
2699    uint64_t mask = 0, qmask = 0;
2700    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2701      // Generate the switch case(s) for this builtin for the type validation.
2702      bool quad = false, poly = false, usgn = false;
2703      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
2704
2705      if (quad) {
2706        qi = ti;
2707        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2708      } else {
2709        si = ti;
2710        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2711      }
2712    }
2713
2714    // Check if the builtin function has a pointer or const pointer argument.
2715    int PtrArgNum = -1;
2716    bool HasConstPtr = false;
2717    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2718      char ArgType = Proto[arg];
2719      if (ArgType == 'c') {
2720        HasConstPtr = true;
2721        PtrArgNum = arg - 1;
2722        break;
2723      }
2724      if (ArgType == 'p') {
2725        PtrArgNum = arg - 1;
2726        break;
2727      }
2728    }
2729    // For sret builtins, adjust the pointer argument index.
2730    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
2731      PtrArgNum += 1;
2732
2733    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2734    // and vst1_lane intrinsics.  Using a pointer to the vector element
2735    // type with one of those operations causes codegen to select an aligned
2736    // load/store instruction.  If you want an unaligned operation,
2737    // the pointer argument needs to have less alignment than the element type,
2738    // so just accept any pointer type.
2739    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2740      PtrArgNum = -1;
2741      HasConstPtr = false;
2742    }
2743
2744    if (mask) {
2745      if (isA64TypeCheck)
2746        OS << "case AArch64::BI__builtin_neon_";
2747      else
2748        OS << "case ARM::BI__builtin_neon_";
2749      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
2750         << "0x" << utohexstr(mask) << "ULL";
2751      if (PtrArgNum >= 0)
2752        OS << "; PtrArgNum = " << PtrArgNum;
2753      if (HasConstPtr)
2754        OS << "; HasConstPtr = true";
2755      OS << "; break;\n";
2756    }
2757    if (qmask) {
2758      if (isA64TypeCheck)
2759        OS << "case AArch64::BI__builtin_neon_";
2760      else
2761        OS << "case ARM::BI__builtin_neon_";
2762      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
2763         << "0x" << utohexstr(qmask) << "ULL";
2764      if (PtrArgNum >= 0)
2765        OS << "; PtrArgNum = " << PtrArgNum;
2766      if (HasConstPtr)
2767        OS << "; HasConstPtr = true";
2768      OS << "; break;\n";
2769    }
2770  }
2771  OS << "#endif\n\n";
2772}
2773
2774/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
2775/// declarations of builtins, checking for unique builtin declarations.
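/// The ARM and AArch64 outputs are emitted under the GET_NEON_BUILTINS and
/// GET_NEON_AARCH64_BUILTINS guards respectively; each line is produced by
/// GenBuiltinDef above.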
2776void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2777                                 StringMap<ClassKind> &A64IntrinsicMap,
2778                                 bool isA64GenBuiltinDef) {
2779  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2780  StringMap<OpKind> EmittedMap;
2781
2782  // Generate BuiltinsARM.def and BuiltinsAArch64.def
2783  if (isA64GenBuiltinDef)
2784    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2785  else
2786    OS << "#ifdef GET_NEON_BUILTINS\n";
2787
2788  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2789    Record *R = RV[i];
2790    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2791    if (k != OpNone)
2792      continue;
2793
2794    std::string Proto = R->getValueAsString("Prototype");
2795    std::string name = R->getValueAsString("Name");
2796    std::string Rename = name + "@" + Proto;
2797
2798    // Functions with 'a' (the splat code) in the type prototype should not get
2799    // their own builtin as they use the non-splat variant.
2800    if (Proto.find('a') != std::string::npos)
2801      continue;
2802
2803    std::string Types = R->getValueAsString("Types");
2804    SmallVector<StringRef, 16> TypeVec;
2805    ParseTypes(R, Types, TypeVec);
2806
2807    if (R->getSuperClasses().size() < 2)
2808      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2809
2810    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2811
2812    // Do not include AArch64 BUILTIN() macros if not generating
2813    // code for AArch64
2814    bool isA64 = R->getValueAsBit("isA64");
2815    if (!isA64GenBuiltinDef && isA64)
2816      continue;
2817
2818    // Include ARM BUILTIN() macros in AArch64, but only if the ARM intrinsic
2819    // is not redefined in AArch64 to handle new types; e.g., "vabd" is an SIntr
2820    // redefined in AArch64 to handle an additional 2 x f64 type.
2821    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
2822      ClassKind &A64CK = A64IntrinsicMap[Rename];
2823      if (A64CK == ck && ck != ClassNone)
2824        continue;
2825    }
2826
2827    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2828      // Generate the declaration for this builtin, ensuring
2829      // that each unique BUILTIN() macro appears only once in the output
2830      // stream.
2831      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
2832      if (EmittedMap.count(bd))
2833        continue;
2834
2835      EmittedMap[bd] = OpNone;
2836      OS << bd << "\n";
2837    }
2838  }
2839  OS << "#endif\n\n";
2840}
2841
2842/// runHeader - Emit a file with sections defining:
2843/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2844/// 2. the SemaChecking code for the type overload checking.
2845/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2846void NeonEmitter::runHeader(raw_ostream &OS) {
2847  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2848
2849  // Build a map of AArch64 intrinsics to be used in uniqueness checks.
2850  StringMap<ClassKind> A64IntrinsicMap;
2851  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2852    Record *R = RV[i];
2853
2854    bool isA64 = R->getValueAsBit("isA64");
2855    if (!isA64)
2856      continue;
2857
2858    ClassKind CK = ClassNone;
2859    if (R->getSuperClasses().size() >= 2)
2860      CK = ClassMap[R->getSuperClasses()[1]];
2861
2862    std::string Name = R->getValueAsString("Name");
2863    std::string Proto = R->getValueAsString("Prototype");
2864    std::string Rename = Name + "@" + Proto;
2865    if (A64IntrinsicMap.count(Rename))
2866      continue;
2867    A64IntrinsicMap[Rename] = CK;
2868  }
2869
2870  // Generate BuiltinsARM.def for ARM
2871  genBuiltinsDef(OS, A64IntrinsicMap, false);
2872
2873  // Generate BuiltinsAArch64.def for AArch64
2874  genBuiltinsDef(OS, A64IntrinsicMap, true);
2875
2876  // Generate ARM overloaded type checking code for SemaChecking.cpp
2877  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2878
2879  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2880  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2881
2882  // Generate ARM range checking code for shift/lane immediates.
2883  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2884
2885  // Generate the AArch64 range checking code for shift/lane immediates.
2886  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2887}
2888
2889/// GenTest - Write out a test for the intrinsic specified by the name and
2890/// type strings, including the embedded patterns for FileCheck to match.
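/// A generated test looks roughly like (illustrative):
///   // CHECK_ARM: test_vadd_s8
///   // CHECK_AARCH64: test_vadd_s8
///   int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
///     return vadd_s8(a, b);
///   }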
2891static std::string GenTest(const std::string &name,
2892                           const std::string &proto,
2893                           StringRef outTypeStr, StringRef inTypeStr,
2894                           bool isShift, bool isHiddenLOp,
2895                           ClassKind ck, const std::string &InstName,
2896                           bool isA64,
2897                           std::string &testFuncProto) {
2898  assert(!proto.empty() && "unexpected empty prototype");
2899  std::string s;
2900
2901  // Function name with type suffix
2902  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2903  if (outTypeStr != inTypeStr) {
2904    // If the input type is different (e.g., for vreinterpret), append a suffix
2905    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2906    // does not insert another "q" in the name.
2907    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2908    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2909    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2910  }
2911
2912  // TODO: GenerateChecksForIntrinsic does not generate CHECK patterns
2913  // for AArch64 instructions yet.
2914  std::vector<std::string> FileCheckPatterns;
2915  if (!isA64) {
2916    GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2917                               isHiddenLOp, FileCheckPatterns);
2918    s += "// CHECK_ARM: test_" + mangledName + "\n";
2919  }
2920  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2921
2922  // Emit the FileCheck patterns.
2923  // If for any reason we do not want to emit a check, FileCheckPatterns
2924  // will be empty.
2925  if (FileCheckPatterns.size()) {
2926    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2927                                                  e = FileCheckPatterns.end();
2928         i != e;
2929         ++i) {
2930      s += "// CHECK_ARM: " + *i + "\n";
2931    }
2932  }
2933
2934  // Emit the start of the test function.
2935
2936  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2937  char arg = 'a';
2938  std::string comma;
2939  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2940    // Do not create arguments for values that must be immediate constants.
2941    if (proto[i] == 'i')
2942      continue;
2943    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2944    testFuncProto.push_back(arg);
2945    comma = ", ";
2946  }
2947  testFuncProto += ")";
2948
2949  s += testFuncProto;
2950  s += " {\n  ";
2951
2952  if (proto[0] != 'v')
2953    s += "return ";
2954  s += mangledName + "(";
2955  arg = 'a';
2956  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2957    if (proto[i] == 'i') {
2958      // For immediate operands, test the maximum value.
2959      if (isShift)
2960        s += "1"; // FIXME
2961      else
2962        // The immediate generally refers to a lane in the preceding argument.
2963        s += utostr(RangeFromType(proto[i-1], inTypeStr));
2964    } else {
2965      s.push_back(arg);
2966    }
2967    if ((i + 1) < e)
2968      s += ", ";
2969  }
2970  s += ");\n}\n\n";
2971  return s;
2972}
2973
2974/// Write out all intrinsic tests for the specified target, checking
2975/// for intrinsic test uniqueness.
2976void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2977                                bool isA64GenTest) {
2978  if (isA64GenTest)
2979    OS << "#ifdef __aarch64__\n";
2980
2981  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2982  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2983    Record *R = RV[i];
2984    std::string name = R->getValueAsString("Name");
2985    std::string Proto = R->getValueAsString("Prototype");
2986    std::string Types = R->getValueAsString("Types");
2987    bool isShift = R->getValueAsBit("isShift");
2988    std::string InstName = R->getValueAsString("InstName");
2989    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2990    bool isA64 = R->getValueAsBit("isA64");
2991
2992    // Do not include AArch64 intrinsic tests if not generating
2993    // code for AArch64.
2994    if (!isA64GenTest && isA64)
2995      continue;
2996
2997    SmallVector<StringRef, 16> TypeVec;
2998    ParseTypes(R, Types, TypeVec);
2999
3000    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3001    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3002    if (kind == OpUnavailable)
3003      continue;
3004    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3005      if (kind == OpReinterpret) {
3006        bool outQuad = false;
3007        bool dummy = false;
3008        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3009        for (unsigned srcti = 0, srcte = TypeVec.size();
3010             srcti != srcte; ++srcti) {
3011          bool inQuad = false;
3012          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3013          if (srcti == ti || inQuad != outQuad)
3014            continue;
3015          std::string testFuncProto;
3016          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3017                                  isShift, isHiddenLOp, ck, InstName, isA64,
3018                                  testFuncProto);
3019          if (EmittedMap.count(testFuncProto))
3020            continue;
3021          EmittedMap[testFuncProto] = kind;
3022          OS << s << "\n";
3023        }
3024      } else {
3025        std::string testFuncProto;
3026        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3027                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
3028        if (EmittedMap.count(testFuncProto))
3029          continue;
3030        EmittedMap[testFuncProto] = kind;
3031        OS << s << "\n";
3032      }
3033    }
3034  }
3035
3036  if (isA64GenTest)
3037    OS << "#endif\n";
3038}
3039/// runTests - Write out a complete set of tests for all of the Neon
3040/// intrinsics.
3041void NeonEmitter::runTests(raw_ostream &OS) {
3042  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3043        "apcs-gnu\\\n"
3044        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3045        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3046        "\n"
3047        "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3048        "// RUN:  -target-feature +neon -ffreestanding -S -o - %s \\\n"
3049        "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3050        "\n"
3051        "// REQUIRES: long_tests\n"
3052        "\n"
3053        "#include <arm_neon.h>\n"
3054        "\n";
3055
3056  // ARM tests must be emitted before AArch64 tests to ensure
3057  // tests for intrinsics that are common to ARM and AArch64
3058  // appear only once in the output stream.
3059  // The check for uniqueness is done in genTargetTest.
3060  StringMap<OpKind> EmittedMap;
3061
3062  genTargetTest(OS, EmittedMap, false);
3063
3064  genTargetTest(OS, EmittedMap, true);
3065}
3066
3067namespace clang {
3068void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3069  NeonEmitter(Records).run(OS);
3070}
3071void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3072  NeonEmitter(Records).runHeader(OS);
3073}
3074void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3075  NeonEmitter(Records).runTests(OS);
3076}
3077} // End namespace clang
3078