NeonEmitter.cpp revision dd12780e86575795fa912529a911b01e2abc4677
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend is responsible for emitting arm_neon.h, which includes 11// a declaration and definition of each function specified by the ARM NEON 12// compiler interface. See ARM document DUI0348B. 13// 14// Each NEON instruction is implemented in terms of 1 or more functions which 15// are suffixed with the element type of the input vectors. Functions may be 16// implemented in terms of generic vector operations such as +, *, -, etc. or 17// by calling a __builtin_-prefixed function which will be handled by clang's 18// CodeGen library. 19// 20// Additional validation code can be generated by this file when runHeader() is 21// called, rather than the normal run() entry point. A complete set of tests 22// for Neon intrinsics can be generated by calling the runTests() entry point. 
23// 24//===----------------------------------------------------------------------===// 25 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/ADT/SmallString.h" 28#include "llvm/ADT/SmallVector.h" 29#include "llvm/ADT/StringExtras.h" 30#include "llvm/ADT/StringMap.h" 31#include "llvm/Support/ErrorHandling.h" 32#include "llvm/TableGen/Error.h" 33#include "llvm/TableGen/Record.h" 34#include "llvm/TableGen/TableGenBackend.h" 35#include <string> 36using namespace llvm; 37 38enum OpKind { 39 OpNone, 40 OpUnavailable, 41 OpAdd, 42 OpAddl, 43 OpAddlHi, 44 OpAddw, 45 OpAddwHi, 46 OpSub, 47 OpSubl, 48 OpSublHi, 49 OpSubw, 50 OpSubwHi, 51 OpMul, 52 OpMla, 53 OpMlal, 54 OpMullHi, 55 OpMlalHi, 56 OpMls, 57 OpMlsl, 58 OpMlslHi, 59 OpMulN, 60 OpMlaN, 61 OpMlsN, 62 OpMlalN, 63 OpMlslN, 64 OpMulLane, 65 OpMulXLane, 66 OpMullLane, 67 OpMullHiLane, 68 OpMlaLane, 69 OpMlsLane, 70 OpMlalLane, 71 OpMlalHiLane, 72 OpMlslLane, 73 OpMlslHiLane, 74 OpQDMullLane, 75 OpQDMullHiLane, 76 OpQDMlalLane, 77 OpQDMlalHiLane, 78 OpQDMlslLane, 79 OpQDMlslHiLane, 80 OpQDMulhLane, 81 OpQRDMulhLane, 82 OpFMSLane, 83 OpFMSLaneQ, 84 OpTrn1, 85 OpZip1, 86 OpUzp1, 87 OpTrn2, 88 OpZip2, 89 OpUzp2, 90 OpEq, 91 OpGe, 92 OpLe, 93 OpGt, 94 OpLt, 95 OpNeg, 96 OpNot, 97 OpAnd, 98 OpOr, 99 OpXor, 100 OpAndNot, 101 OpOrNot, 102 OpCast, 103 OpConcat, 104 OpDup, 105 OpDupLane, 106 OpHi, 107 OpLo, 108 OpSelect, 109 OpRev16, 110 OpRev32, 111 OpRev64, 112 OpXtnHi, 113 OpSqxtunHi, 114 OpQxtnHi, 115 OpFcvtnHi, 116 OpFcvtlHi, 117 OpFcvtxnHi, 118 OpReinterpret, 119 OpAddhnHi, 120 OpRAddhnHi, 121 OpSubhnHi, 122 OpRSubhnHi, 123 OpAbdl, 124 OpAbdlHi, 125 OpAba, 126 OpAbal, 127 OpAbalHi, 128 OpQDMullHi, 129 OpQDMlalHi, 130 OpQDMlslHi, 131 OpDiv, 132 OpLongHi, 133 OpNarrowHi, 134 OpMovlHi, 135 OpCopyLane, 136 OpCopyQLane, 137 OpCopyLaneQ, 138 OpScalarMulLane, 139 OpScalarMulLaneQ, 140 OpScalarMulXLane, 141 OpScalarMulXLaneQ, 142 OpScalarVMulXLane, 143 OpScalarVMulXLaneQ 144}; 145 146enum ClassKind { 147 ClassNone, 148 ClassI, // 
generic integer instruction, e.g., "i8" suffix 149 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 150 ClassW, // width-specific instruction, e.g., "8" suffix 151 ClassB, // bitcast arguments with enum argument to specify type 152 ClassL, // Logical instructions which are op instructions 153 // but we need to not emit any suffix for in our 154 // tests. 155 ClassNoTest // Instructions which we do not test since they are 156 // not TRUE instructions. 157}; 158 159/// NeonTypeFlags - Flags to identify the types for overloaded Neon 160/// builtins. These must be kept in sync with the flags in 161/// include/clang/Basic/TargetBuiltins.h. 162namespace { 163class NeonTypeFlags { 164 enum { 165 EltTypeMask = 0xf, 166 UnsignedFlag = 0x10, 167 QuadFlag = 0x20 168 }; 169 uint32_t Flags; 170 171public: 172 enum EltType { 173 Int8, 174 Int16, 175 Int32, 176 Int64, 177 Poly8, 178 Poly16, 179 Poly64, 180 Float16, 181 Float32, 182 Float64 183 }; 184 185 NeonTypeFlags(unsigned F) : Flags(F) {} 186 NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) { 187 if (IsUnsigned) 188 Flags |= UnsignedFlag; 189 if (IsQuad) 190 Flags |= QuadFlag; 191 } 192 193 uint32_t getFlags() const { return Flags; } 194}; 195} // end anonymous namespace 196 197namespace { 198class NeonEmitter { 199 RecordKeeper &Records; 200 StringMap<OpKind> OpMap; 201 DenseMap<Record*, ClassKind> ClassMap; 202 203public: 204 NeonEmitter(RecordKeeper &R) : Records(R) { 205 OpMap["OP_NONE"] = OpNone; 206 OpMap["OP_UNAVAILABLE"] = OpUnavailable; 207 OpMap["OP_ADD"] = OpAdd; 208 OpMap["OP_ADDL"] = OpAddl; 209 OpMap["OP_ADDLHi"] = OpAddlHi; 210 OpMap["OP_ADDW"] = OpAddw; 211 OpMap["OP_ADDWHi"] = OpAddwHi; 212 OpMap["OP_SUB"] = OpSub; 213 OpMap["OP_SUBL"] = OpSubl; 214 OpMap["OP_SUBLHi"] = OpSublHi; 215 OpMap["OP_SUBW"] = OpSubw; 216 OpMap["OP_SUBWHi"] = OpSubwHi; 217 OpMap["OP_MUL"] = OpMul; 218 OpMap["OP_MLA"] = OpMla; 219 OpMap["OP_MLAL"] = OpMlal; 220 OpMap["OP_MULLHi"] = OpMullHi; 221 
OpMap["OP_MLALHi"] = OpMlalHi; 222 OpMap["OP_MLS"] = OpMls; 223 OpMap["OP_MLSL"] = OpMlsl; 224 OpMap["OP_MLSLHi"] = OpMlslHi; 225 OpMap["OP_MUL_N"] = OpMulN; 226 OpMap["OP_MLA_N"] = OpMlaN; 227 OpMap["OP_MLS_N"] = OpMlsN; 228 OpMap["OP_MLAL_N"] = OpMlalN; 229 OpMap["OP_MLSL_N"] = OpMlslN; 230 OpMap["OP_MUL_LN"]= OpMulLane; 231 OpMap["OP_MULX_LN"]= OpMulXLane; 232 OpMap["OP_MULL_LN"] = OpMullLane; 233 OpMap["OP_MULLHi_LN"] = OpMullHiLane; 234 OpMap["OP_MLA_LN"]= OpMlaLane; 235 OpMap["OP_MLS_LN"]= OpMlsLane; 236 OpMap["OP_MLAL_LN"] = OpMlalLane; 237 OpMap["OP_MLALHi_LN"] = OpMlalHiLane; 238 OpMap["OP_MLSL_LN"] = OpMlslLane; 239 OpMap["OP_MLSLHi_LN"] = OpMlslHiLane; 240 OpMap["OP_QDMULL_LN"] = OpQDMullLane; 241 OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane; 242 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane; 243 OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane; 244 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane; 245 OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane; 246 OpMap["OP_QDMULH_LN"] = OpQDMulhLane; 247 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; 248 OpMap["OP_FMS_LN"] = OpFMSLane; 249 OpMap["OP_FMS_LNQ"] = OpFMSLaneQ; 250 OpMap["OP_TRN1"] = OpTrn1; 251 OpMap["OP_ZIP1"] = OpZip1; 252 OpMap["OP_UZP1"] = OpUzp1; 253 OpMap["OP_TRN2"] = OpTrn2; 254 OpMap["OP_ZIP2"] = OpZip2; 255 OpMap["OP_UZP2"] = OpUzp2; 256 OpMap["OP_EQ"] = OpEq; 257 OpMap["OP_GE"] = OpGe; 258 OpMap["OP_LE"] = OpLe; 259 OpMap["OP_GT"] = OpGt; 260 OpMap["OP_LT"] = OpLt; 261 OpMap["OP_NEG"] = OpNeg; 262 OpMap["OP_NOT"] = OpNot; 263 OpMap["OP_AND"] = OpAnd; 264 OpMap["OP_OR"] = OpOr; 265 OpMap["OP_XOR"] = OpXor; 266 OpMap["OP_ANDN"] = OpAndNot; 267 OpMap["OP_ORN"] = OpOrNot; 268 OpMap["OP_CAST"] = OpCast; 269 OpMap["OP_CONC"] = OpConcat; 270 OpMap["OP_HI"] = OpHi; 271 OpMap["OP_LO"] = OpLo; 272 OpMap["OP_DUP"] = OpDup; 273 OpMap["OP_DUP_LN"] = OpDupLane; 274 OpMap["OP_SEL"] = OpSelect; 275 OpMap["OP_REV16"] = OpRev16; 276 OpMap["OP_REV32"] = OpRev32; 277 OpMap["OP_REV64"] = OpRev64; 278 OpMap["OP_XTN"] = OpXtnHi; 279 OpMap["OP_SQXTUN"] = 
OpSqxtunHi; 280 OpMap["OP_QXTN"] = OpQxtnHi; 281 OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi; 282 OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi; 283 OpMap["OP_VCVTX_HI"] = OpFcvtxnHi; 284 OpMap["OP_REINT"] = OpReinterpret; 285 OpMap["OP_ADDHNHi"] = OpAddhnHi; 286 OpMap["OP_RADDHNHi"] = OpRAddhnHi; 287 OpMap["OP_SUBHNHi"] = OpSubhnHi; 288 OpMap["OP_RSUBHNHi"] = OpRSubhnHi; 289 OpMap["OP_ABDL"] = OpAbdl; 290 OpMap["OP_ABDLHi"] = OpAbdlHi; 291 OpMap["OP_ABA"] = OpAba; 292 OpMap["OP_ABAL"] = OpAbal; 293 OpMap["OP_ABALHi"] = OpAbalHi; 294 OpMap["OP_QDMULLHi"] = OpQDMullHi; 295 OpMap["OP_QDMLALHi"] = OpQDMlalHi; 296 OpMap["OP_QDMLSLHi"] = OpQDMlslHi; 297 OpMap["OP_DIV"] = OpDiv; 298 OpMap["OP_LONG_HI"] = OpLongHi; 299 OpMap["OP_NARROW_HI"] = OpNarrowHi; 300 OpMap["OP_MOVL_HI"] = OpMovlHi; 301 OpMap["OP_COPY_LN"] = OpCopyLane; 302 OpMap["OP_COPYQ_LN"] = OpCopyQLane; 303 OpMap["OP_COPY_LNQ"] = OpCopyLaneQ; 304 OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane; 305 OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ; 306 OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane; 307 OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ; 308 OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane; 309 OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ; 310 311 Record *SI = R.getClass("SInst"); 312 Record *II = R.getClass("IInst"); 313 Record *WI = R.getClass("WInst"); 314 Record *SOpI = R.getClass("SOpInst"); 315 Record *IOpI = R.getClass("IOpInst"); 316 Record *WOpI = R.getClass("WOpInst"); 317 Record *LOpI = R.getClass("LOpInst"); 318 Record *NoTestOpI = R.getClass("NoTestOpInst"); 319 320 ClassMap[SI] = ClassS; 321 ClassMap[II] = ClassI; 322 ClassMap[WI] = ClassW; 323 ClassMap[SOpI] = ClassS; 324 ClassMap[IOpI] = ClassI; 325 ClassMap[WOpI] = ClassW; 326 ClassMap[LOpI] = ClassL; 327 ClassMap[NoTestOpI] = ClassNoTest; 328 } 329 330 // run - Emit arm_neon.h.inc 331 void run(raw_ostream &o); 332 333 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 334 void runHeader(raw_ostream &o); 335 336 // runTests - Emit tests for 
all the Neon intrinsics. 337 void runTests(raw_ostream &o); 338 339private: 340 void emitIntrinsic(raw_ostream &OS, Record *R, 341 StringMap<ClassKind> &EmittedMap); 342 void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap, 343 bool isA64GenBuiltinDef); 344 void genOverloadTypeCheckCode(raw_ostream &OS, 345 StringMap<ClassKind> &A64IntrinsicMap, 346 bool isA64TypeCheck); 347 void genIntrinsicRangeCheckCode(raw_ostream &OS, 348 StringMap<ClassKind> &A64IntrinsicMap, 349 bool isA64RangeCheck); 350 void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 351 bool isA64TestGen); 352}; 353} // end anonymous namespace 354 355/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs, 356/// which each StringRef representing a single type declared in the string. 357/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing 358/// 2xfloat and 4xfloat respectively. 359static void ParseTypes(Record *r, std::string &s, 360 SmallVectorImpl<StringRef> &TV) { 361 const char *data = s.data(); 362 int len = 0; 363 364 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 365 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' 366 || data[len] == 'H' || data[len] == 'S') 367 continue; 368 369 switch (data[len]) { 370 case 'c': 371 case 's': 372 case 'i': 373 case 'l': 374 case 'h': 375 case 'f': 376 case 'd': 377 break; 378 default: 379 PrintFatalError(r->getLoc(), 380 "Unexpected letter: " + std::string(data + len, 1)); 381 } 382 TV.push_back(StringRef(data, len + 1)); 383 data += len + 1; 384 len = -1; 385 } 386} 387 388/// Widen - Convert a type code into the next wider type. char -> short, 389/// short -> int, etc. 
390static char Widen(const char t) { 391 switch (t) { 392 case 'c': 393 return 's'; 394 case 's': 395 return 'i'; 396 case 'i': 397 return 'l'; 398 case 'h': 399 return 'f'; 400 case 'f': 401 return 'd'; 402 default: 403 PrintFatalError("unhandled type in widen!"); 404 } 405} 406 407/// Narrow - Convert a type code into the next smaller type. short -> char, 408/// float -> half float, etc. 409static char Narrow(const char t) { 410 switch (t) { 411 case 's': 412 return 'c'; 413 case 'i': 414 return 's'; 415 case 'l': 416 return 'i'; 417 case 'f': 418 return 'h'; 419 case 'd': 420 return 'f'; 421 default: 422 PrintFatalError("unhandled type in narrow!"); 423 } 424} 425 426static std::string GetNarrowTypestr(StringRef ty) 427{ 428 std::string s; 429 for (size_t i = 0, end = ty.size(); i < end; i++) { 430 switch (ty[i]) { 431 case 's': 432 s += 'c'; 433 break; 434 case 'i': 435 s += 's'; 436 break; 437 case 'l': 438 s += 'i'; 439 break; 440 default: 441 s += ty[i]; 442 break; 443 } 444 } 445 446 return s; 447} 448 449/// For a particular StringRef, return the base type code, and whether it has 450/// the quad-vector, polynomial, or unsigned modifiers set. 451static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 452 unsigned off = 0; 453 // ignore scalar. 454 if (ty[off] == 'S') { 455 ++off; 456 } 457 // remember quad. 458 if (ty[off] == 'Q' || ty[off] == 'H') { 459 quad = true; 460 ++off; 461 } 462 463 // remember poly. 464 if (ty[off] == 'P') { 465 poly = true; 466 ++off; 467 } 468 469 // remember unsigned. 470 if (ty[off] == 'U') { 471 usgn = true; 472 ++off; 473 } 474 475 // base type to get the type string for. 476 return ty[off]; 477} 478 479/// ModType - Transform a type code and its modifiers based on a mod code. The 480/// mod code definitions may be found at the top of arm_neon.td. 
481static char ModType(const char mod, char type, bool &quad, bool &poly, 482 bool &usgn, bool &scal, bool &cnst, bool &pntr) { 483 switch (mod) { 484 case 't': 485 if (poly) { 486 poly = false; 487 usgn = true; 488 } 489 break; 490 case 'b': 491 scal = true; 492 case 'u': 493 usgn = true; 494 poly = false; 495 if (type == 'f') 496 type = 'i'; 497 if (type == 'd') 498 type = 'l'; 499 break; 500 case '$': 501 scal = true; 502 case 'x': 503 usgn = false; 504 poly = false; 505 if (type == 'f') 506 type = 'i'; 507 if (type == 'd') 508 type = 'l'; 509 break; 510 case 'o': 511 scal = true; 512 type = 'd'; 513 usgn = false; 514 break; 515 case 'y': 516 scal = true; 517 case 'f': 518 if (type == 'h') 519 quad = true; 520 type = 'f'; 521 usgn = false; 522 break; 523 case 'g': 524 quad = false; 525 break; 526 case 'B': 527 case 'C': 528 case 'D': 529 case 'j': 530 quad = true; 531 break; 532 case 'w': 533 type = Widen(type); 534 quad = true; 535 break; 536 case 'n': 537 type = Widen(type); 538 break; 539 case 'i': 540 type = 'i'; 541 scal = true; 542 break; 543 case 'l': 544 type = 'l'; 545 scal = true; 546 usgn = true; 547 break; 548 case 'z': 549 type = Narrow(type); 550 scal = true; 551 break; 552 case 'r': 553 type = Widen(type); 554 scal = true; 555 break; 556 case 's': 557 case 'a': 558 scal = true; 559 break; 560 case 'k': 561 quad = true; 562 break; 563 case 'c': 564 cnst = true; 565 case 'p': 566 pntr = true; 567 scal = true; 568 break; 569 case 'h': 570 type = Narrow(type); 571 if (type == 'h') 572 quad = false; 573 break; 574 case 'q': 575 type = Narrow(type); 576 quad = true; 577 break; 578 case 'e': 579 type = Narrow(type); 580 usgn = true; 581 break; 582 case 'm': 583 type = Narrow(type); 584 quad = false; 585 break; 586 default: 587 break; 588 } 589 return type; 590} 591 592static bool IsMultiVecProto(const char p) { 593 return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D')); 594} 595 596/// TypeString - for a modifier and type, generate the name of the 
typedef for 597/// that type. QUc -> uint8x8_t. 598static std::string TypeString(const char mod, StringRef typestr) { 599 bool quad = false; 600 bool poly = false; 601 bool usgn = false; 602 bool scal = false; 603 bool cnst = false; 604 bool pntr = false; 605 606 if (mod == 'v') 607 return "void"; 608 if (mod == 'i') 609 return "int"; 610 611 // base type to get the type string for. 612 char type = ClassifyType(typestr, quad, poly, usgn); 613 614 // Based on the modifying character, change the type and width if necessary. 615 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 616 617 SmallString<128> s; 618 619 if (usgn) 620 s.push_back('u'); 621 622 switch (type) { 623 case 'c': 624 s += poly ? "poly8" : "int8"; 625 if (scal) 626 break; 627 s += quad ? "x16" : "x8"; 628 break; 629 case 's': 630 s += poly ? "poly16" : "int16"; 631 if (scal) 632 break; 633 s += quad ? "x8" : "x4"; 634 break; 635 case 'i': 636 s += "int32"; 637 if (scal) 638 break; 639 s += quad ? "x4" : "x2"; 640 break; 641 case 'l': 642 s += (poly && !usgn)? "poly64" : "int64"; 643 if (scal) 644 break; 645 s += quad ? "x2" : "x1"; 646 break; 647 case 'h': 648 s += "float16"; 649 if (scal) 650 break; 651 s += quad ? "x8" : "x4"; 652 break; 653 case 'f': 654 s += "float32"; 655 if (scal) 656 break; 657 s += quad ? "x4" : "x2"; 658 break; 659 case 'd': 660 s += "float64"; 661 if (scal) 662 break; 663 s += quad ? "x2" : "x1"; 664 break; 665 666 default: 667 PrintFatalError("unhandled type!"); 668 } 669 670 if (mod == '2' || mod == 'B') 671 s += "x2"; 672 if (mod == '3' || mod == 'C') 673 s += "x3"; 674 if (mod == '4' || mod == 'D') 675 s += "x4"; 676 677 // Append _t, finishing the type string typedef type. 678 s += "_t"; 679 680 if (cnst) 681 s += " const"; 682 683 if (pntr) 684 s += " *"; 685 686 return s.str(); 687} 688 689/// BuiltinTypeString - for a modifier and type, generate the clang 690/// BuiltinsARM.def prototype code for the function. 
See the top of clang's 691/// Builtins.def for a description of the type strings. 692static std::string BuiltinTypeString(const char mod, StringRef typestr, 693 ClassKind ck, bool ret) { 694 bool quad = false; 695 bool poly = false; 696 bool usgn = false; 697 bool scal = false; 698 bool cnst = false; 699 bool pntr = false; 700 701 if (mod == 'v') 702 return "v"; // void 703 if (mod == 'i') 704 return "i"; // int 705 706 // base type to get the type string for. 707 char type = ClassifyType(typestr, quad, poly, usgn); 708 709 // Based on the modifying character, change the type and width if necessary. 710 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 711 712 // All pointers are void* pointers. Change type to 'v' now. 713 if (pntr) { 714 usgn = false; 715 poly = false; 716 type = 'v'; 717 } 718 // Treat half-float ('h') types as unsigned short ('s') types. 719 if (type == 'h') { 720 type = 's'; 721 usgn = true; 722 } 723 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && 724 scal && type != 'f' && type != 'd'); 725 726 if (scal) { 727 SmallString<128> s; 728 729 if (usgn) 730 s.push_back('U'); 731 else if (type == 'c') 732 s.push_back('S'); // make chars explicitly signed 733 734 if (type == 'l') // 64-bit long 735 s += "LLi"; 736 else 737 s.push_back(type); 738 739 if (cnst) 740 s.push_back('C'); 741 if (pntr) 742 s.push_back('*'); 743 return s.str(); 744 } 745 746 // Since the return value must be one type, return a vector type of the 747 // appropriate width which we will bitcast. An exception is made for 748 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 749 // fashion, storing them to a pointer arg. 750 if (ret) { 751 if (IsMultiVecProto(mod)) 752 return "vv*"; // void result with void* first argument 753 if (mod == 'f' || (ck != ClassB && type == 'f')) 754 return quad ? "V4f" : "V2f"; 755 if (ck != ClassB && type == 'd') 756 return quad ? "V2d" : "V1d"; 757 if (ck != ClassB && type == 's') 758 return quad ? 
"V8s" : "V4s"; 759 if (ck != ClassB && type == 'i') 760 return quad ? "V4i" : "V2i"; 761 if (ck != ClassB && type == 'l') 762 return quad ? "V2LLi" : "V1LLi"; 763 764 return quad ? "V16Sc" : "V8Sc"; 765 } 766 767 // Non-return array types are passed as individual vectors. 768 if (mod == '2' || mod == 'B') 769 return quad ? "V16ScV16Sc" : "V8ScV8Sc"; 770 if (mod == '3' || mod == 'C') 771 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; 772 if (mod == '4' || mod == 'D') 773 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; 774 775 if (mod == 'f' || (ck != ClassB && type == 'f')) 776 return quad ? "V4f" : "V2f"; 777 if (ck != ClassB && type == 'd') 778 return quad ? "V2d" : "V1d"; 779 if (ck != ClassB && type == 's') 780 return quad ? "V8s" : "V4s"; 781 if (ck != ClassB && type == 'i') 782 return quad ? "V4i" : "V2i"; 783 if (ck != ClassB && type == 'l') 784 return quad ? "V2LLi" : "V1LLi"; 785 786 return quad ? "V16Sc" : "V8Sc"; 787} 788 789/// InstructionTypeCode - Computes the ARM argument character code and 790/// quad status for a specific type string and ClassKind. 791static void InstructionTypeCode(const StringRef &typeStr, 792 const ClassKind ck, 793 bool &quad, 794 std::string &typeCode) { 795 bool poly = false; 796 bool usgn = false; 797 char type = ClassifyType(typeStr, quad, poly, usgn); 798 799 switch (type) { 800 case 'c': 801 switch (ck) { 802 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break; 803 case ClassI: typeCode = "i8"; break; 804 case ClassW: typeCode = "8"; break; 805 default: break; 806 } 807 break; 808 case 's': 809 switch (ck) { 810 case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break; 811 case ClassI: typeCode = "i16"; break; 812 case ClassW: typeCode = "16"; break; 813 default: break; 814 } 815 break; 816 case 'i': 817 switch (ck) { 818 case ClassS: typeCode = usgn ? 
"u32" : "s32"; break; 819 case ClassI: typeCode = "i32"; break; 820 case ClassW: typeCode = "32"; break; 821 default: break; 822 } 823 break; 824 case 'l': 825 switch (ck) { 826 case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break; 827 case ClassI: typeCode = "i64"; break; 828 case ClassW: typeCode = "64"; break; 829 default: break; 830 } 831 break; 832 case 'h': 833 switch (ck) { 834 case ClassS: 835 case ClassI: typeCode = "f16"; break; 836 case ClassW: typeCode = "16"; break; 837 default: break; 838 } 839 break; 840 case 'f': 841 switch (ck) { 842 case ClassS: 843 case ClassI: typeCode = "f32"; break; 844 case ClassW: typeCode = "32"; break; 845 default: break; 846 } 847 break; 848 case 'd': 849 switch (ck) { 850 case ClassS: 851 case ClassI: 852 typeCode += "f64"; 853 break; 854 case ClassW: 855 PrintFatalError("unhandled type!"); 856 default: 857 break; 858 } 859 break; 860 default: 861 PrintFatalError("unhandled type!"); 862 } 863} 864 865static char Insert_BHSD_Suffix(StringRef typestr){ 866 unsigned off = 0; 867 if(typestr[off++] == 'S'){ 868 while(typestr[off] == 'Q' || typestr[off] == 'H'|| 869 typestr[off] == 'P' || typestr[off] == 'U') 870 ++off; 871 switch (typestr[off]){ 872 default : break; 873 case 'c' : return 'b'; 874 case 's' : return 'h'; 875 case 'i' : 876 case 'f' : return 's'; 877 case 'l' : 878 case 'd' : return 'd'; 879 } 880 } 881 return 0; 882} 883 884static bool endsWith_xN(std::string const &name) { 885 if (name.length() > 3) { 886 if (name.compare(name.length() - 3, 3, "_x2") == 0 || 887 name.compare(name.length() - 3, 3, "_x3") == 0 || 888 name.compare(name.length() - 3, 3, "_x4") == 0) 889 return true; 890 } 891 return false; 892} 893 894/// MangleName - Append a type or width suffix to a base neon function name, 895/// and insert a 'q' in the appropriate location if type string starts with 'Q'. 896/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. 897/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used. 
898static std::string MangleName(const std::string &name, StringRef typestr, 899 ClassKind ck) { 900 if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64") 901 return name; 902 903 bool quad = false; 904 std::string typeCode = ""; 905 906 InstructionTypeCode(typestr, ck, quad, typeCode); 907 908 std::string s = name; 909 910 if (typeCode.size() > 0) { 911 // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN. 912 if (endsWith_xN(s)) 913 s.insert(s.length() - 3, "_" + typeCode); 914 else 915 s += "_" + typeCode; 916 } 917 918 if (ck == ClassB) 919 s += "_v"; 920 921 // Insert a 'q' before the first '_' character so that it ends up before 922 // _lane or _n on vector-scalar operations. 923 if (typestr.find("Q") != StringRef::npos) { 924 size_t pos = s.find('_'); 925 s = s.insert(pos, "q"); 926 } 927 char ins = Insert_BHSD_Suffix(typestr); 928 if(ins){ 929 size_t pos = s.find('_'); 930 s = s.insert(pos, &ins, 1); 931 } 932 933 return s; 934} 935 936static void PreprocessInstruction(const StringRef &Name, 937 const std::string &InstName, 938 std::string &Prefix, 939 bool &HasNPostfix, 940 bool &HasLanePostfix, 941 bool &HasDupPostfix, 942 bool &IsSpecialVCvt, 943 size_t &TBNumber) { 944 // All of our instruction name fields from arm_neon.td are of the form 945 // <instructionname>_... 946 // Thus we grab our instruction name via computation of said Prefix. 947 const size_t PrefixEnd = Name.find_first_of('_'); 948 // If InstName is passed in, we use that instead of our name Prefix. 949 Prefix = InstName.size() == 0? 
Name.slice(0, PrefixEnd).str() : InstName; 950 951 const StringRef Postfix = Name.slice(PrefixEnd, Name.size()); 952 953 HasNPostfix = Postfix.count("_n"); 954 HasLanePostfix = Postfix.count("_lane"); 955 HasDupPostfix = Postfix.count("_dup"); 956 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt"); 957 958 if (InstName.compare("vtbl") == 0 || 959 InstName.compare("vtbx") == 0) { 960 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII 961 // encoding to get its true value. 962 TBNumber = Name[Name.size()-1] - 48; 963 } 964} 965 966/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have 967/// extracted, generate a FileCheck pattern for a Load Or Store 968static void 969GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef, 970 const std::string& OutTypeCode, 971 const bool &IsQuad, 972 const bool &HasDupPostfix, 973 const bool &HasLanePostfix, 974 const size_t Count, 975 std::string &RegisterSuffix) { 976 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1"); 977 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang 978 // will output a series of v{ld,st}1s, so we have to handle it specially. 979 if ((Count == 3 || Count == 4) && IsQuad) { 980 RegisterSuffix += "{"; 981 for (size_t i = 0; i < Count; i++) { 982 RegisterSuffix += "d{{[0-9]+}}"; 983 if (HasDupPostfix) { 984 RegisterSuffix += "[]"; 985 } 986 if (HasLanePostfix) { 987 RegisterSuffix += "[{{[0-9]+}}]"; 988 } 989 if (i < Count-1) { 990 RegisterSuffix += ", "; 991 } 992 } 993 RegisterSuffix += "}"; 994 } else { 995 996 // Handle normal loads and stores. 
997 RegisterSuffix += "{"; 998 for (size_t i = 0; i < Count; i++) { 999 RegisterSuffix += "d{{[0-9]+}}"; 1000 if (HasDupPostfix) { 1001 RegisterSuffix += "[]"; 1002 } 1003 if (HasLanePostfix) { 1004 RegisterSuffix += "[{{[0-9]+}}]"; 1005 } 1006 if (IsQuad && !HasLanePostfix) { 1007 RegisterSuffix += ", d{{[0-9]+}}"; 1008 if (HasDupPostfix) { 1009 RegisterSuffix += "[]"; 1010 } 1011 } 1012 if (i < Count-1) { 1013 RegisterSuffix += ", "; 1014 } 1015 } 1016 RegisterSuffix += "}, [r{{[0-9]+}}"; 1017 1018 // We only include the alignment hint if we have a vld1.*64 or 1019 // a dup/lane instruction. 1020 if (IsLDSTOne) { 1021 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") { 1022 RegisterSuffix += ":" + OutTypeCode; 1023 } 1024 } 1025 1026 RegisterSuffix += "]"; 1027 } 1028} 1029 1030static bool HasNPostfixAndScalarArgs(const StringRef &NameRef, 1031 const bool &HasNPostfix) { 1032 return (NameRef.count("vmla") || 1033 NameRef.count("vmlal") || 1034 NameRef.count("vmlsl") || 1035 NameRef.count("vmull") || 1036 NameRef.count("vqdmlal") || 1037 NameRef.count("vqdmlsl") || 1038 NameRef.count("vqdmulh") || 1039 NameRef.count("vqdmull") || 1040 NameRef.count("vqrdmulh")) && HasNPostfix; 1041} 1042 1043static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef, 1044 const bool &HasLanePostfix) { 1045 return (NameRef.count("vmla") || 1046 NameRef.count("vmls") || 1047 NameRef.count("vmlal") || 1048 NameRef.count("vmlsl") || 1049 (NameRef.count("vmul") && NameRef.size() == 3)|| 1050 NameRef.count("vqdmlal") || 1051 NameRef.count("vqdmlsl") || 1052 NameRef.count("vqdmulh") || 1053 NameRef.count("vqrdmulh")) && HasLanePostfix; 1054} 1055 1056static bool IsSpecialLaneMultiply(const StringRef &NameRef, 1057 const bool &HasLanePostfix, 1058 const bool &IsQuad) { 1059 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh")) 1060 && IsQuad; 1061 const bool IsVMull = NameRef.count("mull") && !IsQuad; 1062 return (IsVMulOrMulh || IsVMull) && 
HasLanePostfix; 1063} 1064 1065static void NormalizeProtoForRegisterPatternCreation(const std::string &Name, 1066 const std::string &Proto, 1067 const bool &HasNPostfix, 1068 const bool &IsQuad, 1069 const bool &HasLanePostfix, 1070 const bool &HasDupPostfix, 1071 std::string &NormedProto) { 1072 // Handle generic case. 1073 const StringRef NameRef(Name); 1074 for (size_t i = 0, end = Proto.size(); i < end; i++) { 1075 switch (Proto[i]) { 1076 case 'u': 1077 case 'f': 1078 case 'd': 1079 case 's': 1080 case 'x': 1081 case 't': 1082 case 'n': 1083 NormedProto += IsQuad? 'q' : 'd'; 1084 break; 1085 case 'w': 1086 case 'k': 1087 NormedProto += 'q'; 1088 break; 1089 case 'g': 1090 case 'j': 1091 case 'h': 1092 case 'e': 1093 NormedProto += 'd'; 1094 break; 1095 case 'i': 1096 NormedProto += HasLanePostfix? 'a' : 'i'; 1097 break; 1098 case 'a': 1099 if (HasLanePostfix) { 1100 NormedProto += 'a'; 1101 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) { 1102 NormedProto += IsQuad? 'q' : 'd'; 1103 } else { 1104 NormedProto += 'i'; 1105 } 1106 break; 1107 } 1108 } 1109 1110 // Handle Special Cases. 1111 const bool IsNotVExt = !NameRef.count("vext"); 1112 const bool IsVPADAL = NameRef.count("vpadal"); 1113 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef, 1114 HasLanePostfix); 1115 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix, 1116 IsQuad); 1117 1118 if (IsSpecialLaneMul) { 1119 // If 1120 NormedProto[2] = NormedProto[3]; 1121 NormedProto.erase(3); 1122 } else if (NormedProto.size() == 4 && 1123 NormedProto[0] == NormedProto[1] && 1124 IsNotVExt) { 1125 // If NormedProto.size() == 4 and the first two proto characters are the 1126 // same, ignore the first. 
1127 NormedProto = NormedProto.substr(1, 3); 1128 } else if (Is5OpLaneAccum) { 1129 // If we have a 5 op lane accumulator operation, we take characters 1,2,4 1130 std::string tmp = NormedProto.substr(1,2); 1131 tmp += NormedProto[4]; 1132 NormedProto = tmp; 1133 } else if (IsVPADAL) { 1134 // If we have VPADAL, ignore the first character. 1135 NormedProto = NormedProto.substr(0, 2); 1136 } else if (NameRef.count("vdup") && NormedProto.size() > 2) { 1137 // If our instruction is a dup instruction, keep only the first and 1138 // last characters. 1139 std::string tmp = ""; 1140 tmp += NormedProto[0]; 1141 tmp += NormedProto[NormedProto.size()-1]; 1142 NormedProto = tmp; 1143 } 1144} 1145 1146/// GenerateRegisterCheckPatterns - Given a bunch of data we have 1147/// extracted, generate a FileCheck pattern to check that an 1148/// instruction's arguments are correct. 1149static void GenerateRegisterCheckPattern(const std::string &Name, 1150 const std::string &Proto, 1151 const std::string &OutTypeCode, 1152 const bool &HasNPostfix, 1153 const bool &IsQuad, 1154 const bool &HasLanePostfix, 1155 const bool &HasDupPostfix, 1156 const size_t &TBNumber, 1157 std::string &RegisterSuffix) { 1158 1159 RegisterSuffix = ""; 1160 1161 const StringRef NameRef(Name); 1162 const StringRef ProtoRef(Proto); 1163 1164 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) { 1165 return; 1166 } 1167 1168 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst"); 1169 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx"); 1170 1171 if (IsLoadStore) { 1172 // Grab N value from v{ld,st}N using its ascii representation. 
1173 const size_t Count = NameRef[3] - 48; 1174 1175 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad, 1176 HasDupPostfix, HasLanePostfix, 1177 Count, RegisterSuffix); 1178 } else if (IsTBXOrTBL) { 1179 RegisterSuffix += "d{{[0-9]+}}, {"; 1180 for (size_t i = 0; i < TBNumber-1; i++) { 1181 RegisterSuffix += "d{{[0-9]+}}, "; 1182 } 1183 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}"; 1184 } else { 1185 // Handle a normal instruction. 1186 if (NameRef.count("vget") || NameRef.count("vset")) 1187 return; 1188 1189 // We first normalize our proto, since we only need to emit 4 1190 // different types of checks, yet have more than 4 proto types 1191 // that map onto those 4 patterns. 1192 std::string NormalizedProto(""); 1193 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad, 1194 HasLanePostfix, HasDupPostfix, 1195 NormalizedProto); 1196 1197 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) { 1198 const char &c = NormalizedProto[i]; 1199 switch (c) { 1200 case 'q': 1201 RegisterSuffix += "q{{[0-9]+}}, "; 1202 break; 1203 1204 case 'd': 1205 RegisterSuffix += "d{{[0-9]+}}, "; 1206 break; 1207 1208 case 'i': 1209 RegisterSuffix += "#{{[0-9]+}}, "; 1210 break; 1211 1212 case 'a': 1213 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], "; 1214 break; 1215 } 1216 } 1217 1218 // Remove extra ", ". 1219 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2); 1220 } 1221} 1222 1223/// GenerateChecksForIntrinsic - Given a specific instruction name + 1224/// typestr + class kind, generate the proper set of FileCheck 1225/// Patterns to check for. We could just return a string, but instead 1226/// use a vector since it provides us with the extra flexibility of 1227/// emitting multiple checks, which comes in handy for certain cases 1228/// like mla where we want to check for 2 different instructions. 
1229static void GenerateChecksForIntrinsic(const std::string &Name, 1230 const std::string &Proto, 1231 StringRef &OutTypeStr, 1232 StringRef &InTypeStr, 1233 ClassKind Ck, 1234 const std::string &InstName, 1235 bool IsHiddenLOp, 1236 std::vector<std::string>& Result) { 1237 1238 // If Ck is a ClassNoTest instruction, just return so no test is 1239 // emitted. 1240 if(Ck == ClassNoTest) 1241 return; 1242 1243 if (Name == "vcvt_f32_f16") { 1244 Result.push_back("vcvt.f32.f16"); 1245 return; 1246 } 1247 1248 1249 // Now we preprocess our instruction given the data we have to get the 1250 // data that we need. 1251 // Create a StringRef for String Manipulation of our Name. 1252 const StringRef NameRef(Name); 1253 // Instruction Prefix. 1254 std::string Prefix; 1255 // The type code for our out type string. 1256 std::string OutTypeCode; 1257 // To handle our different cases, we need to check for different postfixes. 1258 // Is our instruction a quad instruction. 1259 bool IsQuad = false; 1260 // Our instruction is of the form <instructionname>_n. 1261 bool HasNPostfix = false; 1262 // Our instruction is of the form <instructionname>_lane. 1263 bool HasLanePostfix = false; 1264 // Our instruction is of the form <instructionname>_dup. 1265 bool HasDupPostfix = false; 1266 // Our instruction is a vcvt instruction which requires special handling. 1267 bool IsSpecialVCvt = false; 1268 // If we have a vtbxN or vtblN instruction, this is set to N. 1269 size_t TBNumber = -1; 1270 // Register Suffix 1271 std::string RegisterSuffix; 1272 1273 PreprocessInstruction(NameRef, InstName, Prefix, 1274 HasNPostfix, HasLanePostfix, HasDupPostfix, 1275 IsSpecialVCvt, TBNumber); 1276 1277 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode); 1278 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad, 1279 HasLanePostfix, HasDupPostfix, TBNumber, 1280 RegisterSuffix); 1281 1282 // In the following section, we handle a bunch of special cases. 
You can tell 1283 // a special case by the fact we are returning early. 1284 1285 // If our instruction is a logical instruction without postfix or a 1286 // hidden LOp just return the current Prefix. 1287 if (Ck == ClassL || IsHiddenLOp) { 1288 Result.push_back(Prefix + " " + RegisterSuffix); 1289 return; 1290 } 1291 1292 // If we have a vmov, due to the many different cases, some of which 1293 // vary within the different intrinsics generated for a single 1294 // instruction type, just output a vmov. (e.g. given an instruction 1295 // A, A.u32 might be vmov and A.u8 might be vmov.8). 1296 // 1297 // FIXME: Maybe something can be done about this. The two cases that we care 1298 // about are vmov as an LType and vmov as a WType. 1299 if (Prefix == "vmov") { 1300 Result.push_back(Prefix + " " + RegisterSuffix); 1301 return; 1302 } 1303 1304 // In the following section, we handle special cases. 1305 1306 if (OutTypeCode == "64") { 1307 // If we have a 64 bit vdup/vext and are handling an uint64x1_t 1308 // type, the intrinsic will be optimized away, so just return 1309 // nothing. On the other hand if we are handling an uint64x2_t 1310 // (i.e. quad instruction), vdup/vmov instructions should be 1311 // emitted. 1312 if (Prefix == "vdup" || Prefix == "vext") { 1313 if (IsQuad) { 1314 Result.push_back("{{vmov|vdup}}"); 1315 } 1316 return; 1317 } 1318 1319 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with 1320 // multiple register operands. 1321 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3" 1322 || Prefix == "vld4"; 1323 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3" 1324 || Prefix == "vst4"; 1325 if (MultiLoadPrefix || MultiStorePrefix) { 1326 Result.push_back(NameRef.slice(0, 3).str() + "1.64"); 1327 return; 1328 } 1329 1330 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of 1331 // emitting said instructions. So return a check for 1332 // vldr/vstr/vmov/str instead. 
1333 if (HasLanePostfix || HasDupPostfix) { 1334 if (Prefix == "vst1") { 1335 Result.push_back("{{str|vstr|vmov}}"); 1336 return; 1337 } else if (Prefix == "vld1") { 1338 Result.push_back("{{ldr|vldr|vmov}}"); 1339 return; 1340 } 1341 } 1342 } 1343 1344 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are 1345 // sometimes disassembled as vtrn.32. We use a regex to handle both 1346 // cases. 1347 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") { 1348 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix); 1349 return; 1350 } 1351 1352 // Currently on most ARM processors, we do not use vmla/vmls for 1353 // quad floating point operations. Instead we output vmul + vadd. So 1354 // check if we have one of those instructions and just output a 1355 // check for vmul. 1356 if (OutTypeCode == "f32") { 1357 if (Prefix == "vmls") { 1358 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1359 Result.push_back("vsub." + OutTypeCode); 1360 return; 1361 } else if (Prefix == "vmla") { 1362 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1363 Result.push_back("vadd." + OutTypeCode); 1364 return; 1365 } 1366 } 1367 1368 // If we have vcvt, get the input type from the instruction name 1369 // (which should be of the form instname_inputtype) and append it 1370 // before the output type. 1371 if (Prefix == "vcvt") { 1372 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1); 1373 Prefix += "." + inTypeCode; 1374 } 1375 1376 // Append output type code to get our final mangled instruction. 1377 Prefix += "." + OutTypeCode; 1378 1379 Result.push_back(Prefix + " " + RegisterSuffix); 1380} 1381 1382/// UseMacro - Examine the prototype string to determine if the intrinsic 1383/// should be defined as a preprocessor macro instead of an inline function. 
1384static bool UseMacro(const std::string &proto) { 1385 // If this builtin takes an immediate argument, we need to #define it rather 1386 // than use a standard declaration, so that SemaChecking can range check 1387 // the immediate passed by the user. 1388 if (proto.find('i') != std::string::npos) 1389 return true; 1390 1391 // Pointer arguments need to use macros to avoid hiding aligned attributes 1392 // from the pointer type. 1393 if (proto.find('p') != std::string::npos || 1394 proto.find('c') != std::string::npos) 1395 return true; 1396 1397 return false; 1398} 1399 1400/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 1401/// defined as a macro should be accessed directly instead of being first 1402/// assigned to a local temporary. 1403static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1404 // True for constant ints (i), pointers (p) and const pointers (c). 1405 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1406} 1407 1408// Generate the string "(argtype a, argtype b, ...)" 1409static std::string GenArgs(const std::string &proto, StringRef typestr, 1410 const std::string &name) { 1411 bool define = UseMacro(proto); 1412 char arg = 'a'; 1413 1414 std::string s; 1415 s += "("; 1416 1417 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1418 if (define) { 1419 // Some macro arguments are used directly instead of being assigned 1420 // to local temporaries; prepend an underscore prefix to make their 1421 // names consistent with the local temporaries. 1422 if (MacroArgUsedDirectly(proto, i)) 1423 s += "__"; 1424 } else { 1425 s += TypeString(proto[i], typestr) + " __"; 1426 } 1427 s.push_back(arg); 1428 //To avoid argument being multiple defined, add extra number for renaming. 
1429 if (name == "vcopy_lane" || name == "vcopy_laneq") 1430 s.push_back('1'); 1431 if ((i + 1) < e) 1432 s += ", "; 1433 } 1434 1435 s += ")"; 1436 return s; 1437} 1438 1439// Macro arguments are not type-checked like inline function arguments, so 1440// assign them to local temporaries to get the right type checking. 1441static std::string GenMacroLocals(const std::string &proto, StringRef typestr, 1442 const std::string &name ) { 1443 char arg = 'a'; 1444 std::string s; 1445 bool generatedLocal = false; 1446 1447 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1448 // Do not create a temporary for an immediate argument. 1449 // That would defeat the whole point of using a macro! 1450 if (MacroArgUsedDirectly(proto, i)) 1451 continue; 1452 generatedLocal = true; 1453 bool extranumber = false; 1454 if (name == "vcopy_lane" || name == "vcopy_laneq") 1455 extranumber = true; 1456 1457 s += TypeString(proto[i], typestr) + " __"; 1458 s.push_back(arg); 1459 if(extranumber) 1460 s.push_back('1'); 1461 s += " = ("; 1462 s.push_back(arg); 1463 if(extranumber) 1464 s.push_back('1'); 1465 s += "); "; 1466 } 1467 1468 if (generatedLocal) 1469 s += "\\\n "; 1470 return s; 1471} 1472 1473// Use the vmovl builtin to sign-extend or zero-extend a vector. 1474static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1475 std::string s, high; 1476 high = h ? "_high" : ""; 1477 s = MangleName("vmovl" + high, typestr, ClassS); 1478 s += "(" + a + ")"; 1479 return s; 1480} 1481 1482// Get the high 64-bit part of a vector 1483static std::string GetHigh(const std::string &a, StringRef typestr) { 1484 std::string s; 1485 s = MangleName("vget_high", typestr, ClassS); 1486 s += "(" + a + ")"; 1487 return s; 1488} 1489 1490// Gen operation with two operands and get high 64-bit for both of two operands. 
static std::string Gen2OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b) {
  // Produces "<mangled op>(<high a>, <high b>);", including the trailing ';'.
  std::string s;
  std::string Op1 = GetHigh(a, typestr);
  std::string Op2 = GetHigh(b, typestr);
  s = MangleName(op, typestr, ClassS);
  s += "(" + Op1 + ", " + Op2 + ");";
  return s;
}

// Gen operation with three operands and get high 64-bit of the latter
// two operands.
// Produces "<mangled op>(a, <high b>, <high c>);", including the trailing ';'.
static std::string Gen3OpWith2High(StringRef typestr,
                                   const std::string &op,
                                   const std::string &a,
                                   const std::string &b,
                                   const std::string &c) {
  std::string s;
  std::string Op1 = GetHigh(b, typestr);
  std::string Op2 = GetHigh(c, typestr);
  s = MangleName(op, typestr, ClassS);
  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
  return s;
}

// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
// Unlike the Gen*High helpers, the result has no trailing ';'.
// NOTE(review): typestr is taken by value here while the neighbouring helpers
// take a StringRef.
static std::string GenCombine(std::string typestr,
                              const std::string &a,
                              const std::string &b) {
  std::string s;
  s = MangleName("vcombine", typestr, ClassS);
  s += "(" + a + ", " + b + ")";
  return s;
}

// Build a vector literal "(<type>){ a, a, ..., a }" that repeats the
// expression a nElts times, where <type> is TypeString('d', typestr).
static std::string Duplicate(unsigned nElts, StringRef typestr,
                             const std::string &a) {
  std::string s;

  s = "(" + TypeString('d', typestr) + "){ ";
  for (unsigned i = 0; i != nElts; ++i) {
    s += a;
    if ((i + 1) < nElts)
      s += ", ";
  }
  s += " }";

  return s;
}

// Build "__builtin_shufflevector(vec, vec, lane, ..., lane)" with the lane
// expression repeated nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
  for (unsigned i = 0; i < nElts; ++i)
    s += ", " + lane;
  s += ")";
  return s;
}

// Return name with the first "_high" removed. Only 5 characters of the
// "_high_" match are erased, so the trailing underscore survives
// ("x_high_y" becomes "x_y"). A name without "_high_" is a fatal error.
static std::string RemoveHigh(const std::string &name) {
  std::string s = name;
  std::size_t found = s.find("_high_");
  if (found == std::string::npos)
    PrintFatalError("name should contain \"_high_\" for high intrinsics");
  s.replace(found, 5, "");
  return s;
}

// Return the element count for typestr and report through quad whether
// ClassifyType flagged it as a quad type. The per-type counts below are
// doubled for quad types. An unrecognised type character is a fatal error.
static unsigned GetNumElements(StringRef typestr, bool &quad) {
  quad = false;
  bool dummy = false;
  char type = ClassifyType(typestr, quad, dummy, dummy);
  unsigned nElts = 0;
  switch (type) {
  case 'c': nElts = 8; break;
  case 's': nElts = 4; break;
  case 'i': nElts = 2; break;
  case 'l': nElts = 1; break;
  case 'h': nElts = 4; break;
  case 'f': nElts = 2; break;
  case 'd':
    nElts = 1;
    break;
  default:
    PrintFatalError("unhandled type!");
  }
  if (quad) nElts <<= 1;
  return nElts;
}

// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
//
// The returned text is the body of the intrinsic, written in terms of the
// arguments __a, __b, __c, ... When the intrinsic is not a macro (see
// UseMacro) the text starts with "return "; a macro body gets no prefix.
// Most cases append to that prefix. The cases that assign to s instead
// (OpXtnHi, OpSqxtunHi, OpQxtnHi, OpFcvtnHi, OpFcvtlHi, OpFcvtxnHi,
// OpMovlHi) build their own statements, including their own "return".
// An OpKind without a case is a fatal error.
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  // Type string of the return value (proto[0]).
  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMulXLane:
    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMullHiLane:
    s += MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlalHiLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpFMSLane:
    // Copy the three vector arguments into typed locals (each followed by a
    // macro line continuation), then call vfma_lane with __c1 negated.
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpFMSLaneQ:
    // Same as OpFMSLane, but calls vfma_laneq.
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlslHiLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMullHiLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlalHiLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslHiLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpEq:
    // Comparison results are cast to the return type.
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    // The result is cast to the return type; ts is then re-pointed at the
    // type of the first argument (the mask), to which __b and __c are cast.
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Reverse the element order inside each consecutive group of 2.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Reverse the element order inside each consecutive group of WordElts.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Reverse the element order inside each consecutive group of DblWordElts.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpXtnHi: {
    // Assigns to s (dropping any prefix): declare __a1 as the narrowed __b,
    // then return the shuffle that joins __a and __a1.
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vmovn", typestr, ClassS) + "(__b);\n " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpSqxtunHi: {
    // Same shape as OpXtnHi, using vqmovun.
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vqmovun", typestr, ClassS) + "(__b);\n " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpQxtnHi: {
    // Same shape as OpXtnHi, using vqmovn.
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vqmovn", typestr, ClassS) + "(__b);\n " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtnHi: {
    // Same shape as OpXtnHi; the conversion builtin depends on nElts.
    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName(FName, typestr, ClassS) + "(__b);\n " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtlHi: {
    // Assigns to s: take the high half of __a into __a1, then return its
    // conversion. The conversion builtin depends on nElts.
    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
        ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);";
    break;
  }
  case OpFcvtxnHi: {
    // Same shape as OpXtnHi, using vcvtx_f32.
    s = TypeString(proto[1], typestr) + " __a1 = " +
        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " +
        "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpUzp1:
    // Shuffle indices 0, 2, 4, ...
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i);
    s += ");";
    break;
  case OpUzp2:
    // Shuffle indices 1, 3, 5, ...
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i+1);
    s += ");";
    break;
  case OpZip1:
    // Index pairs (i, i+nElts) for i in the lower half.
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpZip2:
    // Index pairs (i, i+nElts) for i in the upper half.
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = nElts/2; i < nElts; i++)
       s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpTrn1:
    // Index pairs (2i, 2i+nElts).
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
    s += ");";
    break;
  case OpTrn2:
    // Index pairs (2i+1, 2i+1+nElts).
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
    s += ");";
    break;
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  case OpAddhnHi: {
    // Combine __a (low) with the narrowing op on __b, __c (high). The three
    // cases that follow have the same shape with a different builtin.
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    // The __a1 declaration is placed in front of the existing prefix (note
    // the trailing "+ s"), so any "return " ends up on the second statement.
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
        MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
         "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  case OpCopyLane: {
    // Uses the argument names __a1..__d1. GenArgs and GenMacroLocals append
    // the '1' for the intrinsics named vcopy_lane / vcopy_laneq.
    s += TypeString('s', typestr) + " __c2 = " +
         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyQLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
         "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
         "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
    break;
  }
  case OpScalarMulLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
      "(__b, __c);\\\n __a * __d1;";
    break;
  }
  case OpScalarMulLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
      "(__b, __c);\\\n __a * __d1;";
    break;
  }
  case OpScalarMulXLane: {
    // The classified type character becomes part of the builtin name
    // ("vmulx<type>_<typeCode>"), with 'f' spelled as 's'.
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
      "(__b, __c);\\\n vmulx" + type + "_" +
      typeCode + "(__a, __d1);";
    break;
  }
  case OpScalarMulXLaneQ: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
      typeCode + "(__b, __c);\\\n vmulx" + type +
      "_" + typeCode + "(__a, __d1);";
    break;
  }

  case OpScalarVMulXLane: {
    // Extract lane 0 of __a and lane __c of __b, multiply them with the
    // scalar vmulx builtin, and insert the product into a fresh vector __g1.
    // NOTE(review): this variant inserts at lane __c, while
    // OpScalarVMulXLaneQ below inserts at lane 0. Confirm the difference is
    // intended.
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
      typeCode + "(__a, 0);\\\n" +
      " " + TypeString('s', typestr) + " __e1 = vget_lane_" +
      typeCode + "(__b, __c);\\\n" +
      " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
      typeCode + "(__d1, __e1);\\\n" +
      " " + TypeString('d', typestr) + " __g1;\\\n" +
      " vset_lane_" + typeCode + "(__f1, __g1, __c);";
    break;
  }

  case OpScalarVMulXLaneQ: {
    bool dummy = false;
    char type = ClassifyType(typestr, dummy, dummy, dummy);
    if (type == 'f') type = 's';
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
      typeCode + "(__a, 0);\\\n" +
      " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
      typeCode + "(__b, __c);\\\n" +
      " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
      typeCode + "(__d1, __e1);\\\n" +
      " " + TypeString('d', typestr) + " __g1;\\\n" +
      " vset_lane_" + typeCode + "(__f1, __g1, 0);";
    break;
  }

  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}

// Compute the NeonTypeFlags value for an intrinsic: the element type, the
// unsigned flag and the quad flag, packed by NeonTypeFlags::getFlags().
// The modifier is proto[0], or proto[1] when proto[0] is 'v' or 'f'.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  NeonTypeFlags::EltType ET;
  switch (type) {
    case 'c':
      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
      break;
    case 's':
      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
      break;
    case 'i':
      ET = NeonTypeFlags::Int32;
      break;
    case 'l':
      ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
      break;
    case 'h':
      ET = NeonTypeFlags::Float16;
      break;
    case 'f':
      ET = NeonTypeFlags::Float32;
      break;
    case 'd':
      ET = NeonTypeFlags::Float64;
      break;
    default:
      PrintFatalError("unhandled type!");
  }
  // The quad flag is dropped when the first argument modifier is 'g'.
  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
  return Flags.getFlags();
}

// Return true if the prototype string contains an 's' or an 'r'.
// NOTE(review): proto is taken by value, so every call copies the string.
static bool ProtoHasScalar(const std::string proto)
{
  return (proto.find('s') != std::string::npos
          || proto.find('r') != std::string::npos);
}

// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
//
// The returned text is a call to __builtin_neon_<mangled name> taking the
// arguments __a, __b, ..., with casts inserted where needed. When the
// prototype has no scalar operand (see ProtoHasScalar), ck is forced to
// ClassB and the value of GetNeonEnum is appended as an extra trailing
// argument.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = IsMultiVecProto(proto[0]);

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements. If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  // Non-void result: either declare the sret variable "r", or cast the call
  // to the return type (with a leading "return" for inline functions).
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // An 'a' anywhere in the prototype selects the splat form.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    // '2'..'4' and 'B'..'D' both encode a count of 2..4 vectors.
    unsigned NumOfVec = 0;
    if (proto[i] >= '2' && proto[i] <= '4') {
      NumOfVec = proto[i] - '0';
    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
      NumOfVec = proto[i] - 'A' + 1;
    }

    if (NumOfVec > 0) {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // In the splat form the last argument is expanded into a vector literal
    // that repeats it.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // sret form: yield the variable that the builtin filled in.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}

static std::string GenBuiltinDef(const std::string &name,
                                 const std::string &proto,
                                 StringRef typestr, ClassKind ck) {
  std::string s("BUILTIN(__builtin_neon_");

  // If all types are the same size, bitcasting the args will take care
  // of arg checking. The actual signedness etc. will be taken care of with
  // special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  s += MangleName(name, typestr, ck);
  s += ", \"";

  for (unsigned i = 0, e = proto.size(); i != e; ++i)
    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);

  // Extra constant integer to hold type class enum for this function, e.g.
s8 2307 if (ck == ClassB) 2308 s += "i"; 2309 2310 s += "\", \"n\")"; 2311 return s; 2312} 2313 2314static std::string GenIntrinsic(const std::string &name, 2315 const std::string &proto, 2316 StringRef outTypeStr, StringRef inTypeStr, 2317 OpKind kind, ClassKind classKind) { 2318 assert(!proto.empty() && ""); 2319 bool define = UseMacro(proto) && kind != OpUnavailable; 2320 std::string s; 2321 2322 // static always inline + return type 2323 if (define) 2324 s += "#define "; 2325 else 2326 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 2327 2328 // Function name with type suffix 2329 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2330 if (outTypeStr != inTypeStr) { 2331 // If the input type is different (e.g., for vreinterpret), append a suffix 2332 // for the input type. String off a "Q" (quad) prefix so that MangleName 2333 // does not insert another "q" in the name. 2334 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2335 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2336 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2337 } 2338 s += mangledName; 2339 2340 // Function arguments 2341 s += GenArgs(proto, inTypeStr, name); 2342 2343 // Definition. 2344 if (define) { 2345 s += " __extension__ ({ \\\n "; 2346 s += GenMacroLocals(proto, inTypeStr, name); 2347 } else if (kind == OpUnavailable) { 2348 s += " __attribute__((unavailable));\n"; 2349 return s; 2350 } else 2351 s += " {\n "; 2352 2353 if (kind != OpNone) 2354 s += GenOpString(name, kind, proto, outTypeStr); 2355 else 2356 s += GenBuiltin(name, proto, outTypeStr, classKind); 2357 if (define) 2358 s += " })"; 2359 else 2360 s += " }"; 2361 s += "\n"; 2362 return s; 2363} 2364 2365/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2366/// is comprised of type definitions and function declarations. 
2367void NeonEmitter::run(raw_ostream &OS) { 2368 OS << 2369 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------" 2370 "---===\n" 2371 " *\n" 2372 " * Permission is hereby granted, free of charge, to any person obtaining " 2373 "a copy\n" 2374 " * of this software and associated documentation files (the \"Software\")," 2375 " to deal\n" 2376 " * in the Software without restriction, including without limitation the " 2377 "rights\n" 2378 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2379 "and/or sell\n" 2380 " * copies of the Software, and to permit persons to whom the Software is\n" 2381 " * furnished to do so, subject to the following conditions:\n" 2382 " *\n" 2383 " * The above copyright notice and this permission notice shall be " 2384 "included in\n" 2385 " * all copies or substantial portions of the Software.\n" 2386 " *\n" 2387 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2388 "EXPRESS OR\n" 2389 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2390 "MERCHANTABILITY,\n" 2391 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2392 "SHALL THE\n" 2393 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2394 "OTHER\n" 2395 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2396 "ARISING FROM,\n" 2397 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2398 "DEALINGS IN\n" 2399 " * THE SOFTWARE.\n" 2400 " *\n" 2401 " *===--------------------------------------------------------------------" 2402 "---===\n" 2403 " */\n\n"; 2404 2405 OS << "#ifndef __ARM_NEON_H\n"; 2406 OS << "#define __ARM_NEON_H\n\n"; 2407 2408 OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n"; 2409 OS << "#error \"NEON support not enabled\"\n"; 2410 OS << "#endif\n\n"; 2411 2412 OS << "#include <stdint.h>\n\n"; 2413 2414 // Emit NEON-specific scalar typedefs. 
2415 OS << "typedef float float32_t;\n"; 2416 OS << "typedef __fp16 float16_t;\n"; 2417 2418 OS << "#ifdef __aarch64__\n"; 2419 OS << "typedef double float64_t;\n"; 2420 OS << "#endif\n\n"; 2421 2422 // For now, signedness of polynomial types depends on target 2423 OS << "#ifdef __aarch64__\n"; 2424 OS << "typedef uint8_t poly8_t;\n"; 2425 OS << "typedef uint16_t poly16_t;\n"; 2426 OS << "typedef uint64_t poly64_t;\n"; 2427 OS << "#else\n"; 2428 OS << "typedef int8_t poly8_t;\n"; 2429 OS << "typedef int16_t poly16_t;\n"; 2430 OS << "#endif\n"; 2431 2432 // Emit Neon vector typedefs. 2433 std::string TypedefTypes( 2434 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2435 SmallVector<StringRef, 24> TDTypeVec; 2436 ParseTypes(0, TypedefTypes, TDTypeVec); 2437 2438 // Emit vector typedefs. 2439 bool isA64 = false; 2440 bool preinsert; 2441 bool postinsert; 2442 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2443 bool dummy, quad = false, poly = false; 2444 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2445 preinsert = false; 2446 postinsert = false; 2447 2448 if (type == 'd' || (type == 'l' && poly)) { 2449 preinsert = isA64? false: true; 2450 isA64 = true; 2451 } else { 2452 postinsert = isA64? true: false; 2453 isA64 = false; 2454 } 2455 if (postinsert) 2456 OS << "#endif\n"; 2457 if (preinsert) 2458 OS << "#ifdef __aarch64__\n"; 2459 2460 if (poly) 2461 OS << "typedef __attribute__((neon_polyvector_type("; 2462 else 2463 OS << "typedef __attribute__((neon_vector_type("; 2464 2465 unsigned nElts = GetNumElements(TDTypeVec[i], quad); 2466 OS << utostr(nElts) << "))) "; 2467 if (nElts < 10) 2468 OS << " "; 2469 2470 OS << TypeString('s', TDTypeVec[i]); 2471 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; 2472 2473 } 2474 postinsert = isA64? true: false; 2475 if (postinsert) 2476 OS << "#endif\n"; 2477 OS << "\n"; 2478 2479 // Emit struct typedefs. 
2480 isA64 = false; 2481 for (unsigned vi = 2; vi != 5; ++vi) { 2482 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2483 bool dummy, quad = false, poly = false; 2484 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2485 preinsert = false; 2486 postinsert = false; 2487 2488 if (type == 'd' || (type == 'l' && poly)) { 2489 preinsert = isA64? false: true; 2490 isA64 = true; 2491 } else { 2492 postinsert = isA64? true: false; 2493 isA64 = false; 2494 } 2495 if (postinsert) 2496 OS << "#endif\n"; 2497 if (preinsert) 2498 OS << "#ifdef __aarch64__\n"; 2499 2500 std::string ts = TypeString('d', TDTypeVec[i]); 2501 std::string vs = TypeString('0' + vi, TDTypeVec[i]); 2502 OS << "typedef struct " << vs << " {\n"; 2503 OS << " " << ts << " val"; 2504 OS << "[" << utostr(vi) << "]"; 2505 OS << ";\n} "; 2506 OS << vs << ";\n"; 2507 OS << "\n"; 2508 } 2509 } 2510 postinsert = isA64? true: false; 2511 if (postinsert) 2512 OS << "#endif\n"; 2513 OS << "\n"; 2514 2515 OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n"; 2516 2517 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 2518 2519 StringMap<ClassKind> EmittedMap; 2520 2521 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other 2522 // intrinsics. (Some of the saturating multiply instructions are also 2523 // used to implement the corresponding "_lane" variants, but tablegen 2524 // sorts the records into alphabetical order so that the "_lane" variants 2525 // come after the intrinsics they use.) 2526 emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap); 2527 emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap); 2528 emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap); 2529 emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap); 2530 2531 // ARM intrinsics must be emitted before AArch64 intrinsics to ensure 2532 // common intrinsics appear only once in the output stream. 
2533 // The check for uniquiness is done in emitIntrinsic. 2534 // Emit ARM intrinsics. 2535 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2536 Record *R = RV[i]; 2537 2538 // Skip AArch64 intrinsics; they will be emitted at the end. 2539 bool isA64 = R->getValueAsBit("isA64"); 2540 if (isA64) 2541 continue; 2542 2543 if (R->getName() != "VMOVL" && R->getName() != "VMULL" && 2544 R->getName() != "VABD") 2545 emitIntrinsic(OS, R, EmittedMap); 2546 } 2547 2548 // Emit AArch64-specific intrinsics. 2549 OS << "#ifdef __aarch64__\n"; 2550 2551 emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap); 2552 emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap); 2553 emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap); 2554 2555 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2556 Record *R = RV[i]; 2557 2558 // Skip ARM intrinsics already included above. 2559 bool isA64 = R->getValueAsBit("isA64"); 2560 if (!isA64) 2561 continue; 2562 2563 emitIntrinsic(OS, R, EmittedMap); 2564 } 2565 2566 OS << "#endif\n\n"; 2567 2568 OS << "#undef __ai\n\n"; 2569 OS << "#endif /* __ARM_NEON_H */\n"; 2570} 2571 2572/// emitIntrinsic - Write out the arm_neon.h header file definitions for the 2573/// intrinsics specified by record R checking for intrinsic uniqueness. 
2574void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, 2575 StringMap<ClassKind> &EmittedMap) { 2576 std::string name = R->getValueAsString("Name"); 2577 std::string Proto = R->getValueAsString("Prototype"); 2578 std::string Types = R->getValueAsString("Types"); 2579 2580 SmallVector<StringRef, 16> TypeVec; 2581 ParseTypes(R, Types, TypeVec); 2582 2583 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2584 2585 ClassKind classKind = ClassNone; 2586 if (R->getSuperClasses().size() >= 2) 2587 classKind = ClassMap[R->getSuperClasses()[1]]; 2588 if (classKind == ClassNone && kind == OpNone) 2589 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2590 2591 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2592 if (kind == OpReinterpret) { 2593 bool outQuad = false; 2594 bool dummy = false; 2595 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2596 for (unsigned srcti = 0, srcte = TypeVec.size(); 2597 srcti != srcte; ++srcti) { 2598 bool inQuad = false; 2599 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2600 if (srcti == ti || inQuad != outQuad) 2601 continue; 2602 std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], 2603 OpCast, ClassS); 2604 if (EmittedMap.count(s)) 2605 continue; 2606 EmittedMap[s] = ClassS; 2607 OS << s; 2608 } 2609 } else { 2610 std::string s = 2611 GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); 2612 if (EmittedMap.count(s)) 2613 continue; 2614 EmittedMap[s] = classKind; 2615 OS << s; 2616 } 2617 } 2618 OS << "\n"; 2619} 2620 2621static unsigned RangeFromType(const char mod, StringRef typestr) { 2622 // base type to get the type string for. 
2623 bool quad = false, dummy = false; 2624 char type = ClassifyType(typestr, quad, dummy, dummy); 2625 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy); 2626 2627 switch (type) { 2628 case 'c': 2629 return (8 << (int)quad) - 1; 2630 case 'h': 2631 case 's': 2632 return (4 << (int)quad) - 1; 2633 case 'f': 2634 case 'i': 2635 return (2 << (int)quad) - 1; 2636 case 'd': 2637 case 'l': 2638 return (1 << (int)quad) - 1; 2639 default: 2640 PrintFatalError("unhandled type!"); 2641 } 2642} 2643 2644static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) { 2645 // base type to get the type string for. 2646 bool dummy = false; 2647 char type = ClassifyType(typestr, dummy, dummy, dummy); 2648 type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy); 2649 2650 switch (type) { 2651 case 'c': 2652 return 7; 2653 case 'h': 2654 case 's': 2655 return 15; 2656 case 'f': 2657 case 'i': 2658 return 31; 2659 case 'd': 2660 case 'l': 2661 return 63; 2662 default: 2663 PrintFatalError("unhandled type!"); 2664 } 2665} 2666 2667/// Generate the ARM and AArch64 intrinsic range checking code for 2668/// shift/lane immediates, checking for unique declarations. 2669void 2670NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2671 StringMap<ClassKind> &A64IntrinsicMap, 2672 bool isA64RangeCheck) { 2673 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2674 StringMap<OpKind> EmittedMap; 2675 2676 // Generate the intrinsic range checking code for shift/lane immediates. 
2677 if (isA64RangeCheck) 2678 OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n"; 2679 else 2680 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2681 2682 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2683 Record *R = RV[i]; 2684 2685 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2686 if (k != OpNone) 2687 continue; 2688 2689 std::string name = R->getValueAsString("Name"); 2690 std::string Proto = R->getValueAsString("Prototype"); 2691 std::string Types = R->getValueAsString("Types"); 2692 std::string Rename = name + "@" + Proto; 2693 2694 // Functions with 'a' (the splat code) in the type prototype should not get 2695 // their own builtin as they use the non-splat variant. 2696 if (Proto.find('a') != std::string::npos) 2697 continue; 2698 2699 // Functions which do not have an immediate do not need to have range 2700 // checking code emitted. 2701 size_t immPos = Proto.find('i'); 2702 if (immPos == std::string::npos) 2703 continue; 2704 2705 SmallVector<StringRef, 16> TypeVec; 2706 ParseTypes(R, Types, TypeVec); 2707 2708 if (R->getSuperClasses().size() < 2) 2709 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2710 2711 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2712 2713 // Do not include AArch64 range checks if not generating code for AArch64. 2714 bool isA64 = R->getValueAsBit("isA64"); 2715 if (!isA64RangeCheck && isA64) 2716 continue; 2717 2718 // Include ARM range checks in AArch64 but only if ARM intrinsics are not 2719 // redefined by AArch64 to handle new types. 
2720 if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2721 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2722 if (A64CK == ck && ck != ClassNone) 2723 continue; 2724 } 2725 2726 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2727 std::string namestr, shiftstr, rangestr; 2728 2729 if (R->getValueAsBit("isVCVT_N")) { 2730 // VCVT between floating- and fixed-point values takes an immediate 2731 // in the range [1, 32] for f32, or [1, 64] for f64. 2732 ck = ClassB; 2733 if (name.find("32") != std::string::npos) 2734 rangestr = "l = 1; u = 31"; // upper bound = l + u 2735 else if (name.find("64") != std::string::npos) 2736 rangestr = "l = 1; u = 63"; 2737 else 2738 PrintFatalError(R->getLoc(), 2739 "Fixed point convert name should contains \"32\" or \"64\""); 2740 2741 } else if (R->getValueAsBit("isScalarShift")) { 2742 // Right shifts have an 'r' in the name, left shifts do not. Convert 2743 // instructions have the same bounds and right shifts. 2744 if (name.find('r') != std::string::npos || 2745 name.find("cvt") != std::string::npos) 2746 rangestr = "l = 1; "; 2747 2748 rangestr += "u = " + 2749 utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti])); 2750 } else if (!ProtoHasScalar(Proto)) { 2751 // Builtins which are overloaded by type will need to have their upper 2752 // bound computed at Sema time based on the type constant. 2753 ck = ClassB; 2754 if (R->getValueAsBit("isShift")) { 2755 shiftstr = ", true"; 2756 2757 // Right shifts have an 'r' in the name, left shifts do not. 2758 if (name.find('r') != std::string::npos) 2759 rangestr = "l = 1; "; 2760 } 2761 rangestr += "u = RFT(TV" + shiftstr + ")"; 2762 } else { 2763 // The immediate generally refers to a lane in the preceding argument. 2764 assert(immPos > 0 && "unexpected immediate operand"); 2765 rangestr = 2766 "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); 2767 } 2768 // Make sure cases appear only once by uniquing them in a string map. 
2769 namestr = MangleName(name, TypeVec[ti], ck); 2770 if (EmittedMap.count(namestr)) 2771 continue; 2772 EmittedMap[namestr] = OpNone; 2773 2774 // Calculate the index of the immediate that should be range checked. 2775 unsigned immidx = 0; 2776 2777 // Builtins that return a struct of multiple vectors have an extra 2778 // leading arg for the struct return. 2779 if (IsMultiVecProto(Proto[0])) 2780 ++immidx; 2781 2782 // Add one to the index for each argument until we reach the immediate 2783 // to be checked. Structs of vectors are passed as multiple arguments. 2784 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { 2785 switch (Proto[ii]) { 2786 default: 2787 immidx += 1; 2788 break; 2789 case '2': 2790 case 'B': 2791 immidx += 2; 2792 break; 2793 case '3': 2794 case 'C': 2795 immidx += 3; 2796 break; 2797 case '4': 2798 case 'D': 2799 immidx += 4; 2800 break; 2801 case 'i': 2802 ie = ii + 1; 2803 break; 2804 } 2805 } 2806 if (isA64RangeCheck) 2807 OS << "case AArch64::BI__builtin_neon_"; 2808 else 2809 OS << "case ARM::BI__builtin_neon_"; 2810 OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " 2811 << rangestr << "; break;\n"; 2812 } 2813 } 2814 OS << "#endif\n\n"; 2815} 2816 2817/// Generate the ARM and AArch64 overloaded type checking code for 2818/// SemaChecking.cpp, checking for unique builtin declarations. 
2819void 2820NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2821 StringMap<ClassKind> &A64IntrinsicMap, 2822 bool isA64TypeCheck) { 2823 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2824 StringMap<OpKind> EmittedMap; 2825 2826 // Generate the overloaded type checking code for SemaChecking.cpp 2827 if (isA64TypeCheck) 2828 OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n"; 2829 else 2830 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2831 2832 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2833 Record *R = RV[i]; 2834 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2835 if (k != OpNone) 2836 continue; 2837 2838 std::string Proto = R->getValueAsString("Prototype"); 2839 std::string Types = R->getValueAsString("Types"); 2840 std::string name = R->getValueAsString("Name"); 2841 std::string Rename = name + "@" + Proto; 2842 2843 // Functions with 'a' (the splat code) in the type prototype should not get 2844 // their own builtin as they use the non-splat variant. 2845 if (Proto.find('a') != std::string::npos) 2846 continue; 2847 2848 // Functions which have a scalar argument cannot be overloaded, no need to 2849 // check them if we are emitting the type checking code. 2850 if (ProtoHasScalar(Proto)) 2851 continue; 2852 2853 SmallVector<StringRef, 16> TypeVec; 2854 ParseTypes(R, Types, TypeVec); 2855 2856 if (R->getSuperClasses().size() < 2) 2857 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2858 2859 // Do not include AArch64 type checks if not generating code for AArch64. 2860 bool isA64 = R->getValueAsBit("isA64"); 2861 if (!isA64TypeCheck && isA64) 2862 continue; 2863 2864 // Include ARM type check in AArch64 but only if ARM intrinsics 2865 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr 2866 // redefined in AArch64 to handle an additional 2 x f64 type. 
2867 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2868 if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2869 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2870 if (A64CK == ck && ck != ClassNone) 2871 continue; 2872 } 2873 2874 int si = -1, qi = -1; 2875 uint64_t mask = 0, qmask = 0; 2876 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2877 // Generate the switch case(s) for this builtin for the type validation. 2878 bool quad = false, poly = false, usgn = false; 2879 (void) ClassifyType(TypeVec[ti], quad, poly, usgn); 2880 2881 if (quad) { 2882 qi = ti; 2883 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2884 } else { 2885 si = ti; 2886 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2887 } 2888 } 2889 2890 // Check if the builtin function has a pointer or const pointer argument. 2891 int PtrArgNum = -1; 2892 bool HasConstPtr = false; 2893 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) { 2894 char ArgType = Proto[arg]; 2895 if (ArgType == 'c') { 2896 HasConstPtr = true; 2897 PtrArgNum = arg - 1; 2898 break; 2899 } 2900 if (ArgType == 'p') { 2901 PtrArgNum = arg - 1; 2902 break; 2903 } 2904 } 2905 // For sret builtins, adjust the pointer argument index. 2906 if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0])) 2907 PtrArgNum += 1; 2908 2909 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2910 // and vst1_lane intrinsics. Using a pointer to the vector element 2911 // type with one of those operations causes codegen to select an aligned 2912 // load/store instruction. If you want an unaligned operation, 2913 // the pointer argument needs to have less alignment than element type, 2914 // so just accept any pointer type. 
2915 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") { 2916 PtrArgNum = -1; 2917 HasConstPtr = false; 2918 } 2919 2920 if (mask) { 2921 if (isA64TypeCheck) 2922 OS << "case AArch64::BI__builtin_neon_"; 2923 else 2924 OS << "case ARM::BI__builtin_neon_"; 2925 OS << MangleName(name, TypeVec[si], ClassB) << ": mask = " 2926 << "0x" << utohexstr(mask) << "ULL"; 2927 if (PtrArgNum >= 0) 2928 OS << "; PtrArgNum = " << PtrArgNum; 2929 if (HasConstPtr) 2930 OS << "; HasConstPtr = true"; 2931 OS << "; break;\n"; 2932 } 2933 if (qmask) { 2934 if (isA64TypeCheck) 2935 OS << "case AArch64::BI__builtin_neon_"; 2936 else 2937 OS << "case ARM::BI__builtin_neon_"; 2938 OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = " 2939 << "0x" << utohexstr(qmask) << "ULL"; 2940 if (PtrArgNum >= 0) 2941 OS << "; PtrArgNum = " << PtrArgNum; 2942 if (HasConstPtr) 2943 OS << "; HasConstPtr = true"; 2944 OS << "; break;\n"; 2945 } 2946 } 2947 OS << "#endif\n\n"; 2948} 2949 2950/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 2951/// declaration of builtins, checking for unique builtin declarations. 
2952void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 2953 StringMap<ClassKind> &A64IntrinsicMap, 2954 bool isA64GenBuiltinDef) { 2955 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2956 StringMap<OpKind> EmittedMap; 2957 2958 // Generate BuiltinsARM.def and BuiltinsAArch64.def 2959 if (isA64GenBuiltinDef) 2960 OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n"; 2961 else 2962 OS << "#ifdef GET_NEON_BUILTINS\n"; 2963 2964 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2965 Record *R = RV[i]; 2966 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2967 if (k != OpNone) 2968 continue; 2969 2970 std::string Proto = R->getValueAsString("Prototype"); 2971 std::string name = R->getValueAsString("Name"); 2972 std::string Rename = name + "@" + Proto; 2973 2974 // Functions with 'a' (the splat code) in the type prototype should not get 2975 // their own builtin as they use the non-splat variant. 2976 if (Proto.find('a') != std::string::npos) 2977 continue; 2978 2979 std::string Types = R->getValueAsString("Types"); 2980 SmallVector<StringRef, 16> TypeVec; 2981 ParseTypes(R, Types, TypeVec); 2982 2983 if (R->getSuperClasses().size() < 2) 2984 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2985 2986 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2987 2988 // Do not include AArch64 BUILTIN() macros if not generating 2989 // code for AArch64 2990 bool isA64 = R->getValueAsBit("isA64"); 2991 if (!isA64GenBuiltinDef && isA64) 2992 continue; 2993 2994 // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics 2995 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr 2996 // redefined in AArch64 to handle an additional 2 x f64 type. 
2997 if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) { 2998 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2999 if (A64CK == ck && ck != ClassNone) 3000 continue; 3001 } 3002 3003 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3004 // Generate the declaration for this builtin, ensuring 3005 // that each unique BUILTIN() macro appears only once in the output 3006 // stream. 3007 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); 3008 if (EmittedMap.count(bd)) 3009 continue; 3010 3011 EmittedMap[bd] = OpNone; 3012 OS << bd << "\n"; 3013 } 3014 } 3015 OS << "#endif\n\n"; 3016} 3017 3018/// runHeader - Emit a file with sections defining: 3019/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 3020/// 2. the SemaChecking code for the type overload checking. 3021/// 3. the SemaChecking code for validation of intrinsic immediate arguments. 3022void NeonEmitter::runHeader(raw_ostream &OS) { 3023 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3024 3025 // build a map of AArch64 intriniscs to be used in uniqueness checks. 
3026 StringMap<ClassKind> A64IntrinsicMap; 3027 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 3028 Record *R = RV[i]; 3029 3030 bool isA64 = R->getValueAsBit("isA64"); 3031 if (!isA64) 3032 continue; 3033 3034 ClassKind CK = ClassNone; 3035 if (R->getSuperClasses().size() >= 2) 3036 CK = ClassMap[R->getSuperClasses()[1]]; 3037 3038 std::string Name = R->getValueAsString("Name"); 3039 std::string Proto = R->getValueAsString("Prototype"); 3040 std::string Rename = Name + "@" + Proto; 3041 if (A64IntrinsicMap.count(Rename)) 3042 continue; 3043 A64IntrinsicMap[Rename] = CK; 3044 } 3045 3046 // Generate BuiltinsARM.def for ARM 3047 genBuiltinsDef(OS, A64IntrinsicMap, false); 3048 3049 // Generate BuiltinsAArch64.def for AArch64 3050 genBuiltinsDef(OS, A64IntrinsicMap, true); 3051 3052 // Generate ARM overloaded type checking code for SemaChecking.cpp 3053 genOverloadTypeCheckCode(OS, A64IntrinsicMap, false); 3054 3055 // Generate AArch64 overloaded type checking code for SemaChecking.cpp 3056 genOverloadTypeCheckCode(OS, A64IntrinsicMap, true); 3057 3058 // Generate ARM range checking code for shift/lane immediates. 3059 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false); 3060 3061 // Generate the AArch64 range checking code for shift/lane immediates. 3062 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true); 3063} 3064 3065/// GenTest - Write out a test for the intrinsic specified by the name and 3066/// type strings, including the embedded patterns for FileCheck to match. 
3067static std::string GenTest(const std::string &name, 3068 const std::string &proto, 3069 StringRef outTypeStr, StringRef inTypeStr, 3070 bool isShift, bool isHiddenLOp, 3071 ClassKind ck, const std::string &InstName, 3072 bool isA64, 3073 std::string & testFuncProto) { 3074 assert(!proto.empty() && ""); 3075 std::string s; 3076 3077 // Function name with type suffix 3078 std::string mangledName = MangleName(name, outTypeStr, ClassS); 3079 if (outTypeStr != inTypeStr) { 3080 // If the input type is different (e.g., for vreinterpret), append a suffix 3081 // for the input type. String off a "Q" (quad) prefix so that MangleName 3082 // does not insert another "q" in the name. 3083 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 3084 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 3085 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 3086 } 3087 3088 // todo: GenerateChecksForIntrinsic does not generate CHECK 3089 // for aarch64 instructions yet 3090 std::vector<std::string> FileCheckPatterns; 3091 if (!isA64) { 3092 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 3093 isHiddenLOp, FileCheckPatterns); 3094 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 3095 } 3096 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 3097 3098 // Emit the FileCheck patterns. 3099 // If for any reason we do not want to emit a check, mangledInst 3100 // will be the empty string. 3101 if (FileCheckPatterns.size()) { 3102 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 3103 e = FileCheckPatterns.end(); 3104 i != e; 3105 ++i) { 3106 s += "// CHECK_ARM: " + *i + "\n"; 3107 } 3108 } 3109 3110 // Emit the start of the test function. 3111 3112 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 3113 char arg = 'a'; 3114 std::string comma; 3115 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3116 // Do not create arguments for values that must be immediate constants. 
3117 if (proto[i] == 'i') 3118 continue; 3119 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 3120 testFuncProto.push_back(arg); 3121 comma = ", "; 3122 } 3123 testFuncProto += ")"; 3124 3125 s+= testFuncProto; 3126 s+= " {\n "; 3127 3128 if (proto[0] != 'v') 3129 s += "return "; 3130 s += mangledName + "("; 3131 arg = 'a'; 3132 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3133 if (proto[i] == 'i') { 3134 // For immediate operands, test the maximum value. 3135 if (isShift) 3136 s += "1"; // FIXME 3137 else 3138 // The immediate generally refers to a lane in the preceding argument. 3139 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 3140 } else { 3141 s.push_back(arg); 3142 } 3143 if ((i + 1) < e) 3144 s += ", "; 3145 } 3146 s += ");\n}\n\n"; 3147 return s; 3148} 3149 3150/// Write out all intrinsic tests for the specified target, checking 3151/// for intrinsic test uniqueness. 3152void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 3153 bool isA64GenTest) { 3154 if (isA64GenTest) 3155 OS << "#ifdef __aarch64__\n"; 3156 3157 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3158 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 3159 Record *R = RV[i]; 3160 std::string name = R->getValueAsString("Name"); 3161 std::string Proto = R->getValueAsString("Prototype"); 3162 std::string Types = R->getValueAsString("Types"); 3163 bool isShift = R->getValueAsBit("isShift"); 3164 std::string InstName = R->getValueAsString("InstName"); 3165 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 3166 bool isA64 = R->getValueAsBit("isA64"); 3167 3168 // do not include AArch64 intrinsic test if not generating 3169 // code for AArch64 3170 if (!isA64GenTest && isA64) 3171 continue; 3172 3173 SmallVector<StringRef, 16> TypeVec; 3174 ParseTypes(R, Types, TypeVec); 3175 3176 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 3177 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 3178 if (kind 
== OpUnavailable) 3179 continue; 3180 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3181 if (kind == OpReinterpret) { 3182 bool outQuad = false; 3183 bool dummy = false; 3184 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 3185 for (unsigned srcti = 0, srcte = TypeVec.size(); 3186 srcti != srcte; ++srcti) { 3187 bool inQuad = false; 3188 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 3189 if (srcti == ti || inQuad != outQuad) 3190 continue; 3191 std::string testFuncProto; 3192 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 3193 isShift, isHiddenLOp, ck, InstName, isA64, 3194 testFuncProto); 3195 if (EmittedMap.count(testFuncProto)) 3196 continue; 3197 EmittedMap[testFuncProto] = kind; 3198 OS << s << "\n"; 3199 } 3200 } else { 3201 std::string testFuncProto; 3202 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 3203 isHiddenLOp, ck, InstName, isA64, testFuncProto); 3204 if (EmittedMap.count(testFuncProto)) 3205 continue; 3206 EmittedMap[testFuncProto] = kind; 3207 OS << s << "\n"; 3208 } 3209 } 3210 } 3211 3212 if (isA64GenTest) 3213 OS << "#endif\n"; 3214} 3215/// runTests - Write out a complete set of tests for all of the Neon 3216/// intrinsics. 3217void NeonEmitter::runTests(raw_ostream &OS) { 3218 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 3219 "apcs-gnu\\\n" 3220 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 3221 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 3222 "\n" 3223 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 3224 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 3225 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 3226 "\n" 3227 "// REQUIRES: long_tests\n" 3228 "\n" 3229 "#include <arm_neon.h>\n" 3230 "\n"; 3231 3232 // ARM tests must be emitted before AArch64 tests to ensure 3233 // tests for intrinsics that are common to ARM and AArch64 3234 // appear only once in the output stream. 
3235 // The check for uniqueness is done in genTargetTest. 3236 StringMap<OpKind> EmittedMap; 3237 3238 genTargetTest(OS, EmittedMap, false); 3239 3240 genTargetTest(OS, EmittedMap, true); 3241} 3242 3243namespace clang { 3244void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 3245 NeonEmitter(Records).run(OS); 3246} 3247void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 3248 NeonEmitter(Records).runHeader(OS); 3249} 3250void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 3251 NeonEmitter(Records).runTests(OS); 3252} 3253} // End namespace clang 3254