NeonEmitter.cpp revision 2e22f29b92768ea65ac5c26d354226ecc7509311
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend is responsible for emitting arm_neon.h, which includes 11// a declaration and definition of each function specified by the ARM NEON 12// compiler interface. See ARM document DUI0348B. 13// 14// Each NEON instruction is implemented in terms of 1 or more functions which 15// are suffixed with the element type of the input vectors. Functions may be 16// implemented in terms of generic vector operations such as +, *, -, etc. or 17// by calling a __builtin_-prefixed function which will be handled by clang's 18// CodeGen library. 19// 20// Additional validation code can be generated by this file when runHeader() is 21// called, rather than the normal run() entry point. A complete set of tests 22// for Neon intrinsics can be generated by calling the runTests() entry point. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

// OpKind - The generic operation a builtin record is implemented with.  One
// enumerator per "OP_*" name registered in NeonEmitter's OpMap below.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ
};

// ClassKind - The name-mangling class of an instruction, i.e. which kind of
// element-type suffix its mangled name carries (see MangleName below).
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags; // packed as: EltType | UnsignedFlag | QuadFlag

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  NeonTypeFlags(unsigned F) : Flags(F) {}
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
class NeonEmitter {
  RecordKeeper &Records;
  // Maps an "OP_*" record name from arm_neon.td to its OpKind.
  StringMap<OpKind> OpMap;
  // Maps an instruction class record (SInst, IInst, ...) to its ClassKind.
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"] = OpMullHi;
    OpMap["OP_MLALHi"] = OpMlalHi;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_TRN1"]  = OpTrn1;
    OpMap["OP_ZIP1"]  = OpZip1;
    OpMap["OP_UZP1"]  = OpUzp1;
    OpMap["OP_TRN2"]  = OpTrn2;
    OpMap["OP_ZIP2"]  = OpZip2;
    OpMap["OP_UZP2"]  = OpUzp2;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY_LN"] = OpCopyLane;
    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;

    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// which each StringRef representing a single type declared in the string.
/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
/// 2xfloat and 4xfloat respectively.
static void ParseTypes(Record *r, std::string &s,
                       SmallVectorImpl<StringRef> &TV) {
  const char *data = s.data();
  int len = 0;

  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
    // Modifier letters stay attached to the type currently being scanned.
    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
        || data[len] == 'H' || data[len] == 'S')
      continue;

    switch (data[len]) {
    case 'c':
    case 's':
    case 'i':
    case 'l':
    case 'h':
    case 'f':
    case 'd':
      break;
    default:
      PrintFatalError(r->getLoc(),
                      "Unexpected letter: " + std::string(data + len, 1));
    }
    // A base-type letter terminates one type: record [data, data+len] and
    // restart the scan just past it (the for-header's ++len resets len to 0).
    TV.push_back(StringRef(data, len + 1));
    data += len + 1;
    len = -1;
  }
}

/// Widen - Convert a type code into the next wider type. char -> short,
/// short -> int, etc.
365static char Widen(const char t) { 366 switch (t) { 367 case 'c': 368 return 's'; 369 case 's': 370 return 'i'; 371 case 'i': 372 return 'l'; 373 case 'h': 374 return 'f'; 375 default: 376 PrintFatalError("unhandled type in widen!"); 377 } 378} 379 380/// Narrow - Convert a type code into the next smaller type. short -> char, 381/// float -> half float, etc. 382static char Narrow(const char t) { 383 switch (t) { 384 case 's': 385 return 'c'; 386 case 'i': 387 return 's'; 388 case 'l': 389 return 'i'; 390 case 'f': 391 return 'h'; 392 default: 393 PrintFatalError("unhandled type in narrow!"); 394 } 395} 396 397static std::string GetNarrowTypestr(StringRef ty) 398{ 399 std::string s; 400 for (size_t i = 0, end = ty.size(); i < end; i++) { 401 switch (ty[i]) { 402 case 's': 403 s += 'c'; 404 break; 405 case 'i': 406 s += 's'; 407 break; 408 case 'l': 409 s += 'i'; 410 break; 411 default: 412 s += ty[i]; 413 break; 414 } 415 } 416 417 return s; 418} 419 420/// For a particular StringRef, return the base type code, and whether it has 421/// the quad-vector, polynomial, or unsigned modifiers set. 422static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 423 unsigned off = 0; 424 // ignore scalar. 425 if (ty[off] == 'S') { 426 ++off; 427 } 428 // remember quad. 429 if (ty[off] == 'Q' || ty[off] == 'H') { 430 quad = true; 431 ++off; 432 } 433 434 // remember poly. 435 if (ty[off] == 'P') { 436 poly = true; 437 ++off; 438 } 439 440 // remember unsigned. 441 if (ty[off] == 'U') { 442 usgn = true; 443 ++off; 444 } 445 446 // base type to get the type string for. 447 return ty[off]; 448} 449 450/// ModType - Transform a type code and its modifiers based on a mod code. The 451/// mod code definitions may be found at the top of arm_neon.td. 
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
  case 't':
    // Force unsigned: polynomial types become unsigned integers.
    if (poly) {
      poly = false;
      usgn = true;
    }
    break;
  case 'b':
    scal = true;
    // FALLTHROUGH: 'b' is the scalar form of 'u'.
  case 'u':
    // Unsigned integer of the same width (floats map to same-size ints).
    usgn = true;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case '$':
    scal = true;
    // FALLTHROUGH: '$' is the scalar form of 'x'.
  case 'x':
    // Signed integer of the same width.
    usgn = false;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case 'o':
    // Scalar double.
    scal = true;
    type = 'd';
    usgn = false;
    break;
  case 'y':
    scal = true;
    // FALLTHROUGH: 'y' is the scalar form of 'f'.
  case 'f':
    // Float of the same width; a half input widens to a quad of floats.
    if (type == 'h')
      quad = true;
    type = 'f';
    usgn = false;
    break;
  case 'g':
    // Force a 64-bit (non-quad) vector.
    quad = false;
    break;
  case 'B':
  case 'C':
  case 'D':
  case 'j':
    // Force a 128-bit (quad) vector.
    quad = true;
    break;
  case 'w':
    // Widen the element and double the vector width.
    type = Widen(type);
    quad = true;
    break;
  case 'n':
    // Widen the element, keep the vector width.
    type = Widen(type);
    break;
  case 'i':
    // Scalar int (e.g. a lane or constant argument).
    type = 'i';
    scal = true;
    break;
  case 'l':
    // Scalar unsigned 64-bit int.
    type = 'l';
    scal = true;
    usgn = true;
    break;
  case 'z':
    // Narrowed scalar.
    type = Narrow(type);
    scal = true;
    break;
  case 'r':
    // Widened scalar.
    type = Widen(type);
    scal = true;
    break;
  case 's':
  case 'a':
    // Scalar of the base type.
    scal = true;
    break;
  case 'k':
    quad = true;
    break;
  case 'c':
    cnst = true;
    // FALLTHROUGH: 'c' is the const form of the pointer modifier 'p'.
  case 'p':
    pntr = true;
    scal = true;
    break;
  case 'h':
    // Narrow the element; halving halves ('h' result) drop to non-quad.
    type = Narrow(type);
    if (type == 'h')
      quad = false;
    break;
  case 'q':
    // Narrow the element but force a quad vector.
    type = Narrow(type);
    quad = true;
    break;
  case 'e':
    // Narrow the element and force unsigned.
    type = Narrow(type);
    usgn = true;
    break;
  case 'm':
    // Narrow the element and force non-quad.
    type = Narrow(type);
    quad = false;
    break;
  default:
    break;
  }
  return type;
}

/// IsMultiVecProto - True if the prototype char denotes a structure of 2, 3
/// or 4 vectors ('2'-'4', or the quad-forcing 'B'-'D' forms).
static bool IsMultiVecProto(const char p) {
  return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
}

/// TypeString - for a modifier and type, generate the name of the
typedef for 568/// that type. QUc -> uint8x8_t. 569static std::string TypeString(const char mod, StringRef typestr) { 570 bool quad = false; 571 bool poly = false; 572 bool usgn = false; 573 bool scal = false; 574 bool cnst = false; 575 bool pntr = false; 576 577 if (mod == 'v') 578 return "void"; 579 if (mod == 'i') 580 return "int"; 581 582 // base type to get the type string for. 583 char type = ClassifyType(typestr, quad, poly, usgn); 584 585 // Based on the modifying character, change the type and width if necessary. 586 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 587 588 SmallString<128> s; 589 590 if (usgn) 591 s.push_back('u'); 592 593 switch (type) { 594 case 'c': 595 s += poly ? "poly8" : "int8"; 596 if (scal) 597 break; 598 s += quad ? "x16" : "x8"; 599 break; 600 case 's': 601 s += poly ? "poly16" : "int16"; 602 if (scal) 603 break; 604 s += quad ? "x8" : "x4"; 605 break; 606 case 'i': 607 s += "int32"; 608 if (scal) 609 break; 610 s += quad ? "x4" : "x2"; 611 break; 612 case 'l': 613 s += "int64"; 614 if (scal) 615 break; 616 s += quad ? "x2" : "x1"; 617 break; 618 case 'h': 619 s += "float16"; 620 if (scal) 621 break; 622 s += quad ? "x8" : "x4"; 623 break; 624 case 'f': 625 s += "float32"; 626 if (scal) 627 break; 628 s += quad ? "x4" : "x2"; 629 break; 630 case 'd': 631 s += "float64"; 632 if (scal) 633 break; 634 s += quad ? "x2" : "x1"; 635 break; 636 637 default: 638 PrintFatalError("unhandled type!"); 639 } 640 641 if (mod == '2' || mod == 'B') 642 s += "x2"; 643 if (mod == '3' || mod == 'C') 644 s += "x3"; 645 if (mod == '4' || mod == 'D') 646 s += "x4"; 647 648 // Append _t, finishing the type string typedef type. 649 s += "_t"; 650 651 if (cnst) 652 s += " const"; 653 654 if (pntr) 655 s += " *"; 656 657 return s.str(); 658} 659 660/// BuiltinTypeString - for a modifier and type, generate the clang 661/// BuiltinsARM.def prototype code for the function. 
/// See the top of clang's Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers. Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Integer-class and width-class scalars are spelled unsigned, except floats.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                        scal && type != 'f' && type != 'd');

  // Scalar arguments/results use plain Builtins.def codes.
  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (IsMultiVecProto(mod))
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 'd')
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and any leftover case) returns a vector of signed chars.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2' || mod == 'B')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3' || mod == 'C')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4' || mod == 'D')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 'd')
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // There is no width-class form of float64.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// Insert_BHSD_Suffix - For a scalar ('S'-prefixed) type string, return the
/// AArch64 scalar register-size suffix letter (b/h/s/d), or 0 if the type
/// string is not scalar.
static char Insert_BHSD_Suffix(StringRef typestr){
  unsigned off = 0;
  if(typestr[off++] == 'S'){
    // Skip any quad/poly/unsigned modifiers to reach the base type.
    while(typestr[off] == 'Q' || typestr[off] == 'H'||
          typestr[off] == 'P' || typestr[off] == 'U')
      ++off;
    switch (typestr[off]){
    default : break;
    case 'c' : return 'b';
    case 's' : return 'h';
    case 'i' :
    case 'f' : return 's';
    case 'l' :
    case 'd' : return 'd';
    }
  }
  return 0;
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // vcvt_f32_f16 is already fully mangled; pass it through untouched.
  if (name == "vcvt_f32_f16")
    return name;

  bool quad = false;
  std::string typeCode = "";

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    s += "_" + typeCode;
  }

  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  if (typestr.find("Q") != StringRef::npos) {
    size_t pos = s.find('_');
    s = s.insert(pos, "q");
  }
  // Likewise insert the scalar b/h/s/d suffix before the first '_'.
  char ins = Insert_BHSD_Suffix(typestr);
  if(ins){
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}

/// PreprocessInstruction - Derive the instruction Prefix and the postfix
/// flags (_n, _lane, _dup, vcvt special case, vtbl/vtbx N) that the test
/// generators below key their special cases off of.
static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value.
    TBNumber = Name[Name.size()-1] - 48; // '0' is ASCII 48
  }
}

/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // Quad (non-lane) forms name a second d-register per element.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}

/// HasNPostfixAndScalarArgs - True for the multiply-family intrinsics when
/// they carry an "_n" postfix (i.e. take a scalar multiplier argument).
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          NameRef.count("vmull") ||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqdmull") ||
          NameRef.count("vqrdmulh")) && HasNPostfix;
}

/// IsFiveOperandLaneAccumulator - True for the multiply/accumulate-family
/// "_lane" intrinsics whose check pattern needs five operands.
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}

/// IsSpecialLaneMultiply - True for the lane-multiply forms (quad vmul/mulh,
/// non-quad mull) that need their normalized proto shortened specially.
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
                             && IsQuad;
  const bool IsVMull = NameRef.count("mull") && !IsQuad;
  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
}

/// NormalizeProtoForRegisterPatternCreation - Map each proto character onto
/// one of the four normalized operand kinds ('q' quad reg, 'd' double reg,
/// 'i' immediate, 'a' lane access), then trim the result for the known
/// special-case instruction shapes.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If we have a special lane multiply, drop the third normalized
    // character: keep characters 0 and 1 and the old character 3.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup_n/vmov_n get no register pattern at all.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from v{ld,st}N using its ascii representation.
    const size_t Count = NameRef[3] - 48;

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    // Table instructions: a destination, an N-register table list, an index.
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing. On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd.
So 1311 // check if we have one of those instructions and just output a 1312 // check for vmul. 1313 if (OutTypeCode == "f32") { 1314 if (Prefix == "vmls") { 1315 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1316 Result.push_back("vsub." + OutTypeCode); 1317 return; 1318 } else if (Prefix == "vmla") { 1319 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1320 Result.push_back("vadd." + OutTypeCode); 1321 return; 1322 } 1323 } 1324 1325 // If we have vcvt, get the input type from the instruction name 1326 // (which should be of the form instname_inputtype) and append it 1327 // before the output type. 1328 if (Prefix == "vcvt") { 1329 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1); 1330 Prefix += "." + inTypeCode; 1331 } 1332 1333 // Append output type code to get our final mangled instruction. 1334 Prefix += "." + OutTypeCode; 1335 1336 Result.push_back(Prefix + " " + RegisterSuffix); 1337} 1338 1339/// UseMacro - Examine the prototype string to determine if the intrinsic 1340/// should be defined as a preprocessor macro instead of an inline function. 1341static bool UseMacro(const std::string &proto) { 1342 // If this builtin takes an immediate argument, we need to #define it rather 1343 // than use a standard declaration, so that SemaChecking can range check 1344 // the immediate passed by the user. 1345 if (proto.find('i') != std::string::npos) 1346 return true; 1347 1348 // Pointer arguments need to use macros to avoid hiding aligned attributes 1349 // from the pointer type. 1350 if (proto.find('p') != std::string::npos || 1351 proto.find('c') != std::string::npos) 1352 return true; 1353 1354 return false; 1355} 1356 1357/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 1358/// defined as a macro should be accessed directly instead of being first 1359/// assigned to a local temporary. 
1360static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1361 // True for constant ints (i), pointers (p) and const pointers (c). 1362 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1363} 1364 1365// Generate the string "(argtype a, argtype b, ...)" 1366static std::string GenArgs(const std::string &proto, StringRef typestr, 1367 const std::string &name) { 1368 bool define = UseMacro(proto); 1369 char arg = 'a'; 1370 1371 std::string s; 1372 s += "("; 1373 1374 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1375 if (define) { 1376 // Some macro arguments are used directly instead of being assigned 1377 // to local temporaries; prepend an underscore prefix to make their 1378 // names consistent with the local temporaries. 1379 if (MacroArgUsedDirectly(proto, i)) 1380 s += "__"; 1381 } else { 1382 s += TypeString(proto[i], typestr) + " __"; 1383 } 1384 s.push_back(arg); 1385 //To avoid argument being multiple defined, add extra number for renaming. 1386 if (name == "vcopy_lane" || name == "vcopy_laneq") 1387 s.push_back('1'); 1388 if ((i + 1) < e) 1389 s += ", "; 1390 } 1391 1392 s += ")"; 1393 return s; 1394} 1395 1396// Macro arguments are not type-checked like inline function arguments, so 1397// assign them to local temporaries to get the right type checking. 1398static std::string GenMacroLocals(const std::string &proto, StringRef typestr, 1399 const std::string &name ) { 1400 char arg = 'a'; 1401 std::string s; 1402 bool generatedLocal = false; 1403 1404 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1405 // Do not create a temporary for an immediate argument. 1406 // That would defeat the whole point of using a macro! 
1407 if (MacroArgUsedDirectly(proto, i)) 1408 continue; 1409 generatedLocal = true; 1410 bool extranumber = false; 1411 if (name == "vcopy_lane" || name == "vcopy_laneq") 1412 extranumber = true; 1413 1414 s += TypeString(proto[i], typestr) + " __"; 1415 s.push_back(arg); 1416 if(extranumber) 1417 s.push_back('1'); 1418 s += " = ("; 1419 s.push_back(arg); 1420 if(extranumber) 1421 s.push_back('1'); 1422 s += "); "; 1423 } 1424 1425 if (generatedLocal) 1426 s += "\\\n "; 1427 return s; 1428} 1429 1430// Use the vmovl builtin to sign-extend or zero-extend a vector. 1431static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1432 std::string s, high; 1433 high = h ? "_high" : ""; 1434 s = MangleName("vmovl" + high, typestr, ClassS); 1435 s += "(" + a + ")"; 1436 return s; 1437} 1438 1439// Get the high 64-bit part of a vector 1440static std::string GetHigh(const std::string &a, StringRef typestr) { 1441 std::string s; 1442 s = MangleName("vget_high", typestr, ClassS); 1443 s += "(" + a + ")"; 1444 return s; 1445} 1446 1447// Gen operation with two operands and get high 64-bit for both of two operands. 1448static std::string Gen2OpWith2High(StringRef typestr, 1449 const std::string &op, 1450 const std::string &a, 1451 const std::string &b) { 1452 std::string s; 1453 std::string Op1 = GetHigh(a, typestr); 1454 std::string Op2 = GetHigh(b, typestr); 1455 s = MangleName(op, typestr, ClassS); 1456 s += "(" + Op1 + ", " + Op2 + ");"; 1457 return s; 1458} 1459 1460// Gen operation with three operands and get high 64-bit of the latter 1461// two operands. 
1462static std::string Gen3OpWith2High(StringRef typestr, 1463 const std::string &op, 1464 const std::string &a, 1465 const std::string &b, 1466 const std::string &c) { 1467 std::string s; 1468 std::string Op1 = GetHigh(b, typestr); 1469 std::string Op2 = GetHigh(c, typestr); 1470 s = MangleName(op, typestr, ClassS); 1471 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1472 return s; 1473} 1474 1475// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 1476static std::string GenCombine(std::string typestr, 1477 const std::string &a, 1478 const std::string &b) { 1479 std::string s; 1480 s = MangleName("vcombine", typestr, ClassS); 1481 s += "(" + a + ", " + b + ")"; 1482 return s; 1483} 1484 1485static std::string Duplicate(unsigned nElts, StringRef typestr, 1486 const std::string &a) { 1487 std::string s; 1488 1489 s = "(" + TypeString('d', typestr) + "){ "; 1490 for (unsigned i = 0; i != nElts; ++i) { 1491 s += a; 1492 if ((i + 1) < nElts) 1493 s += ", "; 1494 } 1495 s += " }"; 1496 1497 return s; 1498} 1499 1500static std::string SplatLane(unsigned nElts, const std::string &vec, 1501 const std::string &lane) { 1502 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1503 for (unsigned i = 0; i < nElts; ++i) 1504 s += ", " + lane; 1505 s += ")"; 1506 return s; 1507} 1508 1509static std::string RemoveHigh(const std::string &name) { 1510 std::string s = name; 1511 std::size_t found = s.find("_high_"); 1512 if (found == std::string::npos) 1513 PrintFatalError("name should contain \"_high_\" for high intrinsics"); 1514 s.replace(found, 5, ""); 1515 return s; 1516} 1517 1518static unsigned GetNumElements(StringRef typestr, bool &quad) { 1519 quad = false; 1520 bool dummy = false; 1521 char type = ClassifyType(typestr, quad, dummy, dummy); 1522 unsigned nElts = 0; 1523 switch (type) { 1524 case 'c': nElts = 8; break; 1525 case 's': nElts = 4; break; 1526 case 'i': nElts = 2; break; 1527 case 'l': nElts = 1; break; 1528 case 'h': nElts = 4; 
// (continuation of GetNumElements: remaining element-count cases)
    break;
  case 'f': nElts = 2; break;
  case 'd':
    nElts = 1;
    break;
  default:
    PrintFatalError("unhandled type!");
  }
  // A quad ("Q") type holds twice as many elements.
  if (quad) nElts <<= 1;
  return nElts;
}

// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
// The returned string is the body text to be spliced into either an inline
// function or a macro (when UseMacro(proto) is true the leading "return "
// is omitted and "\\\n" continuations appear inside some cases).
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMulXLane:
    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMullHiLane:
    s += MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlalHiLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpFMSLane:
    // Fused multiply-subtract is emitted as vfma with a negated third
    // operand; the __x1 temporaries avoid name clashes on macro expansion.
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpFMSLaneQ:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlslHiLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMullHiLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlalHiLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslHiLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: the cast to the (unsigned) result type 'ts' is needed
  // because vector comparisons yield signed results.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Reverse the elements within each 16-bit halfword.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Reverse the elements within each 32-bit word.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Reverse the elements within each 64-bit doubleword.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpUzp1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i);
    s += ");";
    break;
  case OpUzp2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i+1);
    s += ");";
    break;
  case OpZip1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
      s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpZip2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = nElts/2; i < nElts; i++)
      s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpTrn1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
      s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
    s += ");";
    break;
  case OpTrn2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
      s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
    s += ");";
    break;
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
      MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
      MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
      "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
      MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  case OpCopyLane: {
    s += TypeString('s', typestr) + " __c2 = " +
      MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
      MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyQLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
      "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
      "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}

// Compute the NeonTypeFlags constant passed as the trailing argument to
// ClassB __builtin_neon_* calls: an element-type enum plus unsigned/quad bits.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  NeonTypeFlags::EltType ET;
  switch (type) {
    case 'c':
      ET = poly ?
NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 1967 break; 1968 case 's': 1969 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 1970 break; 1971 case 'i': 1972 ET = NeonTypeFlags::Int32; 1973 break; 1974 case 'l': 1975 ET = NeonTypeFlags::Int64; 1976 break; 1977 case 'h': 1978 ET = NeonTypeFlags::Float16; 1979 break; 1980 case 'f': 1981 ET = NeonTypeFlags::Float32; 1982 break; 1983 case 'd': 1984 ET = NeonTypeFlags::Float64; 1985 break; 1986 default: 1987 PrintFatalError("unhandled type!"); 1988 } 1989 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 1990 return Flags.getFlags(); 1991} 1992 1993static bool ProtoHasScalar(const std::string proto) 1994{ 1995 return (proto.find('s') != std::string::npos 1996 || proto.find('r') != std::string::npos); 1997} 1998 1999// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 2000static std::string GenBuiltin(const std::string &name, const std::string &proto, 2001 StringRef typestr, ClassKind ck) { 2002 std::string s; 2003 2004 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 2005 // sret-like argument. 2006 bool sret = IsMultiVecProto(proto[0]); 2007 2008 bool define = UseMacro(proto); 2009 2010 // Check if the prototype has a scalar operand with the type of the vector 2011 // elements. If not, bitcasting the args will take care of arg checking. 2012 // The actual signedness etc. will be taken care of with special enums. 2013 if (!ProtoHasScalar(proto)) 2014 ck = ClassB; 2015 2016 if (proto[0] != 'v') { 2017 std::string ts = TypeString(proto[0], typestr); 2018 2019 if (define) { 2020 if (sret) 2021 s += ts + " r; "; 2022 else 2023 s += "(" + ts + ")"; 2024 } else if (sret) { 2025 s += ts + " r; "; 2026 } else { 2027 s += "return (" + ts + ")"; 2028 } 2029 } 2030 2031 bool splat = proto.find('a') != std::string::npos; 2032 2033 s += "__builtin_neon_"; 2034 if (splat) { 2035 // Call the non-splat builtin: chop off the "_n" suffix from the name. 
2036 std::string vname(name, 0, name.size()-2); 2037 s += MangleName(vname, typestr, ck); 2038 } else { 2039 s += MangleName(name, typestr, ck); 2040 } 2041 s += "("; 2042 2043 // Pass the address of the return variable as the first argument to sret-like 2044 // builtins. 2045 if (sret) 2046 s += "&r, "; 2047 2048 char arg = 'a'; 2049 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2050 std::string args = std::string(&arg, 1); 2051 2052 // Use the local temporaries instead of the macro arguments. 2053 args = "__" + args; 2054 2055 bool argQuad = false; 2056 bool argPoly = false; 2057 bool argUsgn = false; 2058 bool argScalar = false; 2059 bool dummy = false; 2060 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); 2061 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, 2062 dummy, dummy); 2063 2064 // Handle multiple-vector values specially, emitting each subvector as an 2065 // argument to the __builtin. 2066 unsigned NumOfVec = 0; 2067 if (proto[i] >= '2' && proto[i] <= '4') { 2068 NumOfVec = proto[i] - '0'; 2069 } else if (proto[i] >= 'B' && proto[i] <= 'D') { 2070 NumOfVec = proto[i] - 'A' + 1; 2071 } 2072 2073 if (NumOfVec > 0) { 2074 // Check if an explicit cast is needed. 2075 if (argType != 'c' || argPoly || argUsgn) 2076 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; 2077 2078 for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) { 2079 s += args + ".val[" + utostr(vi) + "]"; 2080 if ((vi + 1) < ve) 2081 s += ", "; 2082 } 2083 if ((i + 1) < e) 2084 s += ", "; 2085 2086 continue; 2087 } 2088 2089 if (splat && (i + 1) == e) 2090 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); 2091 2092 // Check if an explicit cast is needed. 
2093 if ((splat || !argScalar) && 2094 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { 2095 std::string argTypeStr = "c"; 2096 if (ck != ClassB) 2097 argTypeStr = argType; 2098 if (argQuad) 2099 argTypeStr = "Q" + argTypeStr; 2100 args = "(" + TypeString('d', argTypeStr) + ")" + args; 2101 } 2102 2103 s += args; 2104 if ((i + 1) < e) 2105 s += ", "; 2106 } 2107 2108 // Extra constant integer to hold type class enum for this function, e.g. s8 2109 if (ck == ClassB) 2110 s += ", " + utostr(GetNeonEnum(proto, typestr)); 2111 2112 s += ");"; 2113 2114 if (proto[0] != 'v' && sret) { 2115 if (define) 2116 s += " r;"; 2117 else 2118 s += " return r;"; 2119 } 2120 return s; 2121} 2122 2123static std::string GenBuiltinDef(const std::string &name, 2124 const std::string &proto, 2125 StringRef typestr, ClassKind ck) { 2126 std::string s("BUILTIN(__builtin_neon_"); 2127 2128 // If all types are the same size, bitcasting the args will take care 2129 // of arg checking. The actual signedness etc. will be taken care of with 2130 // special enums. 2131 if (!ProtoHasScalar(proto)) 2132 ck = ClassB; 2133 2134 s += MangleName(name, typestr, ck); 2135 s += ", \""; 2136 2137 for (unsigned i = 0, e = proto.size(); i != e; ++i) 2138 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 2139 2140 // Extra constant integer to hold type class enum for this function, e.g. 
s8 2141 if (ck == ClassB) 2142 s += "i"; 2143 2144 s += "\", \"n\")"; 2145 return s; 2146} 2147 2148static std::string GenIntrinsic(const std::string &name, 2149 const std::string &proto, 2150 StringRef outTypeStr, StringRef inTypeStr, 2151 OpKind kind, ClassKind classKind) { 2152 assert(!proto.empty() && ""); 2153 bool define = UseMacro(proto) && kind != OpUnavailable; 2154 std::string s; 2155 2156 // static always inline + return type 2157 if (define) 2158 s += "#define "; 2159 else 2160 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 2161 2162 // Function name with type suffix 2163 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2164 if (outTypeStr != inTypeStr) { 2165 // If the input type is different (e.g., for vreinterpret), append a suffix 2166 // for the input type. String off a "Q" (quad) prefix so that MangleName 2167 // does not insert another "q" in the name. 2168 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2169 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2170 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2171 } 2172 s += mangledName; 2173 2174 // Function arguments 2175 s += GenArgs(proto, inTypeStr, name); 2176 2177 // Definition. 2178 if (define) { 2179 s += " __extension__ ({ \\\n "; 2180 s += GenMacroLocals(proto, inTypeStr, name); 2181 } else if (kind == OpUnavailable) { 2182 s += " __attribute__((unavailable));\n"; 2183 return s; 2184 } else 2185 s += " {\n "; 2186 2187 if (kind != OpNone) 2188 s += GenOpString(name, kind, proto, outTypeStr); 2189 else 2190 s += GenBuiltin(name, proto, outTypeStr, classKind); 2191 if (define) 2192 s += " })"; 2193 else 2194 s += " }"; 2195 s += "\n"; 2196 return s; 2197} 2198 2199/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2200/// is comprised of type definitions and function declarations. 
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the MIT-style license header for the generated file.
  OS << 
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t is only available on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  // Double-element ('d') types are AArch64-only: open an "#ifdef __aarch64__"
  // block before the first such type (preinsert) and close it when a
  // non-AArch64 type follows (postinsert).
  bool isA64 = false;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool preinsert = false;
    bool postinsert = false;

    if (type == 'd') {
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit element counts so the typedef names line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  OS << "\n";

  // Emit struct typedefs (the vX_t2 / _t3 / _t4 multi-vector types), with the
  // same AArch64 guard handling as above.
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool preinsert = false;
      bool postinsert = false;

      if (type == 'd') {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret is emitted for every (dst, src) pair of same-width
      // (quad vs. non-quad) types, not just for each type in TypeVec.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // Only emit each definition once.
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}

/// RangeFromType - Return the maximum valid lane index (number of elements
/// minus one) for the vector type obtained by applying modifier 'mod' to the
/// base type in 'typestr'.
static unsigned RangeFromType(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool quad = false, dummy = false;
  char type = ClassifyType(typestr, quad, dummy, dummy);
  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);

  switch (type) {
    case 'c':
      return (8 << (int)quad) - 1;
    case 'h':
    case 's':
      return (4 << (int)quad) - 1;
    case 'f':
    case 'i':
      return (2 << (int)quad) - 1;
    case 'd':
    case 'l':
      return (1 << (int)quad) - 1;
    default:
      PrintFatalError("unhandled type!");
  }
}

/// RangeScalarShiftImm - Return the maximum shift amount (element width in
/// bits minus one) for the scalar type obtained by applying modifier 'mod' to
/// the base type in 'typestr'.
static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool dummy = false;
  char type = ClassifyType(typestr, dummy, dummy, dummy);
  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);

  switch (type) {
    case 'c':
      return 7;
    case 'h':
    case 's':
      return 15;
    case 'f':
    case 'i':
      return 31;
    case 'd':
    case 'l':
      return 63;
    default:
      PrintFatalError("unhandled type!");
  }
}

/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only builtins (OpNone) reach Sema; expanded ops need no range check.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");

      } else if (R->getValueAsBit("isScalarShift")) {
        // Right shifts have an 'r' in the name, left shifts do not.  Convert
        // instructions have the same bounds and right shifts.
        if (name.find('r') != std::string::npos ||
            name.find("cvt") != std::string::npos)
          rangestr = "l = 1; ";

        rangestr += "u = " +
          utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
      } else if (!ProtoHasScalar(Proto)) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (IsMultiVecProto(Proto[0]))
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
        case 'B':
          immidx += 2;
          break;
        case '3':
        case 'C':
          immidx += 3;
          break;
        case '4':
        case 'D':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate; terminate the scan.
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
2643void 2644NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2645 StringMap<ClassKind> &A64IntrinsicMap, 2646 bool isA64TypeCheck) { 2647 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2648 StringMap<OpKind> EmittedMap; 2649 2650 // Generate the overloaded type checking code for SemaChecking.cpp 2651 if (isA64TypeCheck) 2652 OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n"; 2653 else 2654 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2655 2656 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2657 Record *R = RV[i]; 2658 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2659 if (k != OpNone) 2660 continue; 2661 2662 std::string Proto = R->getValueAsString("Prototype"); 2663 std::string Types = R->getValueAsString("Types"); 2664 std::string name = R->getValueAsString("Name"); 2665 std::string Rename = name + "@" + Proto; 2666 2667 // Functions with 'a' (the splat code) in the type prototype should not get 2668 // their own builtin as they use the non-splat variant. 2669 if (Proto.find('a') != std::string::npos) 2670 continue; 2671 2672 // Functions which have a scalar argument cannot be overloaded, no need to 2673 // check them if we are emitting the type checking code. 2674 if (ProtoHasScalar(Proto)) 2675 continue; 2676 2677 SmallVector<StringRef, 16> TypeVec; 2678 ParseTypes(R, Types, TypeVec); 2679 2680 if (R->getSuperClasses().size() < 2) 2681 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2682 2683 // Do not include AArch64 type checks if not generating code for AArch64. 2684 bool isA64 = R->getValueAsBit("isA64"); 2685 if (!isA64TypeCheck && isA64) 2686 continue; 2687 2688 // Include ARM type check in AArch64 but only if ARM intrinsics 2689 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr 2690 // redefined in AArch64 to handle an additional 2 x f64 type. 
2691 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2692 if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2693 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2694 if (A64CK == ck && ck != ClassNone) 2695 continue; 2696 } 2697 2698 int si = -1, qi = -1; 2699 uint64_t mask = 0, qmask = 0; 2700 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2701 // Generate the switch case(s) for this builtin for the type validation. 2702 bool quad = false, poly = false, usgn = false; 2703 (void) ClassifyType(TypeVec[ti], quad, poly, usgn); 2704 2705 if (quad) { 2706 qi = ti; 2707 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2708 } else { 2709 si = ti; 2710 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2711 } 2712 } 2713 2714 // Check if the builtin function has a pointer or const pointer argument. 2715 int PtrArgNum = -1; 2716 bool HasConstPtr = false; 2717 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) { 2718 char ArgType = Proto[arg]; 2719 if (ArgType == 'c') { 2720 HasConstPtr = true; 2721 PtrArgNum = arg - 1; 2722 break; 2723 } 2724 if (ArgType == 'p') { 2725 PtrArgNum = arg - 1; 2726 break; 2727 } 2728 } 2729 // For sret builtins, adjust the pointer argument index. 2730 if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0])) 2731 PtrArgNum += 1; 2732 2733 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2734 // and vst1_lane intrinsics. Using a pointer to the vector element 2735 // type with one of those operations causes codegen to select an aligned 2736 // load/store instruction. If you want an unaligned operation, 2737 // the pointer argument needs to have less alignment than element type, 2738 // so just accept any pointer type. 
2739 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") { 2740 PtrArgNum = -1; 2741 HasConstPtr = false; 2742 } 2743 2744 if (mask) { 2745 if (isA64TypeCheck) 2746 OS << "case AArch64::BI__builtin_neon_"; 2747 else 2748 OS << "case ARM::BI__builtin_neon_"; 2749 OS << MangleName(name, TypeVec[si], ClassB) << ": mask = " 2750 << "0x" << utohexstr(mask) << "ULL"; 2751 if (PtrArgNum >= 0) 2752 OS << "; PtrArgNum = " << PtrArgNum; 2753 if (HasConstPtr) 2754 OS << "; HasConstPtr = true"; 2755 OS << "; break;\n"; 2756 } 2757 if (qmask) { 2758 if (isA64TypeCheck) 2759 OS << "case AArch64::BI__builtin_neon_"; 2760 else 2761 OS << "case ARM::BI__builtin_neon_"; 2762 OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = " 2763 << "0x" << utohexstr(qmask) << "ULL"; 2764 if (PtrArgNum >= 0) 2765 OS << "; PtrArgNum = " << PtrArgNum; 2766 if (HasConstPtr) 2767 OS << "; HasConstPtr = true"; 2768 OS << "; break;\n"; 2769 } 2770 } 2771 OS << "#endif\n\n"; 2772} 2773 2774/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 2775/// declaration of builtins, checking for unique builtin declarations. 
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only builtins (OpNone) get BUILTIN() entries; ops are expanded inline.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM  BUILTIN() macros in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // Build a map of AArch64 intrinsics to be used in uniqueness checks.
  // Keyed by "Name@Prototype" so intrinsics that are redefined for AArch64
  // with a different prototype are treated as distinct.
  StringMap<ClassKind> A64IntrinsicMap;
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    ClassKind CK = ClassNone;
    if (R->getSuperClasses().size() >= 2)
      CK = ClassMap[R->getSuperClasses()[1]];

    std::string Name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Rename = Name + "@" + Proto;
    if (A64IntrinsicMap.count(Rename))
      continue;
    A64IntrinsicMap[Rename] = CK;
  }

  // Generate BuiltinsARM.def for ARM
  genBuiltinsDef(OS, A64IntrinsicMap, false);

  // Generate BuiltinsAArch64.def for AArch64
  genBuiltinsDef(OS, A64IntrinsicMap, true);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);

  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);

  // Generate the AArch64 range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
2891static std::string GenTest(const std::string &name, 2892 const std::string &proto, 2893 StringRef outTypeStr, StringRef inTypeStr, 2894 bool isShift, bool isHiddenLOp, 2895 ClassKind ck, const std::string &InstName, 2896 bool isA64, 2897 std::string & testFuncProto) { 2898 assert(!proto.empty() && ""); 2899 std::string s; 2900 2901 // Function name with type suffix 2902 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2903 if (outTypeStr != inTypeStr) { 2904 // If the input type is different (e.g., for vreinterpret), append a suffix 2905 // for the input type. String off a "Q" (quad) prefix so that MangleName 2906 // does not insert another "q" in the name. 2907 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2908 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2909 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2910 } 2911 2912 // todo: GenerateChecksForIntrinsic does not generate CHECK 2913 // for aarch64 instructions yet 2914 std::vector<std::string> FileCheckPatterns; 2915 if (!isA64) { 2916 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 2917 isHiddenLOp, FileCheckPatterns); 2918 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 2919 } 2920 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 2921 2922 // Emit the FileCheck patterns. 2923 // If for any reason we do not want to emit a check, mangledInst 2924 // will be the empty string. 2925 if (FileCheckPatterns.size()) { 2926 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 2927 e = FileCheckPatterns.end(); 2928 i != e; 2929 ++i) { 2930 s += "// CHECK_ARM: " + *i + "\n"; 2931 } 2932 } 2933 2934 // Emit the start of the test function. 2935 2936 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 2937 char arg = 'a'; 2938 std::string comma; 2939 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2940 // Do not create arguments for values that must be immediate constants. 
2941 if (proto[i] == 'i') 2942 continue; 2943 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 2944 testFuncProto.push_back(arg); 2945 comma = ", "; 2946 } 2947 testFuncProto += ")"; 2948 2949 s+= testFuncProto; 2950 s+= " {\n "; 2951 2952 if (proto[0] != 'v') 2953 s += "return "; 2954 s += mangledName + "("; 2955 arg = 'a'; 2956 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2957 if (proto[i] == 'i') { 2958 // For immediate operands, test the maximum value. 2959 if (isShift) 2960 s += "1"; // FIXME 2961 else 2962 // The immediate generally refers to a lane in the preceding argument. 2963 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 2964 } else { 2965 s.push_back(arg); 2966 } 2967 if ((i + 1) < e) 2968 s += ", "; 2969 } 2970 s += ");\n}\n\n"; 2971 return s; 2972} 2973 2974/// Write out all intrinsic tests for the specified target, checking 2975/// for intrinsic test uniqueness. 2976void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 2977 bool isA64GenTest) { 2978 if (isA64GenTest) 2979 OS << "#ifdef __aarch64__\n"; 2980 2981 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2982 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2983 Record *R = RV[i]; 2984 std::string name = R->getValueAsString("Name"); 2985 std::string Proto = R->getValueAsString("Prototype"); 2986 std::string Types = R->getValueAsString("Types"); 2987 bool isShift = R->getValueAsBit("isShift"); 2988 std::string InstName = R->getValueAsString("InstName"); 2989 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 2990 bool isA64 = R->getValueAsBit("isA64"); 2991 2992 // do not include AArch64 intrinsic test if not generating 2993 // code for AArch64 2994 if (!isA64GenTest && isA64) 2995 continue; 2996 2997 SmallVector<StringRef, 16> TypeVec; 2998 ParseTypes(R, Types, TypeVec); 2999 3000 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 3001 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 3002 if (kind 
== OpUnavailable) 3003 continue; 3004 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3005 if (kind == OpReinterpret) { 3006 bool outQuad = false; 3007 bool dummy = false; 3008 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 3009 for (unsigned srcti = 0, srcte = TypeVec.size(); 3010 srcti != srcte; ++srcti) { 3011 bool inQuad = false; 3012 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 3013 if (srcti == ti || inQuad != outQuad) 3014 continue; 3015 std::string testFuncProto; 3016 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 3017 isShift, isHiddenLOp, ck, InstName, isA64, 3018 testFuncProto); 3019 if (EmittedMap.count(testFuncProto)) 3020 continue; 3021 EmittedMap[testFuncProto] = kind; 3022 OS << s << "\n"; 3023 } 3024 } else { 3025 std::string testFuncProto; 3026 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 3027 isHiddenLOp, ck, InstName, isA64, testFuncProto); 3028 if (EmittedMap.count(testFuncProto)) 3029 continue; 3030 EmittedMap[testFuncProto] = kind; 3031 OS << s << "\n"; 3032 } 3033 } 3034 } 3035 3036 if (isA64GenTest) 3037 OS << "#endif\n"; 3038} 3039/// runTests - Write out a complete set of tests for all of the Neon 3040/// intrinsics. 3041void NeonEmitter::runTests(raw_ostream &OS) { 3042 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 3043 "apcs-gnu\\\n" 3044 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 3045 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 3046 "\n" 3047 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 3048 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 3049 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 3050 "\n" 3051 "// REQUIRES: long_tests\n" 3052 "\n" 3053 "#include <arm_neon.h>\n" 3054 "\n"; 3055 3056 // ARM tests must be emitted before AArch64 tests to ensure 3057 // tests for intrinsics that are common to ARM and AArch64 3058 // appear only once in the output stream. 
3059 // The check for uniqueness is done in genTargetTest. 3060 StringMap<OpKind> EmittedMap; 3061 3062 genTargetTest(OS, EmittedMap, false); 3063 3064 genTargetTest(OS, EmittedMap, true); 3065} 3066 3067namespace clang { 3068void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 3069 NeonEmitter(Records).run(OS); 3070} 3071void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 3072 NeonEmitter(Records).runHeader(OS); 3073} 3074void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 3075 NeonEmitter(Records).runTests(OS); 3076} 3077} // End namespace clang 3078