NeonEmitter.cpp revision 410c3f73cb0c78f72335dc712a9d887d77b8e7ce
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface. See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point. A complete set of tests
// for Neon intrinsics can be generated by calling the runTests() entry point.
23// 24//===----------------------------------------------------------------------===// 25 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/ADT/SmallString.h" 28#include "llvm/ADT/SmallVector.h" 29#include "llvm/ADT/StringExtras.h" 30#include "llvm/ADT/StringMap.h" 31#include "llvm/Support/ErrorHandling.h" 32#include "llvm/TableGen/Error.h" 33#include "llvm/TableGen/Record.h" 34#include "llvm/TableGen/TableGenBackend.h" 35#include <string> 36using namespace llvm; 37 38enum OpKind { 39 OpNone, 40 OpUnavailable, 41 OpAdd, 42 OpAddl, 43 OpAddw, 44 OpSub, 45 OpSubl, 46 OpSubw, 47 OpMul, 48 OpMla, 49 OpMlal, 50 OpMls, 51 OpMlsl, 52 OpMulN, 53 OpMlaN, 54 OpMlsN, 55 OpMlalN, 56 OpMlslN, 57 OpMulLane, 58 OpMullLane, 59 OpMlaLane, 60 OpMlsLane, 61 OpMlalLane, 62 OpMlslLane, 63 OpQDMullLane, 64 OpQDMlalLane, 65 OpQDMlslLane, 66 OpQDMulhLane, 67 OpQRDMulhLane, 68 OpEq, 69 OpGe, 70 OpLe, 71 OpGt, 72 OpLt, 73 OpNeg, 74 OpNot, 75 OpAnd, 76 OpOr, 77 OpXor, 78 OpAndNot, 79 OpOrNot, 80 OpCast, 81 OpConcat, 82 OpDup, 83 OpDupLane, 84 OpHi, 85 OpLo, 86 OpSelect, 87 OpRev16, 88 OpRev32, 89 OpRev64, 90 OpReinterpret, 91 OpAbdl, 92 OpAba, 93 OpAbal 94}; 95 96enum ClassKind { 97 ClassNone, 98 ClassI, // generic integer instruction, e.g., "i8" suffix 99 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 100 ClassW, // width-specific instruction, e.g., "8" suffix 101 ClassB, // bitcast arguments with enum argument to specify type 102 ClassL, // Logical instructions which are op instructions 103 // but we need to not emit any suffix for in our 104 // tests. 105 ClassNoTest // Instructions which we do not test since they are 106 // not TRUE instructions. 107}; 108 109/// NeonTypeFlags - Flags to identify the types for overloaded Neon 110/// builtins. These must be kept in sync with the flags in 111/// include/clang/Basic/TargetBuiltins.h. 
112namespace { 113class NeonTypeFlags { 114 enum { 115 EltTypeMask = 0xf, 116 UnsignedFlag = 0x10, 117 QuadFlag = 0x20 118 }; 119 uint32_t Flags; 120 121public: 122 enum EltType { 123 Int8, 124 Int16, 125 Int32, 126 Int64, 127 Poly8, 128 Poly16, 129 Float16, 130 Float32 131 }; 132 133 NeonTypeFlags(unsigned F) : Flags(F) {} 134 NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) { 135 if (IsUnsigned) 136 Flags |= UnsignedFlag; 137 if (IsQuad) 138 Flags |= QuadFlag; 139 } 140 141 uint32_t getFlags() const { return Flags; } 142}; 143} // end anonymous namespace 144 145namespace { 146class NeonEmitter { 147 RecordKeeper &Records; 148 StringMap<OpKind> OpMap; 149 DenseMap<Record*, ClassKind> ClassMap; 150 151public: 152 NeonEmitter(RecordKeeper &R) : Records(R) { 153 OpMap["OP_NONE"] = OpNone; 154 OpMap["OP_UNAVAILABLE"] = OpUnavailable; 155 OpMap["OP_ADD"] = OpAdd; 156 OpMap["OP_ADDL"] = OpAddl; 157 OpMap["OP_ADDW"] = OpAddw; 158 OpMap["OP_SUB"] = OpSub; 159 OpMap["OP_SUBL"] = OpSubl; 160 OpMap["OP_SUBW"] = OpSubw; 161 OpMap["OP_MUL"] = OpMul; 162 OpMap["OP_MLA"] = OpMla; 163 OpMap["OP_MLAL"] = OpMlal; 164 OpMap["OP_MLS"] = OpMls; 165 OpMap["OP_MLSL"] = OpMlsl; 166 OpMap["OP_MUL_N"] = OpMulN; 167 OpMap["OP_MLA_N"] = OpMlaN; 168 OpMap["OP_MLS_N"] = OpMlsN; 169 OpMap["OP_MLAL_N"] = OpMlalN; 170 OpMap["OP_MLSL_N"] = OpMlslN; 171 OpMap["OP_MUL_LN"]= OpMulLane; 172 OpMap["OP_MULL_LN"] = OpMullLane; 173 OpMap["OP_MLA_LN"]= OpMlaLane; 174 OpMap["OP_MLS_LN"]= OpMlsLane; 175 OpMap["OP_MLAL_LN"] = OpMlalLane; 176 OpMap["OP_MLSL_LN"] = OpMlslLane; 177 OpMap["OP_QDMULL_LN"] = OpQDMullLane; 178 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane; 179 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane; 180 OpMap["OP_QDMULH_LN"] = OpQDMulhLane; 181 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; 182 OpMap["OP_EQ"] = OpEq; 183 OpMap["OP_GE"] = OpGe; 184 OpMap["OP_LE"] = OpLe; 185 OpMap["OP_GT"] = OpGt; 186 OpMap["OP_LT"] = OpLt; 187 OpMap["OP_NEG"] = OpNeg; 188 OpMap["OP_NOT"] = OpNot; 189 
OpMap["OP_AND"] = OpAnd; 190 OpMap["OP_OR"] = OpOr; 191 OpMap["OP_XOR"] = OpXor; 192 OpMap["OP_ANDN"] = OpAndNot; 193 OpMap["OP_ORN"] = OpOrNot; 194 OpMap["OP_CAST"] = OpCast; 195 OpMap["OP_CONC"] = OpConcat; 196 OpMap["OP_HI"] = OpHi; 197 OpMap["OP_LO"] = OpLo; 198 OpMap["OP_DUP"] = OpDup; 199 OpMap["OP_DUP_LN"] = OpDupLane; 200 OpMap["OP_SEL"] = OpSelect; 201 OpMap["OP_REV16"] = OpRev16; 202 OpMap["OP_REV32"] = OpRev32; 203 OpMap["OP_REV64"] = OpRev64; 204 OpMap["OP_REINT"] = OpReinterpret; 205 OpMap["OP_ABDL"] = OpAbdl; 206 OpMap["OP_ABA"] = OpAba; 207 OpMap["OP_ABAL"] = OpAbal; 208 209 Record *SI = R.getClass("SInst"); 210 Record *II = R.getClass("IInst"); 211 Record *WI = R.getClass("WInst"); 212 Record *SOpI = R.getClass("SOpInst"); 213 Record *IOpI = R.getClass("IOpInst"); 214 Record *WOpI = R.getClass("WOpInst"); 215 Record *LOpI = R.getClass("LOpInst"); 216 Record *NoTestOpI = R.getClass("NoTestOpInst"); 217 218 ClassMap[SI] = ClassS; 219 ClassMap[II] = ClassI; 220 ClassMap[WI] = ClassW; 221 ClassMap[SOpI] = ClassS; 222 ClassMap[IOpI] = ClassI; 223 ClassMap[WOpI] = ClassW; 224 ClassMap[LOpI] = ClassL; 225 ClassMap[NoTestOpI] = ClassNoTest; 226 } 227 228 // run - Emit arm_neon.h.inc 229 void run(raw_ostream &o); 230 231 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 232 void runHeader(raw_ostream &o); 233 234 // runTests - Emit tests for all the Neon intrinsics. 235 void runTests(raw_ostream &o); 236 237private: 238 void emitIntrinsic(raw_ostream &OS, Record *R); 239}; 240} // end anonymous namespace 241 242/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs, 243/// which each StringRef representing a single type declared in the string. 244/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing 245/// 2xfloat and 4xfloat respectively. 
246static void ParseTypes(Record *r, std::string &s, 247 SmallVectorImpl<StringRef> &TV) { 248 const char *data = s.data(); 249 int len = 0; 250 251 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 252 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U') 253 continue; 254 255 switch (data[len]) { 256 case 'c': 257 case 's': 258 case 'i': 259 case 'l': 260 case 'h': 261 case 'f': 262 break; 263 default: 264 PrintFatalError(r->getLoc(), 265 "Unexpected letter: " + std::string(data + len, 1)); 266 } 267 TV.push_back(StringRef(data, len + 1)); 268 data += len + 1; 269 len = -1; 270 } 271} 272 273/// Widen - Convert a type code into the next wider type. char -> short, 274/// short -> int, etc. 275static char Widen(const char t) { 276 switch (t) { 277 case 'c': 278 return 's'; 279 case 's': 280 return 'i'; 281 case 'i': 282 return 'l'; 283 case 'h': 284 return 'f'; 285 default: 286 PrintFatalError("unhandled type in widen!"); 287 } 288} 289 290/// Narrow - Convert a type code into the next smaller type. short -> char, 291/// float -> half float, etc. 292static char Narrow(const char t) { 293 switch (t) { 294 case 's': 295 return 'c'; 296 case 'i': 297 return 's'; 298 case 'l': 299 return 'i'; 300 case 'f': 301 return 'h'; 302 default: 303 PrintFatalError("unhandled type in narrow!"); 304 } 305} 306 307/// For a particular StringRef, return the base type code, and whether it has 308/// the quad-vector, polynomial, or unsigned modifiers set. 309static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 310 unsigned off = 0; 311 312 // remember quad. 313 if (ty[off] == 'Q') { 314 quad = true; 315 ++off; 316 } 317 318 // remember poly. 319 if (ty[off] == 'P') { 320 poly = true; 321 ++off; 322 } 323 324 // remember unsigned. 325 if (ty[off] == 'U') { 326 usgn = true; 327 ++off; 328 } 329 330 // base type to get the type string for. 331 return ty[off]; 332} 333 334/// ModType - Transform a type code and its modifiers based on a mod code. 
The 335/// mod code definitions may be found at the top of arm_neon.td. 336static char ModType(const char mod, char type, bool &quad, bool &poly, 337 bool &usgn, bool &scal, bool &cnst, bool &pntr) { 338 switch (mod) { 339 case 't': 340 if (poly) { 341 poly = false; 342 usgn = true; 343 } 344 break; 345 case 'u': 346 usgn = true; 347 poly = false; 348 if (type == 'f') 349 type = 'i'; 350 break; 351 case 'x': 352 usgn = false; 353 poly = false; 354 if (type == 'f') 355 type = 'i'; 356 break; 357 case 'f': 358 if (type == 'h') 359 quad = true; 360 type = 'f'; 361 usgn = false; 362 break; 363 case 'g': 364 quad = false; 365 break; 366 case 'w': 367 type = Widen(type); 368 quad = true; 369 break; 370 case 'n': 371 type = Widen(type); 372 break; 373 case 'i': 374 type = 'i'; 375 scal = true; 376 break; 377 case 'l': 378 type = 'l'; 379 scal = true; 380 usgn = true; 381 break; 382 case 's': 383 case 'a': 384 scal = true; 385 break; 386 case 'k': 387 quad = true; 388 break; 389 case 'c': 390 cnst = true; 391 case 'p': 392 pntr = true; 393 scal = true; 394 break; 395 case 'h': 396 type = Narrow(type); 397 if (type == 'h') 398 quad = false; 399 break; 400 case 'e': 401 type = Narrow(type); 402 usgn = true; 403 break; 404 default: 405 break; 406 } 407 return type; 408} 409 410/// TypeString - for a modifier and type, generate the name of the typedef for 411/// that type. QUc -> uint8x8_t. 412static std::string TypeString(const char mod, StringRef typestr) { 413 bool quad = false; 414 bool poly = false; 415 bool usgn = false; 416 bool scal = false; 417 bool cnst = false; 418 bool pntr = false; 419 420 if (mod == 'v') 421 return "void"; 422 if (mod == 'i') 423 return "int"; 424 425 // base type to get the type string for. 426 char type = ClassifyType(typestr, quad, poly, usgn); 427 428 // Based on the modifying character, change the type and width if necessary. 
429 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 430 431 SmallString<128> s; 432 433 if (usgn) 434 s.push_back('u'); 435 436 switch (type) { 437 case 'c': 438 s += poly ? "poly8" : "int8"; 439 if (scal) 440 break; 441 s += quad ? "x16" : "x8"; 442 break; 443 case 's': 444 s += poly ? "poly16" : "int16"; 445 if (scal) 446 break; 447 s += quad ? "x8" : "x4"; 448 break; 449 case 'i': 450 s += "int32"; 451 if (scal) 452 break; 453 s += quad ? "x4" : "x2"; 454 break; 455 case 'l': 456 s += "int64"; 457 if (scal) 458 break; 459 s += quad ? "x2" : "x1"; 460 break; 461 case 'h': 462 s += "float16"; 463 if (scal) 464 break; 465 s += quad ? "x8" : "x4"; 466 break; 467 case 'f': 468 s += "float32"; 469 if (scal) 470 break; 471 s += quad ? "x4" : "x2"; 472 break; 473 default: 474 PrintFatalError("unhandled type!"); 475 } 476 477 if (mod == '2') 478 s += "x2"; 479 if (mod == '3') 480 s += "x3"; 481 if (mod == '4') 482 s += "x4"; 483 484 // Append _t, finishing the type string typedef type. 485 s += "_t"; 486 487 if (cnst) 488 s += " const"; 489 490 if (pntr) 491 s += " *"; 492 493 return s.str(); 494} 495 496/// BuiltinTypeString - for a modifier and type, generate the clang 497/// BuiltinsARM.def prototype code for the function. See the top of clang's 498/// Builtins.def for a description of the type strings. 499static std::string BuiltinTypeString(const char mod, StringRef typestr, 500 ClassKind ck, bool ret) { 501 bool quad = false; 502 bool poly = false; 503 bool usgn = false; 504 bool scal = false; 505 bool cnst = false; 506 bool pntr = false; 507 508 if (mod == 'v') 509 return "v"; // void 510 if (mod == 'i') 511 return "i"; // int 512 513 // base type to get the type string for. 514 char type = ClassifyType(typestr, quad, poly, usgn); 515 516 // Based on the modifying character, change the type and width if necessary. 517 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 518 519 // All pointers are void* pointers. Change type to 'v' now. 
520 if (pntr) { 521 usgn = false; 522 poly = false; 523 type = 'v'; 524 } 525 // Treat half-float ('h') types as unsigned short ('s') types. 526 if (type == 'h') { 527 type = 's'; 528 usgn = true; 529 } 530 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f'); 531 532 if (scal) { 533 SmallString<128> s; 534 535 if (usgn) 536 s.push_back('U'); 537 else if (type == 'c') 538 s.push_back('S'); // make chars explicitly signed 539 540 if (type == 'l') // 64-bit long 541 s += "LLi"; 542 else 543 s.push_back(type); 544 545 if (cnst) 546 s.push_back('C'); 547 if (pntr) 548 s.push_back('*'); 549 return s.str(); 550 } 551 552 // Since the return value must be one type, return a vector type of the 553 // appropriate width which we will bitcast. An exception is made for 554 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 555 // fashion, storing them to a pointer arg. 556 if (ret) { 557 if (mod >= '2' && mod <= '4') 558 return "vv*"; // void result with void* first argument 559 if (mod == 'f' || (ck != ClassB && type == 'f')) 560 return quad ? "V4f" : "V2f"; 561 if (ck != ClassB && type == 's') 562 return quad ? "V8s" : "V4s"; 563 if (ck != ClassB && type == 'i') 564 return quad ? "V4i" : "V2i"; 565 if (ck != ClassB && type == 'l') 566 return quad ? "V2LLi" : "V1LLi"; 567 568 return quad ? "V16Sc" : "V8Sc"; 569 } 570 571 // Non-return array types are passed as individual vectors. 572 if (mod == '2') 573 return quad ? "V16ScV16Sc" : "V8ScV8Sc"; 574 if (mod == '3') 575 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; 576 if (mod == '4') 577 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; 578 579 if (mod == 'f' || (ck != ClassB && type == 'f')) 580 return quad ? "V4f" : "V2f"; 581 if (ck != ClassB && type == 's') 582 return quad ? "V8s" : "V4s"; 583 if (ck != ClassB && type == 'i') 584 return quad ? "V4i" : "V2i"; 585 if (ck != ClassB && type == 'l') 586 return quad ? "V2LLi" : "V1LLi"; 587 588 return quad ? 
"V16Sc" : "V8Sc"; 589} 590 591/// InstructionTypeCode - Computes the ARM argument character code and 592/// quad status for a specific type string and ClassKind. 593static void InstructionTypeCode(const StringRef &typeStr, 594 const ClassKind ck, 595 bool &quad, 596 std::string &typeCode) { 597 bool poly = false; 598 bool usgn = false; 599 char type = ClassifyType(typeStr, quad, poly, usgn); 600 601 switch (type) { 602 case 'c': 603 switch (ck) { 604 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break; 605 case ClassI: typeCode = "i8"; break; 606 case ClassW: typeCode = "8"; break; 607 default: break; 608 } 609 break; 610 case 's': 611 switch (ck) { 612 case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break; 613 case ClassI: typeCode = "i16"; break; 614 case ClassW: typeCode = "16"; break; 615 default: break; 616 } 617 break; 618 case 'i': 619 switch (ck) { 620 case ClassS: typeCode = usgn ? "u32" : "s32"; break; 621 case ClassI: typeCode = "i32"; break; 622 case ClassW: typeCode = "32"; break; 623 default: break; 624 } 625 break; 626 case 'l': 627 switch (ck) { 628 case ClassS: typeCode = usgn ? "u64" : "s64"; break; 629 case ClassI: typeCode = "i64"; break; 630 case ClassW: typeCode = "64"; break; 631 default: break; 632 } 633 break; 634 case 'h': 635 switch (ck) { 636 case ClassS: 637 case ClassI: typeCode = "f16"; break; 638 case ClassW: typeCode = "16"; break; 639 default: break; 640 } 641 break; 642 case 'f': 643 switch (ck) { 644 case ClassS: 645 case ClassI: typeCode = "f32"; break; 646 case ClassW: typeCode = "32"; break; 647 default: break; 648 } 649 break; 650 default: 651 PrintFatalError("unhandled type!"); 652 } 653} 654 655/// MangleName - Append a type or width suffix to a base neon function name, 656/// and insert a 'q' in the appropriate location if the operation works on 657/// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. 
658static std::string MangleName(const std::string &name, StringRef typestr, 659 ClassKind ck) { 660 if (name == "vcvt_f32_f16") 661 return name; 662 663 bool quad = false; 664 std::string typeCode = ""; 665 666 InstructionTypeCode(typestr, ck, quad, typeCode); 667 668 std::string s = name; 669 670 if (typeCode.size() > 0) { 671 s += "_" + typeCode; 672 } 673 674 if (ck == ClassB) 675 s += "_v"; 676 677 // Insert a 'q' before the first '_' character so that it ends up before 678 // _lane or _n on vector-scalar operations. 679 if (quad) { 680 size_t pos = s.find('_'); 681 s = s.insert(pos, "q"); 682 } 683 684 return s; 685} 686 687static void PreprocessInstruction(const StringRef &Name, 688 const std::string &InstName, 689 std::string &Prefix, 690 bool &HasNPostfix, 691 bool &HasLanePostfix, 692 bool &HasDupPostfix, 693 bool &IsSpecialVCvt, 694 size_t &TBNumber) { 695 // All of our instruction name fields from arm_neon.td are of the form 696 // <instructionname>_... 697 // Thus we grab our instruction name via computation of said Prefix. 698 const size_t PrefixEnd = Name.find_first_of('_'); 699 // If InstName is passed in, we use that instead of our name Prefix. 700 Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName; 701 702 const StringRef Postfix = Name.slice(PrefixEnd, Name.size()); 703 704 HasNPostfix = Postfix.count("_n"); 705 HasLanePostfix = Postfix.count("_lane"); 706 HasDupPostfix = Postfix.count("_dup"); 707 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt"); 708 709 if (InstName.compare("vtbl") == 0 || 710 InstName.compare("vtbx") == 0) { 711 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII 712 // encoding to get its true value. 
713 TBNumber = Name[Name.size()-1] - 48; 714 } 715} 716 717/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have 718/// extracted, generate a FileCheck pattern for a Load Or Store 719static void 720GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef, 721 const std::string& OutTypeCode, 722 const bool &IsQuad, 723 const bool &HasDupPostfix, 724 const bool &HasLanePostfix, 725 const size_t Count, 726 std::string &RegisterSuffix) { 727 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1"); 728 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang 729 // will output a series of v{ld,st}1s, so we have to handle it specially. 730 if ((Count == 3 || Count == 4) && IsQuad) { 731 RegisterSuffix += "{"; 732 for (size_t i = 0; i < Count; i++) { 733 RegisterSuffix += "d{{[0-9]+}}"; 734 if (HasDupPostfix) { 735 RegisterSuffix += "[]"; 736 } 737 if (HasLanePostfix) { 738 RegisterSuffix += "[{{[0-9]+}}]"; 739 } 740 if (i < Count-1) { 741 RegisterSuffix += ", "; 742 } 743 } 744 RegisterSuffix += "}"; 745 } else { 746 747 // Handle normal loads and stores. 748 RegisterSuffix += "{"; 749 for (size_t i = 0; i < Count; i++) { 750 RegisterSuffix += "d{{[0-9]+}}"; 751 if (HasDupPostfix) { 752 RegisterSuffix += "[]"; 753 } 754 if (HasLanePostfix) { 755 RegisterSuffix += "[{{[0-9]+}}]"; 756 } 757 if (IsQuad && !HasLanePostfix) { 758 RegisterSuffix += ", d{{[0-9]+}}"; 759 if (HasDupPostfix) { 760 RegisterSuffix += "[]"; 761 } 762 } 763 if (i < Count-1) { 764 RegisterSuffix += ", "; 765 } 766 } 767 RegisterSuffix += "}, [r{{[0-9]+}}"; 768 769 // We only include the alignment hint if we have a vld1.*64 or 770 // a dup/lane instruction. 
771 if (IsLDSTOne) { 772 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") { 773 RegisterSuffix += ":" + OutTypeCode; 774 } else if (OutTypeCode == "64") { 775 RegisterSuffix += ":64"; 776 } 777 } 778 779 RegisterSuffix += "]"; 780 } 781} 782 783static bool HasNPostfixAndScalarArgs(const StringRef &NameRef, 784 const bool &HasNPostfix) { 785 return (NameRef.count("vmla") || 786 NameRef.count("vmlal") || 787 NameRef.count("vmlsl") || 788 NameRef.count("vmull") || 789 NameRef.count("vqdmlal") || 790 NameRef.count("vqdmlsl") || 791 NameRef.count("vqdmulh") || 792 NameRef.count("vqdmull") || 793 NameRef.count("vqrdmulh")) && HasNPostfix; 794} 795 796static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef, 797 const bool &HasLanePostfix) { 798 return (NameRef.count("vmla") || 799 NameRef.count("vmls") || 800 NameRef.count("vmlal") || 801 NameRef.count("vmlsl") || 802 (NameRef.count("vmul") && NameRef.size() == 3)|| 803 NameRef.count("vqdmlal") || 804 NameRef.count("vqdmlsl") || 805 NameRef.count("vqdmulh") || 806 NameRef.count("vqrdmulh")) && HasLanePostfix; 807} 808 809static bool IsSpecialLaneMultiply(const StringRef &NameRef, 810 const bool &HasLanePostfix, 811 const bool &IsQuad) { 812 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh")) 813 && IsQuad; 814 const bool IsVMull = NameRef.count("mull") && !IsQuad; 815 return (IsVMulOrMulh || IsVMull) && HasLanePostfix; 816} 817 818static void NormalizeProtoForRegisterPatternCreation(const std::string &Name, 819 const std::string &Proto, 820 const bool &HasNPostfix, 821 const bool &IsQuad, 822 const bool &HasLanePostfix, 823 const bool &HasDupPostfix, 824 std::string &NormedProto) { 825 // Handle generic case. 826 const StringRef NameRef(Name); 827 for (size_t i = 0, end = Proto.size(); i < end; i++) { 828 switch (Proto[i]) { 829 case 'u': 830 case 'f': 831 case 'd': 832 case 's': 833 case 'x': 834 case 't': 835 case 'n': 836 NormedProto += IsQuad? 
'q' : 'd'; 837 break; 838 case 'w': 839 case 'k': 840 NormedProto += 'q'; 841 break; 842 case 'g': 843 case 'h': 844 case 'e': 845 NormedProto += 'd'; 846 break; 847 case 'i': 848 NormedProto += HasLanePostfix? 'a' : 'i'; 849 break; 850 case 'a': 851 if (HasLanePostfix) { 852 NormedProto += 'a'; 853 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) { 854 NormedProto += IsQuad? 'q' : 'd'; 855 } else { 856 NormedProto += 'i'; 857 } 858 break; 859 } 860 } 861 862 // Handle Special Cases. 863 const bool IsNotVExt = !NameRef.count("vext"); 864 const bool IsVPADAL = NameRef.count("vpadal"); 865 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef, 866 HasLanePostfix); 867 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix, 868 IsQuad); 869 870 if (IsSpecialLaneMul) { 871 // If 872 NormedProto[2] = NormedProto[3]; 873 NormedProto.erase(3); 874 } else if (NormedProto.size() == 4 && 875 NormedProto[0] == NormedProto[1] && 876 IsNotVExt) { 877 // If NormedProto.size() == 4 and the first two proto characters are the 878 // same, ignore the first. 879 NormedProto = NormedProto.substr(1, 3); 880 } else if (Is5OpLaneAccum) { 881 // If we have a 5 op lane accumulator operation, we take characters 1,2,4 882 std::string tmp = NormedProto.substr(1,2); 883 tmp += NormedProto[4]; 884 NormedProto = tmp; 885 } else if (IsVPADAL) { 886 // If we have VPADAL, ignore the first character. 887 NormedProto = NormedProto.substr(0, 2); 888 } else if (NameRef.count("vdup") && NormedProto.size() > 2) { 889 // If our instruction is a dup instruction, keep only the first and 890 // last characters. 891 std::string tmp = ""; 892 tmp += NormedProto[0]; 893 tmp += NormedProto[NormedProto.size()-1]; 894 NormedProto = tmp; 895 } 896} 897 898/// GenerateRegisterCheckPatterns - Given a bunch of data we have 899/// extracted, generate a FileCheck pattern to check that an 900/// instruction's arguments are correct. 
901static void GenerateRegisterCheckPattern(const std::string &Name, 902 const std::string &Proto, 903 const std::string &OutTypeCode, 904 const bool &HasNPostfix, 905 const bool &IsQuad, 906 const bool &HasLanePostfix, 907 const bool &HasDupPostfix, 908 const size_t &TBNumber, 909 std::string &RegisterSuffix) { 910 911 RegisterSuffix = ""; 912 913 const StringRef NameRef(Name); 914 const StringRef ProtoRef(Proto); 915 916 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) { 917 return; 918 } 919 920 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst"); 921 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx"); 922 923 if (IsLoadStore) { 924 // Grab N value from v{ld,st}N using its ascii representation. 925 const size_t Count = NameRef[3] - 48; 926 927 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad, 928 HasDupPostfix, HasLanePostfix, 929 Count, RegisterSuffix); 930 } else if (IsTBXOrTBL) { 931 RegisterSuffix += "d{{[0-9]+}}, {"; 932 for (size_t i = 0; i < TBNumber-1; i++) { 933 RegisterSuffix += "d{{[0-9]+}}, "; 934 } 935 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}"; 936 } else { 937 // Handle a normal instruction. 938 if (NameRef.count("vget") || NameRef.count("vset")) 939 return; 940 941 // We first normalize our proto, since we only need to emit 4 942 // different types of checks, yet have more than 4 proto types 943 // that map onto those 4 patterns. 
944 std::string NormalizedProto(""); 945 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad, 946 HasLanePostfix, HasDupPostfix, 947 NormalizedProto); 948 949 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) { 950 const char &c = NormalizedProto[i]; 951 switch (c) { 952 case 'q': 953 RegisterSuffix += "q{{[0-9]+}}, "; 954 break; 955 956 case 'd': 957 RegisterSuffix += "d{{[0-9]+}}, "; 958 break; 959 960 case 'i': 961 RegisterSuffix += "#{{[0-9]+}}, "; 962 break; 963 964 case 'a': 965 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], "; 966 break; 967 } 968 } 969 970 // Remove extra ", ". 971 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2); 972 } 973} 974 975/// GenerateChecksForIntrinsic - Given a specific instruction name + 976/// typestr + class kind, generate the proper set of FileCheck 977/// Patterns to check for. We could just return a string, but instead 978/// use a vector since it provides us with the extra flexibility of 979/// emitting multiple checks, which comes in handy for certain cases 980/// like mla where we want to check for 2 different instructions. 981static void GenerateChecksForIntrinsic(const std::string &Name, 982 const std::string &Proto, 983 StringRef &OutTypeStr, 984 StringRef &InTypeStr, 985 ClassKind Ck, 986 const std::string &InstName, 987 bool IsHiddenLOp, 988 std::vector<std::string>& Result) { 989 990 // If Ck is a ClassNoTest instruction, just return so no test is 991 // emitted. 992 if(Ck == ClassNoTest) 993 return; 994 995 if (Name == "vcvt_f32_f16") { 996 Result.push_back("vcvt.f32.f16"); 997 return; 998 } 999 1000 1001 // Now we preprocess our instruction given the data we have to get the 1002 // data that we need. 1003 // Create a StringRef for String Manipulation of our Name. 1004 const StringRef NameRef(Name); 1005 // Instruction Prefix. 1006 std::string Prefix; 1007 // The type code for our out type string. 
1008 std::string OutTypeCode; 1009 // To handle our different cases, we need to check for different postfixes. 1010 // Is our instruction a quad instruction. 1011 bool IsQuad = false; 1012 // Our instruction is of the form <instructionname>_n. 1013 bool HasNPostfix = false; 1014 // Our instruction is of the form <instructionname>_lane. 1015 bool HasLanePostfix = false; 1016 // Our instruction is of the form <instructionname>_dup. 1017 bool HasDupPostfix = false; 1018 // Our instruction is a vcvt instruction which requires special handling. 1019 bool IsSpecialVCvt = false; 1020 // If we have a vtbxN or vtblN instruction, this is set to N. 1021 size_t TBNumber = -1; 1022 // Register Suffix 1023 std::string RegisterSuffix; 1024 1025 PreprocessInstruction(NameRef, InstName, Prefix, 1026 HasNPostfix, HasLanePostfix, HasDupPostfix, 1027 IsSpecialVCvt, TBNumber); 1028 1029 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode); 1030 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad, 1031 HasLanePostfix, HasDupPostfix, TBNumber, 1032 RegisterSuffix); 1033 1034 // In the following section, we handle a bunch of special cases. You can tell 1035 // a special case by the fact we are returning early. 1036 1037 // If our instruction is a logical instruction without postfix or a 1038 // hidden LOp just return the current Prefix. 1039 if (Ck == ClassL || IsHiddenLOp) { 1040 Result.push_back(Prefix + " " + RegisterSuffix); 1041 return; 1042 } 1043 1044 // If we have a vmov, due to the many different cases, some of which 1045 // vary within the different intrinsics generated for a single 1046 // instruction type, just output a vmov. (e.g. given an instruction 1047 // A, A.u32 might be vmov and A.u8 might be vmov.8). 1048 // 1049 // FIXME: Maybe something can be done about this. The two cases that we care 1050 // about are vmov as an LType and vmov as a WType. 
1051 if (Prefix == "vmov") { 1052 Result.push_back(Prefix + " " + RegisterSuffix); 1053 return; 1054 } 1055 1056 // In the following section, we handle special cases. 1057 1058 if (OutTypeCode == "64") { 1059 // If we have a 64 bit vdup/vext and are handling an uint64x1_t 1060 // type, the intrinsic will be optimized away, so just return 1061 // nothing. On the other hand if we are handling an uint64x2_t 1062 // (i.e. quad instruction), vdup/vmov instructions should be 1063 // emitted. 1064 if (Prefix == "vdup" || Prefix == "vext") { 1065 if (IsQuad) { 1066 Result.push_back("{{vmov|vdup}}"); 1067 } 1068 return; 1069 } 1070 1071 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with 1072 // multiple register operands. 1073 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3" 1074 || Prefix == "vld4"; 1075 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3" 1076 || Prefix == "vst4"; 1077 if (MultiLoadPrefix || MultiStorePrefix) { 1078 Result.push_back(NameRef.slice(0, 3).str() + "1.64"); 1079 return; 1080 } 1081 1082 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of 1083 // emitting said instructions. So return a check for 1084 // vldr/vstr/vmov/str instead. 1085 if (HasLanePostfix || HasDupPostfix) { 1086 if (Prefix == "vst1") { 1087 Result.push_back("{{str|vstr|vmov}}"); 1088 return; 1089 } else if (Prefix == "vld1") { 1090 Result.push_back("{{ldr|vldr|vmov}}"); 1091 return; 1092 } 1093 } 1094 } 1095 1096 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are 1097 // sometimes disassembled as vtrn.32. We use a regex to handle both 1098 // cases. 1099 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") { 1100 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix); 1101 return; 1102 } 1103 1104 // Currently on most ARM processors, we do not use vmla/vmls for 1105 // quad floating point operations. Instead we output vmul + vadd. 
So 1106 // check if we have one of those instructions and just output a 1107 // check for vmul. 1108 if (OutTypeCode == "f32") { 1109 if (Prefix == "vmls") { 1110 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1111 Result.push_back("vsub." + OutTypeCode); 1112 return; 1113 } else if (Prefix == "vmla") { 1114 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1115 Result.push_back("vadd." + OutTypeCode); 1116 return; 1117 } 1118 } 1119 1120 // If we have vcvt, get the input type from the instruction name 1121 // (which should be of the form instname_inputtype) and append it 1122 // before the output type. 1123 if (Prefix == "vcvt") { 1124 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1); 1125 Prefix += "." + inTypeCode; 1126 } 1127 1128 // Append output type code to get our final mangled instruction. 1129 Prefix += "." + OutTypeCode; 1130 1131 Result.push_back(Prefix + " " + RegisterSuffix); 1132} 1133 1134/// UseMacro - Examine the prototype string to determine if the intrinsic 1135/// should be defined as a preprocessor macro instead of an inline function. 1136static bool UseMacro(const std::string &proto) { 1137 // If this builtin takes an immediate argument, we need to #define it rather 1138 // than use a standard declaration, so that SemaChecking can range check 1139 // the immediate passed by the user. 1140 if (proto.find('i') != std::string::npos) 1141 return true; 1142 1143 // Pointer arguments need to use macros to avoid hiding aligned attributes 1144 // from the pointer type. 1145 if (proto.find('p') != std::string::npos || 1146 proto.find('c') != std::string::npos) 1147 return true; 1148 1149 return false; 1150} 1151 1152/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 1153/// defined as a macro should be accessed directly instead of being first 1154/// assigned to a local temporary. 
1155static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1156 // True for constant ints (i), pointers (p) and const pointers (c). 1157 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1158} 1159 1160// Generate the string "(argtype a, argtype b, ...)" 1161static std::string GenArgs(const std::string &proto, StringRef typestr) { 1162 bool define = UseMacro(proto); 1163 char arg = 'a'; 1164 1165 std::string s; 1166 s += "("; 1167 1168 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1169 if (define) { 1170 // Some macro arguments are used directly instead of being assigned 1171 // to local temporaries; prepend an underscore prefix to make their 1172 // names consistent with the local temporaries. 1173 if (MacroArgUsedDirectly(proto, i)) 1174 s += "__"; 1175 } else { 1176 s += TypeString(proto[i], typestr) + " __"; 1177 } 1178 s.push_back(arg); 1179 if ((i + 1) < e) 1180 s += ", "; 1181 } 1182 1183 s += ")"; 1184 return s; 1185} 1186 1187// Macro arguments are not type-checked like inline function arguments, so 1188// assign them to local temporaries to get the right type checking. 1189static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { 1190 char arg = 'a'; 1191 std::string s; 1192 bool generatedLocal = false; 1193 1194 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1195 // Do not create a temporary for an immediate argument. 1196 // That would defeat the whole point of using a macro! 1197 if (MacroArgUsedDirectly(proto, i)) 1198 continue; 1199 generatedLocal = true; 1200 1201 s += TypeString(proto[i], typestr) + " __"; 1202 s.push_back(arg); 1203 s += " = ("; 1204 s.push_back(arg); 1205 s += "); "; 1206 } 1207 1208 if (generatedLocal) 1209 s += "\\\n "; 1210 return s; 1211} 1212 1213// Use the vmovl builtin to sign-extend or zero-extend a vector. 
1214static std::string Extend(StringRef typestr, const std::string &a) { 1215 std::string s; 1216 s = MangleName("vmovl", typestr, ClassS); 1217 s += "(" + a + ")"; 1218 return s; 1219} 1220 1221static std::string Duplicate(unsigned nElts, StringRef typestr, 1222 const std::string &a) { 1223 std::string s; 1224 1225 s = "(" + TypeString('d', typestr) + "){ "; 1226 for (unsigned i = 0; i != nElts; ++i) { 1227 s += a; 1228 if ((i + 1) < nElts) 1229 s += ", "; 1230 } 1231 s += " }"; 1232 1233 return s; 1234} 1235 1236static std::string SplatLane(unsigned nElts, const std::string &vec, 1237 const std::string &lane) { 1238 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1239 for (unsigned i = 0; i < nElts; ++i) 1240 s += ", " + lane; 1241 s += ")"; 1242 return s; 1243} 1244 1245static unsigned GetNumElements(StringRef typestr, bool &quad) { 1246 quad = false; 1247 bool dummy = false; 1248 char type = ClassifyType(typestr, quad, dummy, dummy); 1249 unsigned nElts = 0; 1250 switch (type) { 1251 case 'c': nElts = 8; break; 1252 case 's': nElts = 4; break; 1253 case 'i': nElts = 2; break; 1254 case 'l': nElts = 1; break; 1255 case 'h': nElts = 4; break; 1256 case 'f': nElts = 2; break; 1257 default: 1258 PrintFatalError("unhandled type!"); 1259 } 1260 if (quad) nElts <<= 1; 1261 return nElts; 1262} 1263 1264// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 
/// GenOpString - Build the body text for an intrinsic that is implemented
/// directly with vector operators, __builtin_shufflevector, or calls to other
/// mangled intrinsics rather than with a __builtin_neon_* call.
///
/// \param op       The operation to generate.
/// \param proto    The prototype string; proto[0] selects the result type.
/// \param typestr  The type string the intrinsic is being generated for.
/// \returns The statement text.  Operands are referred to as __a, __b, __c
///          and __d.  When the intrinsic is not defined as a macro (see
///          UseMacro) the text starts with "return ".  An OpKind that is not
///          handled here is a fatal error.
static std::string GenOpString(OpKind op, const std::string &proto,
                               StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  // Inline functions return the expression; macros just evaluate to it.
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons are cast to the result type ts.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    // Both halves are viewed as int64x1_t and joined with a two-element
    // shuffle, then cast to the result type.
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    // Select indices 0 .. nElts-1 of the source.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    // From here on ts is the type of the first argument (the mask); the other
    // two operands are cast to it before the bitwise operations.
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Emit the indices in groups of two, each group in descending order.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Group size: nElts / 2 for non-quad vectors, nElts / 4 for quad vectors.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Group size: nElts for non-quad vectors, nElts / 2 for quad vectors.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal: {
    s += "__a + ";
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}

/// GetNeonEnum - Compute the NeonTypeFlags value that identifies the
/// overloaded type of a builtin.
///
/// The modifier is taken from the return-type character proto[0], or from the
/// first argument proto[1] when proto[0] is 'v' or 'f'.  It is applied to the
/// classified base type of typestr, and the resulting type code is mapped to
/// a NeonTypeFlags element type.  An unrecognized type code is a fatal error.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  NeonTypeFlags::EltType ET;
  switch (type) {
    case 'c':
      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
      break;
    case 's':
      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
      break;
    case 'i':
      ET = NeonTypeFlags::Int32;
      break;
    case 'l':
      ET = NeonTypeFlags::Int64;
      break;
    case 'h':
      ET = NeonTypeFlags::Float16;
      break;
    case 'f':
      ET = NeonTypeFlags::Float32;
      break;
    default:
      PrintFatalError("unhandled type!");
  }
  // The quad flag is dropped when the first argument's modifier is 'g'.
  // NOTE(review): the meaning of 'g' is defined by ModType, which is not
  // visible here -- confirm before relying on this.
  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
  return Flags.getFlags();
}

/// GenBuiltin - Build the body text for an intrinsic that is implemented by
/// calling a builtin.
///
/// \param name     The intrinsic name, without type suffix.
/// \param proto    The prototype string (return type followed by arguments).
/// \param typestr  The type string the intrinsic is being generated for.
/// \param ck       The class kind used for name mangling; replaced by ClassB
///                 when the prototype contains no 's' code.
/// \returns The statement text, including the result cast / "return" and, for
///          builtins returning a struct of 2-4 vectors, the temporary "r"
///          that is passed by address and then yielded.
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements. If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // For splat intrinsics the last argument is expanded into a vector
    // literal with that argument in every element.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // sret-like builtins wrote their result into r; yield it.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}

/// GenBuiltinDef - Build the BUILTIN(...) declaration line for a builtin:
/// the mangled __builtin_neon_ name, the builtin type string assembled from
/// every prototype character, and the attribute string "n".  When the
/// prototype contains no 's' code the class kind becomes ClassB and an extra
/// "i" is appended to the type string for the type class enum argument.
static std::string GenBuiltinDef(const std::string &name,
                                 const std::string &proto,
                                 StringRef typestr, ClassKind ck) {
  std::string s("BUILTIN(__builtin_neon_");

  // If all types are the same size, bitcasting the args will take care
  // of arg checking. The actual signedness etc. will be taken care of with
  // special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  s += MangleName(name, typestr, ck);
  s += ", \"";

  // The last argument to BuiltinTypeString is true only for the return type.
  for (unsigned i = 0, e = proto.size(); i != e; ++i)
    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += "i";

  s += "\", \"n\")";
  return s;
}

/// GenIntrinsic - Build the complete arm_neon.h definition of one intrinsic
/// for one type.
///
/// \param name        The intrinsic name, without type suffix.
/// \param proto       The prototype string; must not be empty.
/// \param outTypeStr  The type string for the result (used for mangling).
/// \param inTypeStr   The type string for the arguments; when it differs from
///                    outTypeStr a second type suffix is appended to the name.
/// \param kind        OpNone to call a builtin (GenBuiltin), OpUnavailable to
///                    emit only an unavailable declaration, anything else to
///                    generate the operation inline (GenOpString).
/// \param classKind   The class kind passed on to GenBuiltin.
/// \returns The text of a "#define" macro or of an "__ai" inline function,
///          terminated by a newline.
static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  // Unavailable intrinsics are always emitted as declarations, never macros.
  bool define = UseMacro(proto) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type. Strip off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr);

  // Definition.
  if (define) {
    s += " __extension__ ({ \\\n ";
    s += GenMacroLocals(proto, inTypeStr);
  } else if (kind == OpUnavailable) {
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n ";

  if (kind != OpNone)
    s += GenOpString(kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}

/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
/// is comprised of type definitions and function declarations.
///
/// The output consists, in order, of: the license banner, the include guard
/// and NEON availability check, the scalar typedefs, the vector typedefs, the
/// struct-of-vectors typedefs for 2, 3 and 4 vectors, and the intrinsics.
void NeonEmitter::run(raw_ostream &OS) {
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  OS << "#ifndef __ARM_NEON__\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "typedef uint16_t float16_t;\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit element counts so the typedefs line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
  }
  OS << "\n";

  // Emit struct typedefs.
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << " " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics. (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"));
  emitIntrinsic(OS, Records.getDef("VMULL"));
  emitIntrinsic(OS, Records.getDef("VABD"));

  // Emit everything else, skipping the three records already emitted above.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    if (R->getName() != "VMOVL" &&
        R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R);
  }

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind comes from the record's second superclass, when it has
  // one.  It is only required for intrinsics that call a builtin (OpNone).
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // Emit one cast intrinsic for every other source type whose quad-ness
      // matches that of the destination type.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                           OpCast, ClassS);
      }
    } else {
      OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
                         kind, classKind);
    }
  }
  OS << "\n";
}

/// RangeFromType - Return the largest valid index for the vector type
/// obtained by applying the modifier mod to typestr, i.e. the element count
/// of that type minus one.  The per-type count is doubled when the resulting
/// type is quad; an unrecognized type code is a fatal error.
static unsigned RangeFromType(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool quad = false, dummy = false;
  char type = ClassifyType(typestr, quad, dummy, dummy);
  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);

  switch (type) {
    case 'c':
      return (8 << (int)quad) - 1;
    case 'h':
    case 's':
      return (4 << (int)quad) - 1;
    case 'f':
    case 'i':
      return (2 << (int)quad) - 1;
    case 'l':
      return (1 << (int)quad) - 1;
    default:
      PrintFatalError("unhandled type!");
  }
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
///
/// Each section is wrapped in its own #ifdef.  Only records whose operand is
/// OpNone (i.e. that call a builtin) and whose prototype has no splat code
/// ('a') contribute to any of the sections.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  // Used purely as a set of already-emitted strings; the mapped value is
  // never read.  It is shared by the first and third sections.
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def for NEON
  OS << "#ifdef GET_NEON_BUILTINS\n";
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    std::string name = R->getValueAsString("Name");
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the BuiltinsARM.def declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";

  // Generate the overloaded type checking code for SemaChecking.cpp
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Proto.find('s') != std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // si / qi remember the last non-quad / quad type seen, and mask / qmask
    // collect one bit per NeonTypeFlags value accepted by that variant.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    // Only the first such argument is recorded.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics. Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction. If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    if (mask) {
      OS << "case ARM::BI__builtin_neon_"
         << MangleName(name, TypeVec[si], ClassB)
         << ": mask = " << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      OS << "case ARM::BI__builtin_neon_"
         << MangleName(name, TypeVec[qi], ClassB)
         << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";

  // Generate the intrinsic range checking code for shift/lane immediates.
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range 1 to 32.
        ck = ClassB;
        rangestr = "l = 1; u = 31"; // upper bound = l + u
      } else if (Proto.find('s') == std::string::npos) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked. Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
          default: immidx += 1; break;
          case '2': immidx += 2; break;
          case '3': immidx += 3; break;
          case '4': immidx += 4; break;
          // Reaching the immediate ends the loop without counting it.
          case 'i': ie = ii + 1; break;
        }
      }
      OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
         << ": i = " << immidx << "; " << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
///
/// The generated text is a "// CHECK:" line for the test function name, one
/// "// CHECK:" line per pattern produced by GenerateChecksForIntrinsic, and a
/// function "test_<mangled name>" that forwards its parameters to the
/// intrinsic.  Immediate operands ('i') are not parameters of the test
/// function; a constant is passed for them instead.
static std::string GenTest(const std::string &name,
                           const std::string &proto,
                           StringRef outTypeStr, StringRef inTypeStr,
                           bool isShift, bool isHiddenLOp,
                           ClassKind ck, const std::string &InstName) {
  assert(!proto.empty() && "");
  std::string s;

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type. Strip off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }

  std::vector<std::string> FileCheckPatterns;
  GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
                             isHiddenLOp, FileCheckPatterns);

  // Emit the FileCheck patterns.
  s += "// CHECK: test_" + mangledName + "\n";
  // If for any reason we do not want to emit an instruction check,
  // FileCheckPatterns will be empty.
  if (FileCheckPatterns.size()) {
    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
                                                  e = FileCheckPatterns.end();
         i != e;
         ++i) {
      s += "// CHECK: " + *i + "\n";
    }
  }

  // Emit the start of the test function.
  s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
  char arg = 'a';
  std::string comma;
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    // Do not create arguments for values that must be immediate constants.
    if (proto[i] == 'i')
      continue;
    s += comma + TypeString(proto[i], inTypeStr) + " ";
    s.push_back(arg);
    comma = ", ";
  }
  s += ") {\n ";

  if (proto[0] != 'v')
    s += "return ";
  s += mangledName + "(";
  arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    if (proto[i] == 'i') {
      // For immediate operands, test the maximum value.
      if (isShift)
        s += "1"; // FIXME
      else
        // The immediate generally refers to a lane in the preceding argument.
        s += utostr(RangeFromType(proto[i-1], inTypeStr));
    } else {
      s.push_back(arg);
    }
    if ((i + 1) < e)
      s += ", ";
  }
  s += ");\n}\n\n";
  return s;
}

/// runTests - Write out a complete set of tests for all of the Neon
/// intrinsics.
2227void NeonEmitter::runTests(raw_ostream &OS) { 2228 OS << 2229 "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n" 2230 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 2231 "// RUN: | FileCheck %s\n" 2232 "\n" 2233 "// REQUIRES: long_tests\n" 2234 "\n" 2235 "#include <arm_neon.h>\n" 2236 "\n"; 2237 2238 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 2239 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2240 Record *R = RV[i]; 2241 std::string name = R->getValueAsString("Name"); 2242 std::string Proto = R->getValueAsString("Prototype"); 2243 std::string Types = R->getValueAsString("Types"); 2244 bool isShift = R->getValueAsBit("isShift"); 2245 std::string InstName = R->getValueAsString("InstName"); 2246 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 2247 2248 SmallVector<StringRef, 16> TypeVec; 2249 ParseTypes(R, Types, TypeVec); 2250 2251 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2252 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2253 if (kind == OpUnavailable) 2254 continue; 2255 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2256 if (kind == OpReinterpret) { 2257 bool outQuad = false; 2258 bool dummy = false; 2259 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2260 for (unsigned srcti = 0, srcte = TypeVec.size(); 2261 srcti != srcte; ++srcti) { 2262 bool inQuad = false; 2263 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2264 if (srcti == ti || inQuad != outQuad) 2265 continue; 2266 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 2267 isShift, isHiddenLOp, ck, InstName); 2268 } 2269 } else { 2270 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], 2271 isShift, isHiddenLOp, ck, InstName); 2272 } 2273 } 2274 OS << "\n"; 2275 } 2276} 2277 2278namespace clang { 2279void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2280 NeonEmitter(Records).run(OS); 2281} 2282void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2283 
NeonEmitter(Records).runHeader(OS); 2284} 2285void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2286 NeonEmitter(Records).runTests(OS); 2287} 2288} // End namespace clang 2289