// NeonEmitter.cpp revision 097a4b487897ca29f0f371c81b6a8b6c1ca599e4
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend is responsible for emitting arm_neon.h, which includes 11// a declaration and definition of each function specified by the ARM NEON 12// compiler interface. See ARM document DUI0348B. 13// 14// Each NEON instruction is implemented in terms of 1 or more functions which 15// are suffixed with the element type of the input vectors. Functions may be 16// implemented in terms of generic vector operations such as +, *, -, etc. or 17// by calling a __builtin_-prefixed function which will be handled by clang's 18// CodeGen library. 19// 20// Additional validation code can be generated by this file when runHeader() is 21// called, rather than the normal run() entry point. A complete set of tests 22// for Neon intrinsics can be generated by calling the runTests() entry point. 
23// 24//===----------------------------------------------------------------------===// 25 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/ADT/SmallString.h" 28#include "llvm/ADT/SmallVector.h" 29#include "llvm/ADT/StringExtras.h" 30#include "llvm/ADT/StringMap.h" 31#include "llvm/Support/ErrorHandling.h" 32#include "llvm/TableGen/Error.h" 33#include "llvm/TableGen/Record.h" 34#include "llvm/TableGen/TableGenBackend.h" 35#include <string> 36using namespace llvm; 37 38enum OpKind { 39 OpNone, 40 OpUnavailable, 41 OpAdd, 42 OpAddl, 43 OpAddlHi, 44 OpAddw, 45 OpAddwHi, 46 OpSub, 47 OpSubl, 48 OpSublHi, 49 OpSubw, 50 OpSubwHi, 51 OpMul, 52 OpMla, 53 OpMlal, 54 OpMullHi, 55 OpMlalHi, 56 OpMls, 57 OpMlsl, 58 OpMlslHi, 59 OpMulN, 60 OpMlaN, 61 OpMlsN, 62 OpMlalN, 63 OpMlslN, 64 OpMulLane, 65 OpMullLane, 66 OpMlaLane, 67 OpMlsLane, 68 OpMlalLane, 69 OpMlslLane, 70 OpQDMullLane, 71 OpQDMlalLane, 72 OpQDMlslLane, 73 OpQDMulhLane, 74 OpQRDMulhLane, 75 OpEq, 76 OpGe, 77 OpLe, 78 OpGt, 79 OpLt, 80 OpNeg, 81 OpNot, 82 OpAnd, 83 OpOr, 84 OpXor, 85 OpAndNot, 86 OpOrNot, 87 OpCast, 88 OpConcat, 89 OpDup, 90 OpDupLane, 91 OpHi, 92 OpLo, 93 OpSelect, 94 OpRev16, 95 OpRev32, 96 OpRev64, 97 OpReinterpret, 98 OpAddhnHi, 99 OpRAddhnHi, 100 OpSubhnHi, 101 OpRSubhnHi, 102 OpAbdl, 103 OpAbdlHi, 104 OpAba, 105 OpAbal, 106 OpAbalHi, 107 OpQDMullHi, 108 OpQDMlalHi, 109 OpQDMlslHi, 110 OpDiv, 111 OpLongHi, 112 OpNarrowHi, 113 OpMovlHi 114}; 115 116enum ClassKind { 117 ClassNone, 118 ClassI, // generic integer instruction, e.g., "i8" suffix 119 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 120 ClassW, // width-specific instruction, e.g., "8" suffix 121 ClassB, // bitcast arguments with enum argument to specify type 122 ClassL, // Logical instructions which are op instructions 123 // but we need to not emit any suffix for in our 124 // tests. 125 ClassNoTest // Instructions which we do not test since they are 126 // not TRUE instructions. 
127}; 128 129/// NeonTypeFlags - Flags to identify the types for overloaded Neon 130/// builtins. These must be kept in sync with the flags in 131/// include/clang/Basic/TargetBuiltins.h. 132namespace { 133class NeonTypeFlags { 134 enum { 135 EltTypeMask = 0xf, 136 UnsignedFlag = 0x10, 137 QuadFlag = 0x20 138 }; 139 uint32_t Flags; 140 141public: 142 enum EltType { 143 Int8, 144 Int16, 145 Int32, 146 Int64, 147 Poly8, 148 Poly16, 149 Float16, 150 Float32, 151 Float64 152 }; 153 154 NeonTypeFlags(unsigned F) : Flags(F) {} 155 NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) { 156 if (IsUnsigned) 157 Flags |= UnsignedFlag; 158 if (IsQuad) 159 Flags |= QuadFlag; 160 } 161 162 uint32_t getFlags() const { return Flags; } 163}; 164} // end anonymous namespace 165 166namespace { 167class NeonEmitter { 168 RecordKeeper &Records; 169 StringMap<OpKind> OpMap; 170 DenseMap<Record*, ClassKind> ClassMap; 171 172public: 173 NeonEmitter(RecordKeeper &R) : Records(R) { 174 OpMap["OP_NONE"] = OpNone; 175 OpMap["OP_UNAVAILABLE"] = OpUnavailable; 176 OpMap["OP_ADD"] = OpAdd; 177 OpMap["OP_ADDL"] = OpAddl; 178 OpMap["OP_ADDLHi"] = OpAddlHi; 179 OpMap["OP_ADDW"] = OpAddw; 180 OpMap["OP_ADDWHi"] = OpAddwHi; 181 OpMap["OP_SUB"] = OpSub; 182 OpMap["OP_SUBL"] = OpSubl; 183 OpMap["OP_SUBLHi"] = OpSublHi; 184 OpMap["OP_SUBW"] = OpSubw; 185 OpMap["OP_SUBWHi"] = OpSubwHi; 186 OpMap["OP_MUL"] = OpMul; 187 OpMap["OP_MLA"] = OpMla; 188 OpMap["OP_MLAL"] = OpMlal; 189 OpMap["OP_MULLHi"] = OpMullHi; 190 OpMap["OP_MLALHi"] = OpMlalHi; 191 OpMap["OP_MLS"] = OpMls; 192 OpMap["OP_MLSL"] = OpMlsl; 193 OpMap["OP_MLSLHi"] = OpMlslHi; 194 OpMap["OP_MUL_N"] = OpMulN; 195 OpMap["OP_MLA_N"] = OpMlaN; 196 OpMap["OP_MLS_N"] = OpMlsN; 197 OpMap["OP_MLAL_N"] = OpMlalN; 198 OpMap["OP_MLSL_N"] = OpMlslN; 199 OpMap["OP_MUL_LN"]= OpMulLane; 200 OpMap["OP_MULL_LN"] = OpMullLane; 201 OpMap["OP_MLA_LN"]= OpMlaLane; 202 OpMap["OP_MLS_LN"]= OpMlsLane; 203 OpMap["OP_MLAL_LN"] = OpMlalLane; 204 
OpMap["OP_MLSL_LN"] = OpMlslLane; 205 OpMap["OP_QDMULL_LN"] = OpQDMullLane; 206 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane; 207 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane; 208 OpMap["OP_QDMULH_LN"] = OpQDMulhLane; 209 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; 210 OpMap["OP_EQ"] = OpEq; 211 OpMap["OP_GE"] = OpGe; 212 OpMap["OP_LE"] = OpLe; 213 OpMap["OP_GT"] = OpGt; 214 OpMap["OP_LT"] = OpLt; 215 OpMap["OP_NEG"] = OpNeg; 216 OpMap["OP_NOT"] = OpNot; 217 OpMap["OP_AND"] = OpAnd; 218 OpMap["OP_OR"] = OpOr; 219 OpMap["OP_XOR"] = OpXor; 220 OpMap["OP_ANDN"] = OpAndNot; 221 OpMap["OP_ORN"] = OpOrNot; 222 OpMap["OP_CAST"] = OpCast; 223 OpMap["OP_CONC"] = OpConcat; 224 OpMap["OP_HI"] = OpHi; 225 OpMap["OP_LO"] = OpLo; 226 OpMap["OP_DUP"] = OpDup; 227 OpMap["OP_DUP_LN"] = OpDupLane; 228 OpMap["OP_SEL"] = OpSelect; 229 OpMap["OP_REV16"] = OpRev16; 230 OpMap["OP_REV32"] = OpRev32; 231 OpMap["OP_REV64"] = OpRev64; 232 OpMap["OP_REINT"] = OpReinterpret; 233 OpMap["OP_ADDHNHi"] = OpAddhnHi; 234 OpMap["OP_RADDHNHi"] = OpRAddhnHi; 235 OpMap["OP_SUBHNHi"] = OpSubhnHi; 236 OpMap["OP_RSUBHNHi"] = OpRSubhnHi; 237 OpMap["OP_ABDL"] = OpAbdl; 238 OpMap["OP_ABDLHi"] = OpAbdlHi; 239 OpMap["OP_ABA"] = OpAba; 240 OpMap["OP_ABAL"] = OpAbal; 241 OpMap["OP_ABALHi"] = OpAbalHi; 242 OpMap["OP_QDMULLHi"] = OpQDMullHi; 243 OpMap["OP_QDMLALHi"] = OpQDMlalHi; 244 OpMap["OP_QDMLSLHi"] = OpQDMlslHi; 245 OpMap["OP_DIV"] = OpDiv; 246 OpMap["OP_LONG_HI"] = OpLongHi; 247 OpMap["OP_NARROW_HI"] = OpNarrowHi; 248 OpMap["OP_MOVL_HI"] = OpMovlHi; 249 250 Record *SI = R.getClass("SInst"); 251 Record *II = R.getClass("IInst"); 252 Record *WI = R.getClass("WInst"); 253 Record *SOpI = R.getClass("SOpInst"); 254 Record *IOpI = R.getClass("IOpInst"); 255 Record *WOpI = R.getClass("WOpInst"); 256 Record *LOpI = R.getClass("LOpInst"); 257 Record *NoTestOpI = R.getClass("NoTestOpInst"); 258 259 ClassMap[SI] = ClassS; 260 ClassMap[II] = ClassI; 261 ClassMap[WI] = ClassW; 262 ClassMap[SOpI] = ClassS; 263 ClassMap[IOpI] = ClassI; 264 
ClassMap[WOpI] = ClassW; 265 ClassMap[LOpI] = ClassL; 266 ClassMap[NoTestOpI] = ClassNoTest; 267 } 268 269 // run - Emit arm_neon.h.inc 270 void run(raw_ostream &o); 271 272 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 273 void runHeader(raw_ostream &o); 274 275 // runTests - Emit tests for all the Neon intrinsics. 276 void runTests(raw_ostream &o); 277 278private: 279 void emitIntrinsic(raw_ostream &OS, Record *R, 280 StringMap<ClassKind> &EmittedMap); 281 void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap, 282 bool isA64GenBuiltinDef); 283 void genOverloadTypeCheckCode(raw_ostream &OS, 284 StringMap<ClassKind> &A64IntrinsicMap, 285 bool isA64TypeCheck); 286 void genIntrinsicRangeCheckCode(raw_ostream &OS, 287 StringMap<ClassKind> &A64IntrinsicMap, 288 bool isA64RangeCheck); 289 void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 290 bool isA64TestGen); 291}; 292} // end anonymous namespace 293 294/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs, 295/// which each StringRef representing a single type declared in the string. 296/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing 297/// 2xfloat and 4xfloat respectively. 298static void ParseTypes(Record *r, std::string &s, 299 SmallVectorImpl<StringRef> &TV) { 300 const char *data = s.data(); 301 int len = 0; 302 303 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 304 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' 305 || data[len] == 'H' || data[len] == 'S') 306 continue; 307 308 switch (data[len]) { 309 case 'c': 310 case 's': 311 case 'i': 312 case 'l': 313 case 'h': 314 case 'f': 315 case 'd': 316 break; 317 default: 318 PrintFatalError(r->getLoc(), 319 "Unexpected letter: " + std::string(data + len, 1)); 320 } 321 TV.push_back(StringRef(data, len + 1)); 322 data += len + 1; 323 len = -1; 324 } 325} 326 327/// Widen - Convert a type code into the next wider type. 
char -> short, 328/// short -> int, etc. 329static char Widen(const char t) { 330 switch (t) { 331 case 'c': 332 return 's'; 333 case 's': 334 return 'i'; 335 case 'i': 336 return 'l'; 337 case 'h': 338 return 'f'; 339 default: 340 PrintFatalError("unhandled type in widen!"); 341 } 342} 343 344/// Narrow - Convert a type code into the next smaller type. short -> char, 345/// float -> half float, etc. 346static char Narrow(const char t) { 347 switch (t) { 348 case 's': 349 return 'c'; 350 case 'i': 351 return 's'; 352 case 'l': 353 return 'i'; 354 case 'f': 355 return 'h'; 356 default: 357 PrintFatalError("unhandled type in narrow!"); 358 } 359} 360 361static std::string GetNarrowTypestr(StringRef ty) 362{ 363 std::string s; 364 for (size_t i = 0, end = ty.size(); i < end; i++) { 365 switch (ty[i]) { 366 case 's': 367 s += 'c'; 368 break; 369 case 'i': 370 s += 's'; 371 break; 372 case 'l': 373 s += 'i'; 374 break; 375 default: 376 s += ty[i]; 377 break; 378 } 379 } 380 381 return s; 382} 383 384/// For a particular StringRef, return the base type code, and whether it has 385/// the quad-vector, polynomial, or unsigned modifiers set. 386static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 387 unsigned off = 0; 388 // ignore scalar. 389 if (ty[off] == 'S') { 390 ++off; 391 } 392 // remember quad. 393 if (ty[off] == 'Q' || ty[off] == 'H') { 394 quad = true; 395 ++off; 396 } 397 398 // remember poly. 399 if (ty[off] == 'P') { 400 poly = true; 401 ++off; 402 } 403 404 // remember unsigned. 405 if (ty[off] == 'U') { 406 usgn = true; 407 ++off; 408 } 409 410 // base type to get the type string for. 411 return ty[off]; 412} 413 414/// ModType - Transform a type code and its modifiers based on a mod code. The 415/// mod code definitions may be found at the top of arm_neon.td. 
416static char ModType(const char mod, char type, bool &quad, bool &poly, 417 bool &usgn, bool &scal, bool &cnst, bool &pntr) { 418 switch (mod) { 419 case 't': 420 if (poly) { 421 poly = false; 422 usgn = true; 423 } 424 break; 425 case 'u': 426 usgn = true; 427 poly = false; 428 if (type == 'f') 429 type = 'i'; 430 if (type == 'd') 431 type = 'l'; 432 break; 433 case 'x': 434 usgn = false; 435 poly = false; 436 if (type == 'f') 437 type = 'i'; 438 if (type == 'd') 439 type = 'l'; 440 break; 441 case 'f': 442 if (type == 'h') 443 quad = true; 444 type = 'f'; 445 usgn = false; 446 break; 447 case 'g': 448 quad = false; 449 break; 450 case 'w': 451 type = Widen(type); 452 quad = true; 453 break; 454 case 'n': 455 type = Widen(type); 456 break; 457 case 'i': 458 type = 'i'; 459 scal = true; 460 break; 461 case 'l': 462 type = 'l'; 463 scal = true; 464 usgn = true; 465 break; 466 case 's': 467 case 'a': 468 scal = true; 469 break; 470 case 'k': 471 quad = true; 472 break; 473 case 'c': 474 cnst = true; 475 case 'p': 476 pntr = true; 477 scal = true; 478 break; 479 case 'h': 480 type = Narrow(type); 481 if (type == 'h') 482 quad = false; 483 break; 484 case 'q': 485 type = Narrow(type); 486 quad = true; 487 break; 488 case 'e': 489 type = Narrow(type); 490 usgn = true; 491 break; 492 case 'm': 493 type = Narrow(type); 494 quad = false; 495 break; 496 default: 497 break; 498 } 499 return type; 500} 501 502/// TypeString - for a modifier and type, generate the name of the typedef for 503/// that type. QUc -> uint8x8_t. 504static std::string TypeString(const char mod, StringRef typestr) { 505 bool quad = false; 506 bool poly = false; 507 bool usgn = false; 508 bool scal = false; 509 bool cnst = false; 510 bool pntr = false; 511 512 if (mod == 'v') 513 return "void"; 514 if (mod == 'i') 515 return "int"; 516 517 // base type to get the type string for. 
518 char type = ClassifyType(typestr, quad, poly, usgn); 519 520 // Based on the modifying character, change the type and width if necessary. 521 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 522 523 SmallString<128> s; 524 525 if (usgn) 526 s.push_back('u'); 527 528 switch (type) { 529 case 'c': 530 s += poly ? "poly8" : "int8"; 531 if (scal) 532 break; 533 s += quad ? "x16" : "x8"; 534 break; 535 case 's': 536 s += poly ? "poly16" : "int16"; 537 if (scal) 538 break; 539 s += quad ? "x8" : "x4"; 540 break; 541 case 'i': 542 s += "int32"; 543 if (scal) 544 break; 545 s += quad ? "x4" : "x2"; 546 break; 547 case 'l': 548 s += "int64"; 549 if (scal) 550 break; 551 s += quad ? "x2" : "x1"; 552 break; 553 case 'h': 554 s += "float16"; 555 if (scal) 556 break; 557 s += quad ? "x8" : "x4"; 558 break; 559 case 'f': 560 s += "float32"; 561 if (scal) 562 break; 563 s += quad ? "x4" : "x2"; 564 break; 565 case 'd': 566 s += "float64"; 567 if (scal) 568 break; 569 s += quad ? "x2" : "x1"; 570 break; 571 572 default: 573 PrintFatalError("unhandled type!"); 574 } 575 576 if (mod == '2') 577 s += "x2"; 578 if (mod == '3') 579 s += "x3"; 580 if (mod == '4') 581 s += "x4"; 582 583 // Append _t, finishing the type string typedef type. 584 s += "_t"; 585 586 if (cnst) 587 s += " const"; 588 589 if (pntr) 590 s += " *"; 591 592 return s.str(); 593} 594 595/// BuiltinTypeString - for a modifier and type, generate the clang 596/// BuiltinsARM.def prototype code for the function. See the top of clang's 597/// Builtins.def for a description of the type strings. 598static std::string BuiltinTypeString(const char mod, StringRef typestr, 599 ClassKind ck, bool ret) { 600 bool quad = false; 601 bool poly = false; 602 bool usgn = false; 603 bool scal = false; 604 bool cnst = false; 605 bool pntr = false; 606 607 if (mod == 'v') 608 return "v"; // void 609 if (mod == 'i') 610 return "i"; // int 611 612 // base type to get the type string for. 
613 char type = ClassifyType(typestr, quad, poly, usgn); 614 615 // Based on the modifying character, change the type and width if necessary. 616 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 617 618 // All pointers are void* pointers. Change type to 'v' now. 619 if (pntr) { 620 usgn = false; 621 poly = false; 622 type = 'v'; 623 } 624 // Treat half-float ('h') types as unsigned short ('s') types. 625 if (type == 'h') { 626 type = 's'; 627 usgn = true; 628 } 629 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f'); 630 631 if (scal) { 632 SmallString<128> s; 633 634 if (usgn) 635 s.push_back('U'); 636 else if (type == 'c') 637 s.push_back('S'); // make chars explicitly signed 638 639 if (type == 'l') // 64-bit long 640 s += "LLi"; 641 else 642 s.push_back(type); 643 644 if (cnst) 645 s.push_back('C'); 646 if (pntr) 647 s.push_back('*'); 648 return s.str(); 649 } 650 651 // Since the return value must be one type, return a vector type of the 652 // appropriate width which we will bitcast. An exception is made for 653 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 654 // fashion, storing them to a pointer arg. 655 if (ret) { 656 if (mod >= '2' && mod <= '4') 657 return "vv*"; // void result with void* first argument 658 if (mod == 'f' || (ck != ClassB && type == 'f')) 659 return quad ? "V4f" : "V2f"; 660 if (ck != ClassB && type == 's') 661 return quad ? "V8s" : "V4s"; 662 if (ck != ClassB && type == 'i') 663 return quad ? "V4i" : "V2i"; 664 if (ck != ClassB && type == 'l') 665 return quad ? "V2LLi" : "V1LLi"; 666 667 return quad ? "V16Sc" : "V8Sc"; 668 } 669 670 // Non-return array types are passed as individual vectors. 671 if (mod == '2') 672 return quad ? "V16ScV16Sc" : "V8ScV8Sc"; 673 if (mod == '3') 674 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; 675 if (mod == '4') 676 return quad ? 
"V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; 677 678 if (mod == 'f' || (ck != ClassB && type == 'f')) 679 return quad ? "V4f" : "V2f"; 680 if (ck != ClassB && type == 's') 681 return quad ? "V8s" : "V4s"; 682 if (ck != ClassB && type == 'i') 683 return quad ? "V4i" : "V2i"; 684 if (ck != ClassB && type == 'l') 685 return quad ? "V2LLi" : "V1LLi"; 686 687 return quad ? "V16Sc" : "V8Sc"; 688} 689 690/// InstructionTypeCode - Computes the ARM argument character code and 691/// quad status for a specific type string and ClassKind. 692static void InstructionTypeCode(const StringRef &typeStr, 693 const ClassKind ck, 694 bool &quad, 695 std::string &typeCode) { 696 bool poly = false; 697 bool usgn = false; 698 char type = ClassifyType(typeStr, quad, poly, usgn); 699 700 switch (type) { 701 case 'c': 702 switch (ck) { 703 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break; 704 case ClassI: typeCode = "i8"; break; 705 case ClassW: typeCode = "8"; break; 706 default: break; 707 } 708 break; 709 case 's': 710 switch (ck) { 711 case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break; 712 case ClassI: typeCode = "i16"; break; 713 case ClassW: typeCode = "16"; break; 714 default: break; 715 } 716 break; 717 case 'i': 718 switch (ck) { 719 case ClassS: typeCode = usgn ? "u32" : "s32"; break; 720 case ClassI: typeCode = "i32"; break; 721 case ClassW: typeCode = "32"; break; 722 default: break; 723 } 724 break; 725 case 'l': 726 switch (ck) { 727 case ClassS: typeCode = usgn ? 
"u64" : "s64"; break; 728 case ClassI: typeCode = "i64"; break; 729 case ClassW: typeCode = "64"; break; 730 default: break; 731 } 732 break; 733 case 'h': 734 switch (ck) { 735 case ClassS: 736 case ClassI: typeCode = "f16"; break; 737 case ClassW: typeCode = "16"; break; 738 default: break; 739 } 740 break; 741 case 'f': 742 switch (ck) { 743 case ClassS: 744 case ClassI: typeCode = "f32"; break; 745 case ClassW: typeCode = "32"; break; 746 default: break; 747 } 748 break; 749 case 'd': 750 switch (ck) { 751 case ClassS: 752 case ClassI: 753 typeCode += "f64"; 754 break; 755 case ClassW: 756 PrintFatalError("unhandled type!"); 757 default: 758 break; 759 } 760 break; 761 default: 762 PrintFatalError("unhandled type!"); 763 } 764} 765 766static char Insert_BHSD_Suffix(StringRef typestr){ 767 unsigned off = 0; 768 if(typestr[off++] == 'S'){ 769 while(typestr[off] == 'Q' || typestr[off] == 'H'|| 770 typestr[off] == 'P' || typestr[off] == 'U') 771 ++off; 772 switch (typestr[off]){ 773 default : break; 774 case 'c' : return 'b'; 775 case 's' : return 'h'; 776 case 'i' : 777 case 'f' : return 's'; 778 case 'l' : 779 case 'd' : return 'd'; 780 } 781 } 782 return 0; 783} 784 785/// MangleName - Append a type or width suffix to a base neon function name, 786/// and insert a 'q' in the appropriate location if type string starts with 'Q'. 787/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. 788/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used. 789static std::string MangleName(const std::string &name, StringRef typestr, 790 ClassKind ck) { 791 if (name == "vcvt_f32_f16") 792 return name; 793 794 bool quad = false; 795 std::string typeCode = ""; 796 797 InstructionTypeCode(typestr, ck, quad, typeCode); 798 799 std::string s = name; 800 801 if (typeCode.size() > 0) { 802 s += "_" + typeCode; 803 } 804 805 if (ck == ClassB) 806 s += "_v"; 807 808 // Insert a 'q' before the first '_' character so that it ends up before 809 // _lane or _n on vector-scalar operations. 
810 if (typestr.find("Q") != StringRef::npos) { 811 size_t pos = s.find('_'); 812 s = s.insert(pos, "q"); 813 } 814 char ins = Insert_BHSD_Suffix(typestr); 815 if(ins){ 816 size_t pos = s.find('_'); 817 s = s.insert(pos, &ins, 1); 818 } 819 820 return s; 821} 822 823static void PreprocessInstruction(const StringRef &Name, 824 const std::string &InstName, 825 std::string &Prefix, 826 bool &HasNPostfix, 827 bool &HasLanePostfix, 828 bool &HasDupPostfix, 829 bool &IsSpecialVCvt, 830 size_t &TBNumber) { 831 // All of our instruction name fields from arm_neon.td are of the form 832 // <instructionname>_... 833 // Thus we grab our instruction name via computation of said Prefix. 834 const size_t PrefixEnd = Name.find_first_of('_'); 835 // If InstName is passed in, we use that instead of our name Prefix. 836 Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName; 837 838 const StringRef Postfix = Name.slice(PrefixEnd, Name.size()); 839 840 HasNPostfix = Postfix.count("_n"); 841 HasLanePostfix = Postfix.count("_lane"); 842 HasDupPostfix = Postfix.count("_dup"); 843 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt"); 844 845 if (InstName.compare("vtbl") == 0 || 846 InstName.compare("vtbx") == 0) { 847 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII 848 // encoding to get its true value. 
849 TBNumber = Name[Name.size()-1] - 48; 850 } 851} 852 853/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have 854/// extracted, generate a FileCheck pattern for a Load Or Store 855static void 856GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef, 857 const std::string& OutTypeCode, 858 const bool &IsQuad, 859 const bool &HasDupPostfix, 860 const bool &HasLanePostfix, 861 const size_t Count, 862 std::string &RegisterSuffix) { 863 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1"); 864 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang 865 // will output a series of v{ld,st}1s, so we have to handle it specially. 866 if ((Count == 3 || Count == 4) && IsQuad) { 867 RegisterSuffix += "{"; 868 for (size_t i = 0; i < Count; i++) { 869 RegisterSuffix += "d{{[0-9]+}}"; 870 if (HasDupPostfix) { 871 RegisterSuffix += "[]"; 872 } 873 if (HasLanePostfix) { 874 RegisterSuffix += "[{{[0-9]+}}]"; 875 } 876 if (i < Count-1) { 877 RegisterSuffix += ", "; 878 } 879 } 880 RegisterSuffix += "}"; 881 } else { 882 883 // Handle normal loads and stores. 884 RegisterSuffix += "{"; 885 for (size_t i = 0; i < Count; i++) { 886 RegisterSuffix += "d{{[0-9]+}}"; 887 if (HasDupPostfix) { 888 RegisterSuffix += "[]"; 889 } 890 if (HasLanePostfix) { 891 RegisterSuffix += "[{{[0-9]+}}]"; 892 } 893 if (IsQuad && !HasLanePostfix) { 894 RegisterSuffix += ", d{{[0-9]+}}"; 895 if (HasDupPostfix) { 896 RegisterSuffix += "[]"; 897 } 898 } 899 if (i < Count-1) { 900 RegisterSuffix += ", "; 901 } 902 } 903 RegisterSuffix += "}, [r{{[0-9]+}}"; 904 905 // We only include the alignment hint if we have a vld1.*64 or 906 // a dup/lane instruction. 
907 if (IsLDSTOne) { 908 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") { 909 RegisterSuffix += ":" + OutTypeCode; 910 } 911 } 912 913 RegisterSuffix += "]"; 914 } 915} 916 917static bool HasNPostfixAndScalarArgs(const StringRef &NameRef, 918 const bool &HasNPostfix) { 919 return (NameRef.count("vmla") || 920 NameRef.count("vmlal") || 921 NameRef.count("vmlsl") || 922 NameRef.count("vmull") || 923 NameRef.count("vqdmlal") || 924 NameRef.count("vqdmlsl") || 925 NameRef.count("vqdmulh") || 926 NameRef.count("vqdmull") || 927 NameRef.count("vqrdmulh")) && HasNPostfix; 928} 929 930static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef, 931 const bool &HasLanePostfix) { 932 return (NameRef.count("vmla") || 933 NameRef.count("vmls") || 934 NameRef.count("vmlal") || 935 NameRef.count("vmlsl") || 936 (NameRef.count("vmul") && NameRef.size() == 3)|| 937 NameRef.count("vqdmlal") || 938 NameRef.count("vqdmlsl") || 939 NameRef.count("vqdmulh") || 940 NameRef.count("vqrdmulh")) && HasLanePostfix; 941} 942 943static bool IsSpecialLaneMultiply(const StringRef &NameRef, 944 const bool &HasLanePostfix, 945 const bool &IsQuad) { 946 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh")) 947 && IsQuad; 948 const bool IsVMull = NameRef.count("mull") && !IsQuad; 949 return (IsVMulOrMulh || IsVMull) && HasLanePostfix; 950} 951 952static void NormalizeProtoForRegisterPatternCreation(const std::string &Name, 953 const std::string &Proto, 954 const bool &HasNPostfix, 955 const bool &IsQuad, 956 const bool &HasLanePostfix, 957 const bool &HasDupPostfix, 958 std::string &NormedProto) { 959 // Handle generic case. 960 const StringRef NameRef(Name); 961 for (size_t i = 0, end = Proto.size(); i < end; i++) { 962 switch (Proto[i]) { 963 case 'u': 964 case 'f': 965 case 'd': 966 case 's': 967 case 'x': 968 case 't': 969 case 'n': 970 NormedProto += IsQuad? 
'q' : 'd'; 971 break; 972 case 'w': 973 case 'k': 974 NormedProto += 'q'; 975 break; 976 case 'g': 977 case 'h': 978 case 'e': 979 NormedProto += 'd'; 980 break; 981 case 'i': 982 NormedProto += HasLanePostfix? 'a' : 'i'; 983 break; 984 case 'a': 985 if (HasLanePostfix) { 986 NormedProto += 'a'; 987 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) { 988 NormedProto += IsQuad? 'q' : 'd'; 989 } else { 990 NormedProto += 'i'; 991 } 992 break; 993 } 994 } 995 996 // Handle Special Cases. 997 const bool IsNotVExt = !NameRef.count("vext"); 998 const bool IsVPADAL = NameRef.count("vpadal"); 999 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef, 1000 HasLanePostfix); 1001 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix, 1002 IsQuad); 1003 1004 if (IsSpecialLaneMul) { 1005 // If 1006 NormedProto[2] = NormedProto[3]; 1007 NormedProto.erase(3); 1008 } else if (NormedProto.size() == 4 && 1009 NormedProto[0] == NormedProto[1] && 1010 IsNotVExt) { 1011 // If NormedProto.size() == 4 and the first two proto characters are the 1012 // same, ignore the first. 1013 NormedProto = NormedProto.substr(1, 3); 1014 } else if (Is5OpLaneAccum) { 1015 // If we have a 5 op lane accumulator operation, we take characters 1,2,4 1016 std::string tmp = NormedProto.substr(1,2); 1017 tmp += NormedProto[4]; 1018 NormedProto = tmp; 1019 } else if (IsVPADAL) { 1020 // If we have VPADAL, ignore the first character. 1021 NormedProto = NormedProto.substr(0, 2); 1022 } else if (NameRef.count("vdup") && NormedProto.size() > 2) { 1023 // If our instruction is a dup instruction, keep only the first and 1024 // last characters. 1025 std::string tmp = ""; 1026 tmp += NormedProto[0]; 1027 tmp += NormedProto[NormedProto.size()-1]; 1028 NormedProto = tmp; 1029 } 1030} 1031 1032/// GenerateRegisterCheckPatterns - Given a bunch of data we have 1033/// extracted, generate a FileCheck pattern to check that an 1034/// instruction's arguments are correct. 
1035static void GenerateRegisterCheckPattern(const std::string &Name, 1036 const std::string &Proto, 1037 const std::string &OutTypeCode, 1038 const bool &HasNPostfix, 1039 const bool &IsQuad, 1040 const bool &HasLanePostfix, 1041 const bool &HasDupPostfix, 1042 const size_t &TBNumber, 1043 std::string &RegisterSuffix) { 1044 1045 RegisterSuffix = ""; 1046 1047 const StringRef NameRef(Name); 1048 const StringRef ProtoRef(Proto); 1049 1050 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) { 1051 return; 1052 } 1053 1054 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst"); 1055 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx"); 1056 1057 if (IsLoadStore) { 1058 // Grab N value from v{ld,st}N using its ascii representation. 1059 const size_t Count = NameRef[3] - 48; 1060 1061 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad, 1062 HasDupPostfix, HasLanePostfix, 1063 Count, RegisterSuffix); 1064 } else if (IsTBXOrTBL) { 1065 RegisterSuffix += "d{{[0-9]+}}, {"; 1066 for (size_t i = 0; i < TBNumber-1; i++) { 1067 RegisterSuffix += "d{{[0-9]+}}, "; 1068 } 1069 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}"; 1070 } else { 1071 // Handle a normal instruction. 1072 if (NameRef.count("vget") || NameRef.count("vset")) 1073 return; 1074 1075 // We first normalize our proto, since we only need to emit 4 1076 // different types of checks, yet have more than 4 proto types 1077 // that map onto those 4 patterns. 
    // Build a normalized register-pattern prototype, then translate each
    // code character into a FileCheck register operand pattern.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q': // quad register, e.g. q0
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd': // double register, e.g. d0
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i': // immediate operand, e.g. #3
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a': // lane access, e.g. d0[1]
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 is checked by name; its pattern has no register suffix.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix = false;
  // Our instruction is a vcvt instruction which requires special handling.
  // NOTE(review): IsSpecialVCvt is populated by PreprocessInstruction but is
  // not read again in this function.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // (size_t)-1 serves as the "not a vtbl/vtbx" sentinel.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // First 3 chars of the name are "vld" or "vst"; check for the .64
      // single-structure form.
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}

/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // If this builtin takes an immediate argument, we need to #define it rather
  // than use a standard declaration, so that SemaChecking can range check
  // the immediate passed by the user.
  if (proto.find('i') != std::string::npos)
    return true;

  // Pointer arguments need to use macros to avoid hiding aligned attributes
  // from the pointer type.
  if (proto.find('p') != std::string::npos ||
      proto.find('c') != std::string::npos)
    return true;

  return false;
}

/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
1289static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1290 // True for constant ints (i), pointers (p) and const pointers (c). 1291 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1292} 1293 1294// Generate the string "(argtype a, argtype b, ...)" 1295static std::string GenArgs(const std::string &proto, StringRef typestr) { 1296 bool define = UseMacro(proto); 1297 char arg = 'a'; 1298 1299 std::string s; 1300 s += "("; 1301 1302 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1303 if (define) { 1304 // Some macro arguments are used directly instead of being assigned 1305 // to local temporaries; prepend an underscore prefix to make their 1306 // names consistent with the local temporaries. 1307 if (MacroArgUsedDirectly(proto, i)) 1308 s += "__"; 1309 } else { 1310 s += TypeString(proto[i], typestr) + " __"; 1311 } 1312 s.push_back(arg); 1313 if ((i + 1) < e) 1314 s += ", "; 1315 } 1316 1317 s += ")"; 1318 return s; 1319} 1320 1321// Macro arguments are not type-checked like inline function arguments, so 1322// assign them to local temporaries to get the right type checking. 1323static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { 1324 char arg = 'a'; 1325 std::string s; 1326 bool generatedLocal = false; 1327 1328 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1329 // Do not create a temporary for an immediate argument. 1330 // That would defeat the whole point of using a macro! 1331 if (MacroArgUsedDirectly(proto, i)) 1332 continue; 1333 generatedLocal = true; 1334 1335 s += TypeString(proto[i], typestr) + " __"; 1336 s.push_back(arg); 1337 s += " = ("; 1338 s.push_back(arg); 1339 s += "); "; 1340 } 1341 1342 if (generatedLocal) 1343 s += "\\\n "; 1344 return s; 1345} 1346 1347// Use the vmovl builtin to sign-extend or zero-extend a vector. 1348static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1349 std::string s, high; 1350 high = h ? 
"_high" : ""; 1351 s = MangleName("vmovl" + high, typestr, ClassS); 1352 s += "(" + a + ")"; 1353 return s; 1354} 1355 1356// Get the high 64-bit part of a vector 1357static std::string GetHigh(const std::string &a, StringRef typestr) { 1358 std::string s; 1359 s = MangleName("vget_high", typestr, ClassS); 1360 s += "(" + a + ")"; 1361 return s; 1362} 1363 1364// Gen operation with two operands and get high 64-bit for both of two operands. 1365static std::string Gen2OpWith2High(StringRef typestr, 1366 const std::string &op, 1367 const std::string &a, 1368 const std::string &b) { 1369 std::string s; 1370 std::string Op1 = GetHigh(a, typestr); 1371 std::string Op2 = GetHigh(b, typestr); 1372 s = MangleName(op, typestr, ClassS); 1373 s += "(" + Op1 + ", " + Op2 + ");"; 1374 return s; 1375} 1376 1377// Gen operation with three operands and get high 64-bit of the latter 1378// two operands. 1379static std::string Gen3OpWith2High(StringRef typestr, 1380 const std::string &op, 1381 const std::string &a, 1382 const std::string &b, 1383 const std::string &c) { 1384 std::string s; 1385 std::string Op1 = GetHigh(b, typestr); 1386 std::string Op2 = GetHigh(c, typestr); 1387 s = MangleName(op, typestr, ClassS); 1388 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1389 return s; 1390} 1391 1392// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 
1393static std::string GenCombine(std::string typestr, 1394 const std::string &a, 1395 const std::string &b) { 1396 std::string s; 1397 s = MangleName("vcombine", typestr, ClassS); 1398 s += "(" + a + ", " + b + ")"; 1399 return s; 1400} 1401 1402static std::string Duplicate(unsigned nElts, StringRef typestr, 1403 const std::string &a) { 1404 std::string s; 1405 1406 s = "(" + TypeString('d', typestr) + "){ "; 1407 for (unsigned i = 0; i != nElts; ++i) { 1408 s += a; 1409 if ((i + 1) < nElts) 1410 s += ", "; 1411 } 1412 s += " }"; 1413 1414 return s; 1415} 1416 1417static std::string SplatLane(unsigned nElts, const std::string &vec, 1418 const std::string &lane) { 1419 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1420 for (unsigned i = 0; i < nElts; ++i) 1421 s += ", " + lane; 1422 s += ")"; 1423 return s; 1424} 1425 1426static std::string RemoveHigh(const std::string &name) { 1427 std::string s = name; 1428 std::size_t found = s.find("_high_"); 1429 if (found == std::string::npos) 1430 PrintFatalError("name should contain \"_high_\" for high intrinsics"); 1431 s.replace(found, 5, ""); 1432 return s; 1433} 1434 1435static unsigned GetNumElements(StringRef typestr, bool &quad) { 1436 quad = false; 1437 bool dummy = false; 1438 char type = ClassifyType(typestr, quad, dummy, dummy); 1439 unsigned nElts = 0; 1440 switch (type) { 1441 case 'c': nElts = 8; break; 1442 case 's': nElts = 4; break; 1443 case 'i': nElts = 2; break; 1444 case 'l': nElts = 1; break; 1445 case 'h': nElts = 4; break; 1446 case 'f': nElts = 2; break; 1447 case 'd': 1448 nElts = 1; 1449 break; 1450 default: 1451 PrintFatalError("unhandled type!"); 1452 } 1453 if (quad) nElts <<= 1; 1454 return nElts; 1455} 1456 1457// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  // ts is the return type string; for macros no "return" keyword is emitted
  // since the statement-expression's value is the macro's result.
  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    // "_n" variants splat the scalar operand across all lanes first.
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons cast the i1-vector result back to the declared return type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    // Concatenate two 64-bit halves via shuffle on int64x1_t views.
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    // The mask operand (proto[1]) determines the type used for the bitwise
    // arithmetic; the result is cast back to the return type above.
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Swap adjacent pairs of elements.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Reverse the elements within each 32-bit word.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Reverse the elements within each 64-bit doubleword.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    // Extract the high half into __a1 first, then shift-left-long by 0 to
    // widen it; the __a1 declaration is prepended before the "return".
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
      MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
      MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
      "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
      MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}

// Compute the NeonTypeFlags encoding (element type + signedness + quad bit)
// passed as the trailing constant argument to ClassB builtins.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  // 'v' (void) and 'f' modifiers carry no type info; use the first argument's
  // modifier instead.
  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  NeonTypeFlags::EltType ET;
  switch (type) {
  case 'c':
    ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
    break;
  case 's':
    ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
    break;
  case 'i':
    ET = NeonTypeFlags::Int32;
    break;
  case 'l':
    ET = NeonTypeFlags::Int64;
    break;
  case 'h':
    ET = NeonTypeFlags::Float16;
    break;
  case 'f':
    ET = NeonTypeFlags::Float32;
    break;
  case 'd':
    ET = NeonTypeFlags::Float64;
    break;
  default:
    PrintFatalError("unhandled type!");
  }
  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
  return Flags.getFlags();
}

// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' in the prototype means the last argument must be splatted before the
  // (non-splat) builtin is called.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}

// Generate the BUILTIN(...) macro line describing this intrinsic for
// clang's builtin database.
static std::string GenBuiltinDef(const std::string &name,
                                 const std::string &proto,
                                 StringRef typestr, ClassKind ck) {
  std::string s("BUILTIN(__builtin_neon_");

  // If all types are the same size, bitcasting the args will take care
  // of arg checking.  The actual signedness etc. will be taken care of with
  // special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  s += MangleName(name, typestr, ck);
  s += ", \"";

  for (unsigned i = 0, e = proto.size(); i != e; ++i)
    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += "i";

  s += "\", \"n\")";
  return s;
}

// Generate the full arm_neon.h declaration + definition for one intrinsic
// instance (one name/type combination).
static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  // Unavailable intrinsics are always emitted as declarations, never macros.
  bool define = UseMacro(proto) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  String off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr);

  // Definition.
  if (define) {
    s += " __extension__ ({ \\\n  ";
    s += GenMacroLocals(proto, inTypeStr);
  } else if (kind == OpUnavailable) {
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n  ";

  if (kind != OpNone)
    s += GenOpString(name, kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}

/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the MIT-style license header for the generated arm_neon.h.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): the second macro name below looks suspicious — ARM ACLE
  // spells the AArch64 SIMD feature macro differently ("__ARM_FEATURE_…");
  // confirm "__AARCH_FEATURE_ADVSIMD" is intended before changing the
  // emitted text.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool isA64 = false;

    // float64x2_t is only available on AArch64; guard it.
    if (type == 'd' && quad)
      isA64 = true;

    if (isA64)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit lane counts so the typedefs line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

    if (isA64)
      OS << "#endif\n";
  }
  OS << "\n";

  // Emit struct typedefs (the NxM multi-vector types, e.g. int8x8x2_t).
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool isA64 = false;

      if (type == 'd' && quad)
        isA64 = true;

      if (isA64)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";

      if (isA64)
        OS << "#endif\n";

      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VMOVL/VMULL/VABD were already emitted above.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // Same dependency-ordering trick as above for the "_high" helpers.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
2196void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, 2197 StringMap<ClassKind> &EmittedMap) { 2198 std::string name = R->getValueAsString("Name"); 2199 std::string Proto = R->getValueAsString("Prototype"); 2200 std::string Types = R->getValueAsString("Types"); 2201 2202 SmallVector<StringRef, 16> TypeVec; 2203 ParseTypes(R, Types, TypeVec); 2204 2205 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2206 2207 ClassKind classKind = ClassNone; 2208 if (R->getSuperClasses().size() >= 2) 2209 classKind = ClassMap[R->getSuperClasses()[1]]; 2210 if (classKind == ClassNone && kind == OpNone) 2211 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2212 2213 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2214 if (kind == OpReinterpret) { 2215 bool outQuad = false; 2216 bool dummy = false; 2217 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2218 for (unsigned srcti = 0, srcte = TypeVec.size(); 2219 srcti != srcte; ++srcti) { 2220 bool inQuad = false; 2221 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2222 if (srcti == ti || inQuad != outQuad) 2223 continue; 2224 std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], 2225 OpCast, ClassS); 2226 if (EmittedMap.count(s)) 2227 continue; 2228 EmittedMap[s] = ClassS; 2229 OS << s; 2230 } 2231 } else { 2232 std::string s = 2233 GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); 2234 if (EmittedMap.count(s)) 2235 continue; 2236 EmittedMap[s] = classKind; 2237 OS << s; 2238 } 2239 } 2240 OS << "\n"; 2241} 2242 2243static unsigned RangeFromType(const char mod, StringRef typestr) { 2244 // base type to get the type string for. 
2245 bool quad = false, dummy = false; 2246 char type = ClassifyType(typestr, quad, dummy, dummy); 2247 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy); 2248 2249 switch (type) { 2250 case 'c': 2251 return (8 << (int)quad) - 1; 2252 case 'h': 2253 case 's': 2254 return (4 << (int)quad) - 1; 2255 case 'f': 2256 case 'i': 2257 return (2 << (int)quad) - 1; 2258 case 'l': 2259 return (1 << (int)quad) - 1; 2260 default: 2261 PrintFatalError("unhandled type!"); 2262 } 2263} 2264 2265/// Generate the ARM and AArch64 intrinsic range checking code for 2266/// shift/lane immediates, checking for unique declarations. 2267void 2268NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2269 StringMap<ClassKind> &A64IntrinsicMap, 2270 bool isA64RangeCheck) { 2271 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2272 StringMap<OpKind> EmittedMap; 2273 2274 // Generate the intrinsic range checking code for shift/lane immediates. 2275 if (isA64RangeCheck) 2276 OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n"; 2277 else 2278 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2279 2280 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2281 Record *R = RV[i]; 2282 2283 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2284 if (k != OpNone) 2285 continue; 2286 2287 std::string name = R->getValueAsString("Name"); 2288 std::string Proto = R->getValueAsString("Prototype"); 2289 std::string Types = R->getValueAsString("Types"); 2290 std::string Rename = name + "@" + Proto; 2291 2292 // Functions with 'a' (the splat code) in the type prototype should not get 2293 // their own builtin as they use the non-splat variant. 2294 if (Proto.find('a') != std::string::npos) 2295 continue; 2296 2297 // Functions which do not have an immediate do not need to have range 2298 // checking code emitted. 
2299 size_t immPos = Proto.find('i'); 2300 if (immPos == std::string::npos) 2301 continue; 2302 2303 SmallVector<StringRef, 16> TypeVec; 2304 ParseTypes(R, Types, TypeVec); 2305 2306 if (R->getSuperClasses().size() < 2) 2307 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2308 2309 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2310 2311 // Do not include AArch64 range checks if not generating code for AArch64. 2312 bool isA64 = R->getValueAsBit("isA64"); 2313 if (!isA64RangeCheck && isA64) 2314 continue; 2315 2316 // Include ARM range checks in AArch64 but only if ARM intrinsics are not 2317 // redefined by AArch64 to handle new types. 2318 if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2319 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2320 if (A64CK == ck && ck != ClassNone) 2321 continue; 2322 } 2323 2324 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2325 std::string namestr, shiftstr, rangestr; 2326 2327 if (R->getValueAsBit("isVCVT_N")) { 2328 // VCVT between floating- and fixed-point values takes an immediate 2329 // in the range [1, 32] for f32, or [1, 64] for f64. 2330 ck = ClassB; 2331 if (name.find("32") != std::string::npos) 2332 rangestr = "l = 1; u = 31"; // upper bound = l + u 2333 else if (name.find("64") != std::string::npos) 2334 rangestr = "l = 1; u = 63"; 2335 else 2336 PrintFatalError(R->getLoc(), 2337 "Fixed point convert name should contains \"32\" or \"64\""); 2338 } else if (Proto.find('s') == std::string::npos) { 2339 // Builtins which are overloaded by type will need to have their upper 2340 // bound computed at Sema time based on the type constant. 2341 ck = ClassB; 2342 if (R->getValueAsBit("isShift")) { 2343 shiftstr = ", true"; 2344 2345 // Right shifts have an 'r' in the name, left shifts do not. 
2346 if (name.find('r') != std::string::npos) 2347 rangestr = "l = 1; "; 2348 } 2349 rangestr += "u = RFT(TV" + shiftstr + ")"; 2350 } else { 2351 // The immediate generally refers to a lane in the preceding argument. 2352 assert(immPos > 0 && "unexpected immediate operand"); 2353 rangestr = 2354 "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); 2355 } 2356 // Make sure cases appear only once by uniquing them in a string map. 2357 namestr = MangleName(name, TypeVec[ti], ck); 2358 if (EmittedMap.count(namestr)) 2359 continue; 2360 EmittedMap[namestr] = OpNone; 2361 2362 // Calculate the index of the immediate that should be range checked. 2363 unsigned immidx = 0; 2364 2365 // Builtins that return a struct of multiple vectors have an extra 2366 // leading arg for the struct return. 2367 if (Proto[0] >= '2' && Proto[0] <= '4') 2368 ++immidx; 2369 2370 // Add one to the index for each argument until we reach the immediate 2371 // to be checked. Structs of vectors are passed as multiple arguments. 2372 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { 2373 switch (Proto[ii]) { 2374 default: 2375 immidx += 1; 2376 break; 2377 case '2': 2378 immidx += 2; 2379 break; 2380 case '3': 2381 immidx += 3; 2382 break; 2383 case '4': 2384 immidx += 4; 2385 break; 2386 case 'i': 2387 ie = ii + 1; 2388 break; 2389 } 2390 } 2391 if (isA64RangeCheck) 2392 OS << "case AArch64::BI__builtin_neon_"; 2393 else 2394 OS << "case ARM::BI__builtin_neon_"; 2395 OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " 2396 << rangestr << "; break;\n"; 2397 } 2398 } 2399 OS << "#endif\n\n"; 2400} 2401 2402/// Generate the ARM and AArch64 overloaded type checking code for 2403/// SemaChecking.cpp, checking for unique builtin declarations. 
2404void 2405NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2406 StringMap<ClassKind> &A64IntrinsicMap, 2407 bool isA64TypeCheck) { 2408 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2409 StringMap<OpKind> EmittedMap; 2410 2411 // Generate the overloaded type checking code for SemaChecking.cpp 2412 if (isA64TypeCheck) 2413 OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n"; 2414 else 2415 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2416 2417 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2418 Record *R = RV[i]; 2419 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2420 if (k != OpNone) 2421 continue; 2422 2423 std::string Proto = R->getValueAsString("Prototype"); 2424 std::string Types = R->getValueAsString("Types"); 2425 std::string name = R->getValueAsString("Name"); 2426 std::string Rename = name + "@" + Proto; 2427 2428 // Functions with 'a' (the splat code) in the type prototype should not get 2429 // their own builtin as they use the non-splat variant. 2430 if (Proto.find('a') != std::string::npos) 2431 continue; 2432 2433 // Functions which have a scalar argument cannot be overloaded, no need to 2434 // check them if we are emitting the type checking code. 2435 if (Proto.find('s') != std::string::npos) 2436 continue; 2437 2438 SmallVector<StringRef, 16> TypeVec; 2439 ParseTypes(R, Types, TypeVec); 2440 2441 if (R->getSuperClasses().size() < 2) 2442 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2443 2444 // Do not include AArch64 type checks if not generating code for AArch64. 2445 bool isA64 = R->getValueAsBit("isA64"); 2446 if (!isA64TypeCheck && isA64) 2447 continue; 2448 2449 // Include ARM type check in AArch64 but only if ARM intrinsics 2450 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr 2451 // redefined in AArch64 to handle an additional 2 x f64 type. 
2452 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2453 if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2454 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2455 if (A64CK == ck && ck != ClassNone) 2456 continue; 2457 } 2458 2459 int si = -1, qi = -1; 2460 uint64_t mask = 0, qmask = 0; 2461 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2462 // Generate the switch case(s) for this builtin for the type validation. 2463 bool quad = false, poly = false, usgn = false; 2464 (void) ClassifyType(TypeVec[ti], quad, poly, usgn); 2465 2466 if (quad) { 2467 qi = ti; 2468 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2469 } else { 2470 si = ti; 2471 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2472 } 2473 } 2474 2475 // Check if the builtin function has a pointer or const pointer argument. 2476 int PtrArgNum = -1; 2477 bool HasConstPtr = false; 2478 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) { 2479 char ArgType = Proto[arg]; 2480 if (ArgType == 'c') { 2481 HasConstPtr = true; 2482 PtrArgNum = arg - 1; 2483 break; 2484 } 2485 if (ArgType == 'p') { 2486 PtrArgNum = arg - 1; 2487 break; 2488 } 2489 } 2490 // For sret builtins, adjust the pointer argument index. 2491 if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4')) 2492 PtrArgNum += 1; 2493 2494 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2495 // and vst1_lane intrinsics. Using a pointer to the vector element 2496 // type with one of those operations causes codegen to select an aligned 2497 // load/store instruction. If you want an unaligned operation, 2498 // the pointer argument needs to have less alignment than element type, 2499 // so just accept any pointer type. 
2500 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") { 2501 PtrArgNum = -1; 2502 HasConstPtr = false; 2503 } 2504 2505 if (mask) { 2506 if (isA64TypeCheck) 2507 OS << "case AArch64::BI__builtin_neon_"; 2508 else 2509 OS << "case ARM::BI__builtin_neon_"; 2510 OS << MangleName(name, TypeVec[si], ClassB) << ": mask = " 2511 << "0x" << utohexstr(mask) << "ULL"; 2512 if (PtrArgNum >= 0) 2513 OS << "; PtrArgNum = " << PtrArgNum; 2514 if (HasConstPtr) 2515 OS << "; HasConstPtr = true"; 2516 OS << "; break;\n"; 2517 } 2518 if (qmask) { 2519 if (isA64TypeCheck) 2520 OS << "case AArch64::BI__builtin_neon_"; 2521 else 2522 OS << "case ARM::BI__builtin_neon_"; 2523 OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = " 2524 << "0x" << utohexstr(qmask) << "ULL"; 2525 if (PtrArgNum >= 0) 2526 OS << "; PtrArgNum = " << PtrArgNum; 2527 if (HasConstPtr) 2528 OS << "; HasConstPtr = true"; 2529 OS << "; break;\n"; 2530 } 2531 } 2532 OS << "#endif\n\n"; 2533} 2534 2535/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 2536/// declaration of builtins, checking for unique builtin declarations. 
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only true builtins (OpNone) need a BUILTIN() macro; op-expanded
    // intrinsics are implemented inline in arm_neon.h.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    // Key used to match ARM intrinsics against AArch64 redefinitions.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // build a map of AArch64 intrinsics to be used in uniqueness checks.
  // Keyed by "Name@Prototype" and mapping to the record's ClassKind; the
  // three generators below consult it so that ARM intrinsics redefined by
  // AArch64 are emitted only once.
  StringMap<ClassKind> A64IntrinsicMap;
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    ClassKind CK = ClassNone;
    if (R->getSuperClasses().size() >= 2)
      CK = ClassMap[R->getSuperClasses()[1]];

    std::string Name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Rename = Name + "@" + Proto;
    // First record wins; duplicates keep the original ClassKind.
    if (A64IntrinsicMap.count(Rename))
      continue;
    A64IntrinsicMap[Rename] = CK;
  }

  // Generate BuiltinsARM.def for ARM
  genBuiltinsDef(OS, A64IntrinsicMap, false);

  // Generate BuiltinsAArch64.def for AArch64
  genBuiltinsDef(OS, A64IntrinsicMap, true);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);

  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);

  // Generate the AArch64 range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
2652static std::string GenTest(const std::string &name, 2653 const std::string &proto, 2654 StringRef outTypeStr, StringRef inTypeStr, 2655 bool isShift, bool isHiddenLOp, 2656 ClassKind ck, const std::string &InstName, 2657 bool isA64, 2658 std::string & testFuncProto) { 2659 assert(!proto.empty() && ""); 2660 std::string s; 2661 2662 // Function name with type suffix 2663 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2664 if (outTypeStr != inTypeStr) { 2665 // If the input type is different (e.g., for vreinterpret), append a suffix 2666 // for the input type. String off a "Q" (quad) prefix so that MangleName 2667 // does not insert another "q" in the name. 2668 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2669 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2670 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2671 } 2672 2673 // todo: GenerateChecksForIntrinsic does not generate CHECK 2674 // for aarch64 instructions yet 2675 std::vector<std::string> FileCheckPatterns; 2676 if (!isA64) { 2677 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 2678 isHiddenLOp, FileCheckPatterns); 2679 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 2680 } 2681 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 2682 2683 // Emit the FileCheck patterns. 2684 // If for any reason we do not want to emit a check, mangledInst 2685 // will be the empty string. 2686 if (FileCheckPatterns.size()) { 2687 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 2688 e = FileCheckPatterns.end(); 2689 i != e; 2690 ++i) { 2691 s += "// CHECK_ARM: " + *i + "\n"; 2692 } 2693 } 2694 2695 // Emit the start of the test function. 2696 2697 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 2698 char arg = 'a'; 2699 std::string comma; 2700 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2701 // Do not create arguments for values that must be immediate constants. 
2702 if (proto[i] == 'i') 2703 continue; 2704 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 2705 testFuncProto.push_back(arg); 2706 comma = ", "; 2707 } 2708 testFuncProto += ")"; 2709 2710 s+= testFuncProto; 2711 s+= " {\n "; 2712 2713 if (proto[0] != 'v') 2714 s += "return "; 2715 s += mangledName + "("; 2716 arg = 'a'; 2717 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2718 if (proto[i] == 'i') { 2719 // For immediate operands, test the maximum value. 2720 if (isShift) 2721 s += "1"; // FIXME 2722 else 2723 // The immediate generally refers to a lane in the preceding argument. 2724 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 2725 } else { 2726 s.push_back(arg); 2727 } 2728 if ((i + 1) < e) 2729 s += ", "; 2730 } 2731 s += ");\n}\n\n"; 2732 return s; 2733} 2734 2735/// Write out all intrinsic tests for the specified target, checking 2736/// for intrinsic test uniqueness. 2737void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 2738 bool isA64GenTest) { 2739 if (isA64GenTest) 2740 OS << "#ifdef __aarch64__\n"; 2741 2742 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2743 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2744 Record *R = RV[i]; 2745 std::string name = R->getValueAsString("Name"); 2746 std::string Proto = R->getValueAsString("Prototype"); 2747 std::string Types = R->getValueAsString("Types"); 2748 bool isShift = R->getValueAsBit("isShift"); 2749 std::string InstName = R->getValueAsString("InstName"); 2750 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 2751 bool isA64 = R->getValueAsBit("isA64"); 2752 2753 // do not include AArch64 intrinsic test if not generating 2754 // code for AArch64 2755 if (!isA64GenTest && isA64) 2756 continue; 2757 2758 SmallVector<StringRef, 16> TypeVec; 2759 ParseTypes(R, Types, TypeVec); 2760 2761 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2762 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2763 if (kind 
== OpUnavailable) 2764 continue; 2765 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2766 if (kind == OpReinterpret) { 2767 bool outQuad = false; 2768 bool dummy = false; 2769 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2770 for (unsigned srcti = 0, srcte = TypeVec.size(); 2771 srcti != srcte; ++srcti) { 2772 bool inQuad = false; 2773 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2774 if (srcti == ti || inQuad != outQuad) 2775 continue; 2776 std::string testFuncProto; 2777 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 2778 isShift, isHiddenLOp, ck, InstName, isA64, 2779 testFuncProto); 2780 if (EmittedMap.count(testFuncProto)) 2781 continue; 2782 EmittedMap[testFuncProto] = kind; 2783 OS << s << "\n"; 2784 } 2785 } else { 2786 std::string testFuncProto; 2787 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 2788 isHiddenLOp, ck, InstName, isA64, testFuncProto); 2789 if (EmittedMap.count(testFuncProto)) 2790 continue; 2791 EmittedMap[testFuncProto] = kind; 2792 OS << s << "\n"; 2793 } 2794 } 2795 } 2796 2797 if (isA64GenTest) 2798 OS << "#endif\n"; 2799} 2800/// runTests - Write out a complete set of tests for all of the Neon 2801/// intrinsics. 2802void NeonEmitter::runTests(raw_ostream &OS) { 2803 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 2804 "apcs-gnu\\\n" 2805 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 2806 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 2807 "\n" 2808 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 2809 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 2810 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 2811 "\n" 2812 "// REQUIRES: long_tests\n" 2813 "\n" 2814 "#include <arm_neon.h>\n" 2815 "\n"; 2816 2817 // ARM tests must be emitted before AArch64 tests to ensure 2818 // tests for intrinsics that are common to ARM and AArch64 2819 // appear only once in the output stream. 
2820 // The check for uniqueness is done in genTargetTest. 2821 StringMap<OpKind> EmittedMap; 2822 2823 genTargetTest(OS, EmittedMap, false); 2824 2825 genTargetTest(OS, EmittedMap, true); 2826} 2827 2828namespace clang { 2829void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2830 NeonEmitter(Records).run(OS); 2831} 2832void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2833 NeonEmitter(Records).runHeader(OS); 2834} 2835void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2836 NeonEmitter(Records).runTests(OS); 2837} 2838} // End namespace clang 2839