NeonEmitter.cpp revision 8137a607eebe799d95fd05226fb91119a5b054b0
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend is responsible for emitting arm_neon.h, which includes 11// a declaration and definition of each function specified by the ARM NEON 12// compiler interface. See ARM document DUI0348B. 13// 14// Each NEON instruction is implemented in terms of 1 or more functions which 15// are suffixed with the element type of the input vectors. Functions may be 16// implemented in terms of generic vector operations such as +, *, -, etc. or 17// by calling a __builtin_-prefixed function which will be handled by clang's 18// CodeGen library. 19// 20// Additional validation code can be generated by this file when runHeader() is 21// called, rather than the normal run() entry point. A complete set of tests 22// for Neon intrinsics can be generated by calling the runTests() entry point. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

// OpKind - Enumerates every generic operation an intrinsic record in
// arm_neon.td may name via its "OP_*" field.  OpNone means the intrinsic is
// implemented by a __builtin call instead of a generic expansion;
// OpUnavailable marks intrinsics that must be emitted as unavailable.
// The mapping from "OP_*" strings to these values lives in
// NeonEmitter::OpMap below.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ
};

// ClassKind - Describes how an instruction's name is suffixed with its
// element type, which also controls how its tests are generated.
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low 4 bits hold the EltType, bit 4 marks an
  // unsigned element, bit 5 marks a 128-bit (quad) vector.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  NeonTypeFlags(unsigned F) : Flags(F) {}
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - The TableGen backend.  The constructor builds the static
/// string-to-OpKind and record-class-to-ClassKind tables; the run* entry
/// points then walk the records and emit the header, builtin prototypes,
/// or tests.
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Map every "OP_*" tag usable in arm_neon.td to its OpKind.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"] = OpMullHi;
    OpMap["OP_MLALHi"] = OpMlalHi;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_TRN1"]  = OpTrn1;
    OpMap["OP_ZIP1"]  = OpZip1;
    OpMap["OP_UZP1"]  = OpUzp1;
    OpMap["OP_TRN2"]  = OpTrn2;
    OpMap["OP_ZIP2"]  = OpZip2;
    OpMap["OP_UZP2"]  = OpUzp2;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_XTN"] = OpXtnHi;
    OpMap["OP_SQXTUN"] = OpSqxtunHi;
    OpMap["OP_QXTN"] = OpQxtnHi;
    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY_LN"] = OpCopyLane;
    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;

    // Map each TableGen instruction class to the ClassKind that controls
    // name mangling and test emission for its records.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// which each StringRef representing a single type declared in the string.
/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
/// 2xfloat and 4xfloat respectively.
static void ParseTypes(Record *r, std::string &s,
                       SmallVectorImpl<StringRef> &TV) {
  const char *data = s.data();
  // len counts characters of the current (partial) type spec; it is reset
  // to -1 (then bumped to 0 by the loop increment) each time a complete
  // spec is pushed and 'data' is advanced past it.
  int len = 0;

  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
    // Modifier prefixes ('P'oly, 'Q'uad, 'U'nsigned, 'H'alf-quad,
    // 'S'calar) extend the current spec; keep scanning for the base type.
    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
                         || data[len] == 'H' || data[len] == 'S')
      continue;

    switch (data[len]) {
      case 'c':
      case 's':
      case 'i':
      case 'l':
      case 'h':
      case 'f':
      case 'd':
        break;
      default:
        PrintFatalError(r->getLoc(),
                      "Unexpected letter: " + std::string(data + len, 1));
    }
    TV.push_back(StringRef(data, len + 1));
    data += len + 1;
    len = -1;
  }
}

/// Widen - Convert a type code into the next wider type. char -> short,
/// short -> int, etc.
377static char Widen(const char t) { 378 switch (t) { 379 case 'c': 380 return 's'; 381 case 's': 382 return 'i'; 383 case 'i': 384 return 'l'; 385 case 'h': 386 return 'f'; 387 case 'f': 388 return 'd'; 389 default: 390 PrintFatalError("unhandled type in widen!"); 391 } 392} 393 394/// Narrow - Convert a type code into the next smaller type. short -> char, 395/// float -> half float, etc. 396static char Narrow(const char t) { 397 switch (t) { 398 case 's': 399 return 'c'; 400 case 'i': 401 return 's'; 402 case 'l': 403 return 'i'; 404 case 'f': 405 return 'h'; 406 case 'd': 407 return 'f'; 408 default: 409 PrintFatalError("unhandled type in narrow!"); 410 } 411} 412 413static std::string GetNarrowTypestr(StringRef ty) 414{ 415 std::string s; 416 for (size_t i = 0, end = ty.size(); i < end; i++) { 417 switch (ty[i]) { 418 case 's': 419 s += 'c'; 420 break; 421 case 'i': 422 s += 's'; 423 break; 424 case 'l': 425 s += 'i'; 426 break; 427 default: 428 s += ty[i]; 429 break; 430 } 431 } 432 433 return s; 434} 435 436/// For a particular StringRef, return the base type code, and whether it has 437/// the quad-vector, polynomial, or unsigned modifiers set. 438static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 439 unsigned off = 0; 440 // ignore scalar. 441 if (ty[off] == 'S') { 442 ++off; 443 } 444 // remember quad. 445 if (ty[off] == 'Q' || ty[off] == 'H') { 446 quad = true; 447 ++off; 448 } 449 450 // remember poly. 451 if (ty[off] == 'P') { 452 poly = true; 453 ++off; 454 } 455 456 // remember unsigned. 457 if (ty[off] == 'U') { 458 usgn = true; 459 ++off; 460 } 461 462 // base type to get the type string for. 463 return ty[off]; 464} 465 466/// ModType - Transform a type code and its modifiers based on a mod code. The 467/// mod code definitions may be found at the top of arm_neon.td. 
// Rewrites 'type' and the flag set (quad/poly/usgn/scal/cnst/pntr) according
// to the prototype modifier character 'mod'.  Several cases deliberately fall
// through to share the tail of the next case.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Polynomial types are treated as unsigned integers here.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'b':
      scal = true;
      // FALLTHROUGH: 'b' is the scalar form of 'u'.
    case 'u':
      usgn = true;
      poly = false;
      // Use the integer type of the same width as the float type.
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case '$':
      scal = true;
      // FALLTHROUGH: '$' is the scalar form of 'x'.
    case 'x':
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o':
      // Scalar double.
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y':
      scal = true;
      // FALLTHROUGH: 'y' is the scalar form of 'f'.
    case 'f':
      // A half-float input widens to a quad float vector.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      // Force the 64-bit (non-quad) form.
      quad = false;
      break;
    case 'B':
    case 'C':
    case 'D':
    case 'j':
      // Force the 128-bit (quad) form.
      quad = true;
      break;
    case 'w':
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      type = Widen(type);
      break;
    case 'i':
      // Immediate int operand.
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar unsigned 64-bit operand.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'z':
      type = Narrow(type);
      scal = true;
      break;
    case 'r':
      type = Widen(type);
      scal = true;
      break;
    case 's':
    case 'a':
      scal = true;
      break;
    case 'k':
      quad = true;
      break;
    case 'c':
      cnst = true;
      // FALLTHROUGH: a const pointer is still a pointer.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      type = Narrow(type);
      // Narrowing to half-float keeps the value in a 64-bit register.
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      type = Narrow(type);
      quad = false;
      break;
    default:
      break;
  }
  return type;
}

// Returns true when the prototype char denotes a 2-, 3- or 4-vector
// aggregate type ('2'..'4' or their quad forms 'B'..'D').
static bool IsMultiVecProto(const char p) {
  return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
}

/// TypeString - for a modifier and type, generate the name of the
typedef for 584/// that type. QUc -> uint8x8_t. 585static std::string TypeString(const char mod, StringRef typestr) { 586 bool quad = false; 587 bool poly = false; 588 bool usgn = false; 589 bool scal = false; 590 bool cnst = false; 591 bool pntr = false; 592 593 if (mod == 'v') 594 return "void"; 595 if (mod == 'i') 596 return "int"; 597 598 // base type to get the type string for. 599 char type = ClassifyType(typestr, quad, poly, usgn); 600 601 // Based on the modifying character, change the type and width if necessary. 602 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 603 604 SmallString<128> s; 605 606 if (usgn) 607 s.push_back('u'); 608 609 switch (type) { 610 case 'c': 611 s += poly ? "poly8" : "int8"; 612 if (scal) 613 break; 614 s += quad ? "x16" : "x8"; 615 break; 616 case 's': 617 s += poly ? "poly16" : "int16"; 618 if (scal) 619 break; 620 s += quad ? "x8" : "x4"; 621 break; 622 case 'i': 623 s += "int32"; 624 if (scal) 625 break; 626 s += quad ? "x4" : "x2"; 627 break; 628 case 'l': 629 s += "int64"; 630 if (scal) 631 break; 632 s += quad ? "x2" : "x1"; 633 break; 634 case 'h': 635 s += "float16"; 636 if (scal) 637 break; 638 s += quad ? "x8" : "x4"; 639 break; 640 case 'f': 641 s += "float32"; 642 if (scal) 643 break; 644 s += quad ? "x4" : "x2"; 645 break; 646 case 'd': 647 s += "float64"; 648 if (scal) 649 break; 650 s += quad ? "x2" : "x1"; 651 break; 652 653 default: 654 PrintFatalError("unhandled type!"); 655 } 656 657 if (mod == '2' || mod == 'B') 658 s += "x2"; 659 if (mod == '3' || mod == 'C') 660 s += "x3"; 661 if (mod == '4' || mod == 'D') 662 s += "x4"; 663 664 // Append _t, finishing the type string typedef type. 665 s += "_t"; 666 667 if (cnst) 668 s += " const"; 669 670 if (pntr) 671 s += " *"; 672 673 return s.str(); 674} 675 676/// BuiltinTypeString - for a modifier and type, generate the clang 677/// BuiltinsARM.def prototype code for the function. 
/// See the top of clang's Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Generic (ClassI/ClassW) scalar integer builtins are typed unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                        scal && type != 'f' && type != 'd');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (IsMultiVecProto(mod))
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 'd')
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and everything else) is passed as a byte vector.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2' || mod == 'B')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3' || mod == 'C')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4' || mod == 'D')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 'd')
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  // The suffix depends on both the element type and the instruction class:
  // ClassS uses signed/unsigned/poly spellings, ClassI a generic "iN",
  // ClassW just the bit width.
  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // There is no width-suffixed form for f64.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}

// For an AArch64 scalar intrinsic type string (leading 'S'), return the
// 'b'/'h'/'s'/'d' register-size suffix character for its base type, or 0
// when no suffix applies.
static char Insert_BHSD_Suffix(StringRef typestr){
  unsigned off = 0;
  if(typestr[off++] == 'S'){
    // Skip any modifier characters before the base type code.
    while(typestr[off] == 'Q' || typestr[off] == 'H'||
          typestr[off] == 'P' || typestr[off] == 'U')
      ++off;
    switch (typestr[off]){
    default  : break;
    case 'c' : return 'b';
    case 's' : return 'h';
    case 'i' :
    case 'f' : return 's';
    case 'l' :
    case 'd' : return 'd';
    }
  }
  return 0;
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // These two conversions keep their unsuffixed names.
  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64")
    return name;

  bool quad = false;
  std::string typeCode = "";

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    s += "_" + typeCode;
  }

  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  if (typestr.find("Q") != StringRef::npos) {
    size_t pos = s.find('_');
    s = s.insert(pos, "q");
  }
  // Likewise insert the scalar 'b'/'h'/'s'/'d' size suffix, if any.
  char ins = Insert_BHSD_Suffix(typestr);
  if (ins) {
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}

// Splits an intrinsic name into its instruction Prefix and inspects the
// remainder for the _n/_lane/_dup postfixes, special vcvt handling, and
// (for vtblN/vtbxN) the table size N.
static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0 ? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value.
    TBNumber = Name[Name.size()-1] - 48; // 48 == '0'
  }
}

/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // Quad forms use a second D register per element, except lane ops.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}

// True for the multiply-accumulate-style instructions whose _n form takes
// scalar arguments.
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          NameRef.count("vmull") ||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqdmull") ||
          NameRef.count("vqrdmulh")) && HasNPostfix;
}

// True for _lane accumulators that take five operands.
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}

// True for quad vmul/…mulh _lane forms and non-quad …mull _lane forms,
// which need special proto normalization below.
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
                             && IsQuad;
  const bool IsVMull = NameRef.count("mull") && !IsQuad;
  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
}

// Rewrites a proto string into a normalized form using only the characters
// 'q' (quad reg), 'd' (double reg), 'i' (immediate) and 'a' (lane access),
// so that GenerateRegisterCheckPattern only has to handle four cases.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If this is a special lane multiply, shift the fourth proto character
    // into the third slot and truncate to three characters.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup/vmov _n forms get no register pattern at all.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from v{ld,st}N using its ascii representation.
    const size_t Count = NameRef[3] - 48; // 48 == '0'

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
1165 std::string NormalizedProto(""); 1166 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad, 1167 HasLanePostfix, HasDupPostfix, 1168 NormalizedProto); 1169 1170 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) { 1171 const char &c = NormalizedProto[i]; 1172 switch (c) { 1173 case 'q': 1174 RegisterSuffix += "q{{[0-9]+}}, "; 1175 break; 1176 1177 case 'd': 1178 RegisterSuffix += "d{{[0-9]+}}, "; 1179 break; 1180 1181 case 'i': 1182 RegisterSuffix += "#{{[0-9]+}}, "; 1183 break; 1184 1185 case 'a': 1186 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], "; 1187 break; 1188 } 1189 } 1190 1191 // Remove extra ", ". 1192 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2); 1193 } 1194} 1195 1196/// GenerateChecksForIntrinsic - Given a specific instruction name + 1197/// typestr + class kind, generate the proper set of FileCheck 1198/// Patterns to check for. We could just return a string, but instead 1199/// use a vector since it provides us with the extra flexibility of 1200/// emitting multiple checks, which comes in handy for certain cases 1201/// like mla where we want to check for 2 different instructions. 1202static void GenerateChecksForIntrinsic(const std::string &Name, 1203 const std::string &Proto, 1204 StringRef &OutTypeStr, 1205 StringRef &InTypeStr, 1206 ClassKind Ck, 1207 const std::string &InstName, 1208 bool IsHiddenLOp, 1209 std::vector<std::string>& Result) { 1210 1211 // If Ck is a ClassNoTest instruction, just return so no test is 1212 // emitted. 1213 if(Ck == ClassNoTest) 1214 return; 1215 1216 if (Name == "vcvt_f32_f16") { 1217 Result.push_back("vcvt.f32.f16"); 1218 return; 1219 } 1220 1221 1222 // Now we preprocess our instruction given the data we have to get the 1223 // data that we need. 1224 // Create a StringRef for String Manipulation of our Name. 1225 const StringRef NameRef(Name); 1226 // Instruction Prefix. 1227 std::string Prefix; 1228 // The type code for our out type string. 
1229 std::string OutTypeCode; 1230 // To handle our different cases, we need to check for different postfixes. 1231 // Is our instruction a quad instruction. 1232 bool IsQuad = false; 1233 // Our instruction is of the form <instructionname>_n. 1234 bool HasNPostfix = false; 1235 // Our instruction is of the form <instructionname>_lane. 1236 bool HasLanePostfix = false; 1237 // Our instruction is of the form <instructionname>_dup. 1238 bool HasDupPostfix = false; 1239 // Our instruction is a vcvt instruction which requires special handling. 1240 bool IsSpecialVCvt = false; 1241 // If we have a vtbxN or vtblN instruction, this is set to N. 1242 size_t TBNumber = -1; 1243 // Register Suffix 1244 std::string RegisterSuffix; 1245 1246 PreprocessInstruction(NameRef, InstName, Prefix, 1247 HasNPostfix, HasLanePostfix, HasDupPostfix, 1248 IsSpecialVCvt, TBNumber); 1249 1250 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode); 1251 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad, 1252 HasLanePostfix, HasDupPostfix, TBNumber, 1253 RegisterSuffix); 1254 1255 // In the following section, we handle a bunch of special cases. You can tell 1256 // a special case by the fact we are returning early. 1257 1258 // If our instruction is a logical instruction without postfix or a 1259 // hidden LOp just return the current Prefix. 1260 if (Ck == ClassL || IsHiddenLOp) { 1261 Result.push_back(Prefix + " " + RegisterSuffix); 1262 return; 1263 } 1264 1265 // If we have a vmov, due to the many different cases, some of which 1266 // vary within the different intrinsics generated for a single 1267 // instruction type, just output a vmov. (e.g. given an instruction 1268 // A, A.u32 might be vmov and A.u8 might be vmov.8). 1269 // 1270 // FIXME: Maybe something can be done about this. The two cases that we care 1271 // about are vmov as an LType and vmov as a WType. 
1272 if (Prefix == "vmov") { 1273 Result.push_back(Prefix + " " + RegisterSuffix); 1274 return; 1275 } 1276 1277 // In the following section, we handle special cases. 1278 1279 if (OutTypeCode == "64") { 1280 // If we have a 64 bit vdup/vext and are handling an uint64x1_t 1281 // type, the intrinsic will be optimized away, so just return 1282 // nothing. On the other hand if we are handling an uint64x2_t 1283 // (i.e. quad instruction), vdup/vmov instructions should be 1284 // emitted. 1285 if (Prefix == "vdup" || Prefix == "vext") { 1286 if (IsQuad) { 1287 Result.push_back("{{vmov|vdup}}"); 1288 } 1289 return; 1290 } 1291 1292 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with 1293 // multiple register operands. 1294 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3" 1295 || Prefix == "vld4"; 1296 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3" 1297 || Prefix == "vst4"; 1298 if (MultiLoadPrefix || MultiStorePrefix) { 1299 Result.push_back(NameRef.slice(0, 3).str() + "1.64"); 1300 return; 1301 } 1302 1303 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of 1304 // emitting said instructions. So return a check for 1305 // vldr/vstr/vmov/str instead. 1306 if (HasLanePostfix || HasDupPostfix) { 1307 if (Prefix == "vst1") { 1308 Result.push_back("{{str|vstr|vmov}}"); 1309 return; 1310 } else if (Prefix == "vld1") { 1311 Result.push_back("{{ldr|vldr|vmov}}"); 1312 return; 1313 } 1314 } 1315 } 1316 1317 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are 1318 // sometimes disassembled as vtrn.32. We use a regex to handle both 1319 // cases. 1320 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") { 1321 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix); 1322 return; 1323 } 1324 1325 // Currently on most ARM processors, we do not use vmla/vmls for 1326 // quad floating point operations. Instead we output vmul + vadd. 
So 1327 // check if we have one of those instructions and just output a 1328 // check for vmul. 1329 if (OutTypeCode == "f32") { 1330 if (Prefix == "vmls") { 1331 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1332 Result.push_back("vsub." + OutTypeCode); 1333 return; 1334 } else if (Prefix == "vmla") { 1335 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1336 Result.push_back("vadd." + OutTypeCode); 1337 return; 1338 } 1339 } 1340 1341 // If we have vcvt, get the input type from the instruction name 1342 // (which should be of the form instname_inputtype) and append it 1343 // before the output type. 1344 if (Prefix == "vcvt") { 1345 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1); 1346 Prefix += "." + inTypeCode; 1347 } 1348 1349 // Append output type code to get our final mangled instruction. 1350 Prefix += "." + OutTypeCode; 1351 1352 Result.push_back(Prefix + " " + RegisterSuffix); 1353} 1354 1355/// UseMacro - Examine the prototype string to determine if the intrinsic 1356/// should be defined as a preprocessor macro instead of an inline function. 1357static bool UseMacro(const std::string &proto) { 1358 // If this builtin takes an immediate argument, we need to #define it rather 1359 // than use a standard declaration, so that SemaChecking can range check 1360 // the immediate passed by the user. 1361 if (proto.find('i') != std::string::npos) 1362 return true; 1363 1364 // Pointer arguments need to use macros to avoid hiding aligned attributes 1365 // from the pointer type. 1366 if (proto.find('p') != std::string::npos || 1367 proto.find('c') != std::string::npos) 1368 return true; 1369 1370 return false; 1371} 1372 1373/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 1374/// defined as a macro should be accessed directly instead of being first 1375/// assigned to a local temporary. 
1376static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1377 // True for constant ints (i), pointers (p) and const pointers (c). 1378 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1379} 1380 1381// Generate the string "(argtype a, argtype b, ...)" 1382static std::string GenArgs(const std::string &proto, StringRef typestr, 1383 const std::string &name) { 1384 bool define = UseMacro(proto); 1385 char arg = 'a'; 1386 1387 std::string s; 1388 s += "("; 1389 1390 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1391 if (define) { 1392 // Some macro arguments are used directly instead of being assigned 1393 // to local temporaries; prepend an underscore prefix to make their 1394 // names consistent with the local temporaries. 1395 if (MacroArgUsedDirectly(proto, i)) 1396 s += "__"; 1397 } else { 1398 s += TypeString(proto[i], typestr) + " __"; 1399 } 1400 s.push_back(arg); 1401 //To avoid argument being multiple defined, add extra number for renaming. 1402 if (name == "vcopy_lane" || name == "vcopy_laneq") 1403 s.push_back('1'); 1404 if ((i + 1) < e) 1405 s += ", "; 1406 } 1407 1408 s += ")"; 1409 return s; 1410} 1411 1412// Macro arguments are not type-checked like inline function arguments, so 1413// assign them to local temporaries to get the right type checking. 1414static std::string GenMacroLocals(const std::string &proto, StringRef typestr, 1415 const std::string &name ) { 1416 char arg = 'a'; 1417 std::string s; 1418 bool generatedLocal = false; 1419 1420 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1421 // Do not create a temporary for an immediate argument. 1422 // That would defeat the whole point of using a macro! 
1423 if (MacroArgUsedDirectly(proto, i)) 1424 continue; 1425 generatedLocal = true; 1426 bool extranumber = false; 1427 if (name == "vcopy_lane" || name == "vcopy_laneq") 1428 extranumber = true; 1429 1430 s += TypeString(proto[i], typestr) + " __"; 1431 s.push_back(arg); 1432 if(extranumber) 1433 s.push_back('1'); 1434 s += " = ("; 1435 s.push_back(arg); 1436 if(extranumber) 1437 s.push_back('1'); 1438 s += "); "; 1439 } 1440 1441 if (generatedLocal) 1442 s += "\\\n "; 1443 return s; 1444} 1445 1446// Use the vmovl builtin to sign-extend or zero-extend a vector. 1447static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1448 std::string s, high; 1449 high = h ? "_high" : ""; 1450 s = MangleName("vmovl" + high, typestr, ClassS); 1451 s += "(" + a + ")"; 1452 return s; 1453} 1454 1455// Get the high 64-bit part of a vector 1456static std::string GetHigh(const std::string &a, StringRef typestr) { 1457 std::string s; 1458 s = MangleName("vget_high", typestr, ClassS); 1459 s += "(" + a + ")"; 1460 return s; 1461} 1462 1463// Gen operation with two operands and get high 64-bit for both of two operands. 1464static std::string Gen2OpWith2High(StringRef typestr, 1465 const std::string &op, 1466 const std::string &a, 1467 const std::string &b) { 1468 std::string s; 1469 std::string Op1 = GetHigh(a, typestr); 1470 std::string Op2 = GetHigh(b, typestr); 1471 s = MangleName(op, typestr, ClassS); 1472 s += "(" + Op1 + ", " + Op2 + ");"; 1473 return s; 1474} 1475 1476// Gen operation with three operands and get high 64-bit of the latter 1477// two operands. 
1478static std::string Gen3OpWith2High(StringRef typestr, 1479 const std::string &op, 1480 const std::string &a, 1481 const std::string &b, 1482 const std::string &c) { 1483 std::string s; 1484 std::string Op1 = GetHigh(b, typestr); 1485 std::string Op2 = GetHigh(c, typestr); 1486 s = MangleName(op, typestr, ClassS); 1487 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1488 return s; 1489} 1490 1491// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 1492static std::string GenCombine(std::string typestr, 1493 const std::string &a, 1494 const std::string &b) { 1495 std::string s; 1496 s = MangleName("vcombine", typestr, ClassS); 1497 s += "(" + a + ", " + b + ")"; 1498 return s; 1499} 1500 1501static std::string Duplicate(unsigned nElts, StringRef typestr, 1502 const std::string &a) { 1503 std::string s; 1504 1505 s = "(" + TypeString('d', typestr) + "){ "; 1506 for (unsigned i = 0; i != nElts; ++i) { 1507 s += a; 1508 if ((i + 1) < nElts) 1509 s += ", "; 1510 } 1511 s += " }"; 1512 1513 return s; 1514} 1515 1516static std::string SplatLane(unsigned nElts, const std::string &vec, 1517 const std::string &lane) { 1518 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1519 for (unsigned i = 0; i < nElts; ++i) 1520 s += ", " + lane; 1521 s += ")"; 1522 return s; 1523} 1524 1525static std::string RemoveHigh(const std::string &name) { 1526 std::string s = name; 1527 std::size_t found = s.find("_high_"); 1528 if (found == std::string::npos) 1529 PrintFatalError("name should contain \"_high_\" for high intrinsics"); 1530 s.replace(found, 5, ""); 1531 return s; 1532} 1533 1534static unsigned GetNumElements(StringRef typestr, bool &quad) { 1535 quad = false; 1536 bool dummy = false; 1537 char type = ClassifyType(typestr, quad, dummy, dummy); 1538 unsigned nElts = 0; 1539 switch (type) { 1540 case 'c': nElts = 8; break; 1541 case 's': nElts = 4; break; 1542 case 'i': nElts = 2; break; 1543 case 'l': nElts = 1; break; 1544 case 'h': nElts = 4; 
break;
  case 'f': nElts = 2; break;
  case 'd':
    nElts = 1;
    break;
  default:
    PrintFatalError("unhandled type!");
  }
  // Quad (128-bit) vectors hold twice as many elements.
  if (quad) nElts <<= 1;
  return nElts;
}

// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
// The returned string is the body of the emitted function/macro; when the
// intrinsic is emitted as an inline function (not a macro), it is prefixed
// with "return ".  Arguments are referred to as __a, __b, __c, __d.
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  // Basic arithmetic, expressed with generic vector operators.
  case OpAdd:
    s += "__a + __b;";
    break;
  // Widening forms: operands are extended with vmovl (or vmovl_high for
  // the *Hi variants) before the operation.
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  // _n forms multiply by a scalar splatted across a vector; _lane forms
  // multiply by one lane broadcast with __builtin_shufflevector.
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMulXLane:
    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMullHiLane:
    s += MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlalHiLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  // Fused multiply-subtract by lane: emitted as vfma with a negated operand.
  // Extra __a1/__b1/__c1 temporaries avoid name clashes during macro
  // expansion of the nested vfma_lane(q) macro.
  case OpFMSLane:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpFMSLaneQ:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlslHiLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  // Saturating doubling multiply family, by-lane and high-half variants.
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMullHiLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlalHiLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslHiLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: vector compare operators yield boolean masks; cast to the
  // intrinsic's result type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  // Concatenate two 64-bit vectors via a shuffle of int64x1_t views.
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  // Reversal ops: emit shuffle masks that reverse elements within each
  // 16/32/64-bit chunk respectively.
  case OpRev16:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  // *Hi narrowing ops: narrow __b, then concatenate it above __a.
  case OpXtnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
      MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
      "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpSqxtunHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
      MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
      "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpQxtnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
      MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
      "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtnHi: {
    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
    s = TypeString(proto[1], typestr) + " __a1 = " +
      MangleName(FName, typestr, ClassS) + "(__b);\n  " +
      "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  case OpFcvtlHi: {
    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
      ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
    break;
  }
  case OpFcvtxnHi: {
    s = TypeString(proto[1], typestr) + " __a1 = " +
      MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
      "return __builtin_shufflevector(__a, __a1";
    for (unsigned i = 0; i < nElts * 4; ++i)
      s += ", " + utostr(i);
    s += ");";
    break;
  }
  // Permutation ops: uzp takes even/odd elements, zip interleaves halves,
  // trn interleaves even/odd element pairs.
  case OpUzp1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i);
    s += ");";
    break;
  case OpUzp2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < nElts; i++)
      s += ", " + utostr(2*i+1);
    s += ");";
    break;
  case OpZip1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
      s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpZip2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = nElts/2; i < nElts; i++)
      s += ", " + utostr(i) + ", " + utostr(i+nElts);
    s += ");";
    break;
  case OpTrn1:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
      s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
    s += ");";
    break;
  case OpTrn2:
    s += "__builtin_shufflevector(__a, __b";
    for (unsigned i = 0; i < (nElts/2); i++)
      s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
    s += ");";
    break;
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  // High-half add/sub-and-narrow: narrow __b/__c, then combine below __a.
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  // Widen the high half of __a via vshll by 0 bits.
  case OpMovlHi: {
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
      MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
      MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
      "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
      MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  // Copy-lane family: read a lane from __c1, insert it into __a1.  The
  // '1'-suffixed names match the renamed macro locals (see GenMacroLocals).
  case OpCopyLane: {
    s += TypeString('s', typestr) + " __c2 = " +
      MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
      MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyQLane: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
      "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
    break;
  }
  case OpCopyLaneQ: {
    std::string typeCode = "";
    InstructionTypeCode(typestr, ClassS, quad, typeCode);
    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
      "(__c1,
__d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);"; 2004 break; 2005 } 2006 default: 2007 PrintFatalError("unknown OpKind!"); 2008 } 2009 return s; 2010} 2011 2012static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 2013 unsigned mod = proto[0]; 2014 2015 if (mod == 'v' || mod == 'f') 2016 mod = proto[1]; 2017 2018 bool quad = false; 2019 bool poly = false; 2020 bool usgn = false; 2021 bool scal = false; 2022 bool cnst = false; 2023 bool pntr = false; 2024 2025 // Base type to get the type string for. 2026 char type = ClassifyType(typestr, quad, poly, usgn); 2027 2028 // Based on the modifying character, change the type and width if necessary. 2029 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 2030 2031 NeonTypeFlags::EltType ET; 2032 switch (type) { 2033 case 'c': 2034 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 2035 break; 2036 case 's': 2037 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 2038 break; 2039 case 'i': 2040 ET = NeonTypeFlags::Int32; 2041 break; 2042 case 'l': 2043 ET = NeonTypeFlags::Int64; 2044 break; 2045 case 'h': 2046 ET = NeonTypeFlags::Float16; 2047 break; 2048 case 'f': 2049 ET = NeonTypeFlags::Float32; 2050 break; 2051 case 'd': 2052 ET = NeonTypeFlags::Float64; 2053 break; 2054 default: 2055 PrintFatalError("unhandled type!"); 2056 } 2057 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 2058 return Flags.getFlags(); 2059} 2060 2061static bool ProtoHasScalar(const std::string proto) 2062{ 2063 return (proto.find('s') != std::string::npos 2064 || proto.find('r') != std::string::npos); 2065} 2066 2067// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 2068static std::string GenBuiltin(const std::string &name, const std::string &proto, 2069 StringRef typestr, ClassKind ck) { 2070 std::string s; 2071 2072 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 2073 // sret-like argument. 
  bool sret = IsMultiVecProto(proto[0]);

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  // Emit the return plumbing: either a cast of the builtin's result to the
  // public type, or (for sret-like multi-vector results) a local "r" that the
  // builtin fills in through its first pointer argument.
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' marks a splatted scalar operand (the _n forms).
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    // Proto chars '2'..'4' and 'B'..'D' both denote 2-4 vector structs.
    unsigned NumOfVec = 0;
    if (proto[i] >= '2' && proto[i] <= '4') {
      NumOfVec = proto[i] - '0';
    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
      NumOfVec = proto[i] - 'A' + 1;
    }

    if (NumOfVec > 0) {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // Emit each .val[n] subvector as a separate builtin argument.
      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // For _n forms, the trailing scalar is splatted into a full vector.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // Return the sret-style result after the call.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}

// Generate a BUILTIN(__builtin_neon_..., "type-string", "n") record for the
// clang builtins .def file.
static std::string GenBuiltinDef(const std::string &name,
                                 const std::string &proto,
                                 StringRef typestr, ClassKind ck) {
  std::string s("BUILTIN(__builtin_neon_");

  // If all types are the same size, bitcasting the args will take care
  // of arg checking.  The actual signedness etc. will be taken care of with
  // special enums.
2199 if (!ProtoHasScalar(proto)) 2200 ck = ClassB; 2201 2202 s += MangleName(name, typestr, ck); 2203 s += ", \""; 2204 2205 for (unsigned i = 0, e = proto.size(); i != e; ++i) 2206 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 2207 2208 // Extra constant integer to hold type class enum for this function, e.g. s8 2209 if (ck == ClassB) 2210 s += "i"; 2211 2212 s += "\", \"n\")"; 2213 return s; 2214} 2215 2216static std::string GenIntrinsic(const std::string &name, 2217 const std::string &proto, 2218 StringRef outTypeStr, StringRef inTypeStr, 2219 OpKind kind, ClassKind classKind) { 2220 assert(!proto.empty() && ""); 2221 bool define = UseMacro(proto) && kind != OpUnavailable; 2222 std::string s; 2223 2224 // static always inline + return type 2225 if (define) 2226 s += "#define "; 2227 else 2228 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 2229 2230 // Function name with type suffix 2231 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2232 if (outTypeStr != inTypeStr) { 2233 // If the input type is different (e.g., for vreinterpret), append a suffix 2234 // for the input type. String off a "Q" (quad) prefix so that MangleName 2235 // does not insert another "q" in the name. 2236 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2237 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2238 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2239 } 2240 s += mangledName; 2241 2242 // Function arguments 2243 s += GenArgs(proto, inTypeStr, name); 2244 2245 // Definition. 
2246 if (define) { 2247 s += " __extension__ ({ \\\n "; 2248 s += GenMacroLocals(proto, inTypeStr, name); 2249 } else if (kind == OpUnavailable) { 2250 s += " __attribute__((unavailable));\n"; 2251 return s; 2252 } else 2253 s += " {\n "; 2254 2255 if (kind != OpNone) 2256 s += GenOpString(name, kind, proto, outTypeStr); 2257 else 2258 s += GenBuiltin(name, proto, outTypeStr, classKind); 2259 if (define) 2260 s += " })"; 2261 else 2262 s += " }"; 2263 s += "\n"; 2264 return s; 2265} 2266 2267/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2268/// is comprised of type definitions and function declarations. 2269void NeonEmitter::run(raw_ostream &OS) { 2270 OS << 2271 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------" 2272 "---===\n" 2273 " *\n" 2274 " * Permission is hereby granted, free of charge, to any person obtaining " 2275 "a copy\n" 2276 " * of this software and associated documentation files (the \"Software\")," 2277 " to deal\n" 2278 " * in the Software without restriction, including without limitation the " 2279 "rights\n" 2280 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2281 "and/or sell\n" 2282 " * copies of the Software, and to permit persons to whom the Software is\n" 2283 " * furnished to do so, subject to the following conditions:\n" 2284 " *\n" 2285 " * The above copyright notice and this permission notice shall be " 2286 "included in\n" 2287 " * all copies or substantial portions of the Software.\n" 2288 " *\n" 2289 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2290 "EXPRESS OR\n" 2291 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2292 "MERCHANTABILITY,\n" 2293 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2294 "SHALL THE\n" 2295 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2296 "OTHER\n" 2297 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2298 "ARISING FROM,\n" 2299 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2300 "DEALINGS IN\n" 2301 " * THE SOFTWARE.\n" 2302 " *\n" 2303 " *===--------------------------------------------------------------------" 2304 "---===\n" 2305 " */\n\n"; 2306 2307 OS << "#ifndef __ARM_NEON_H\n"; 2308 OS << "#define __ARM_NEON_H\n\n"; 2309 2310 OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n"; 2311 OS << "#error \"NEON support not enabled\"\n"; 2312 OS << "#endif\n\n"; 2313 2314 OS << "#include <stdint.h>\n\n"; 2315 2316 // Emit NEON-specific scalar typedefs. 2317 OS << "typedef float float32_t;\n"; 2318 OS << "typedef __fp16 float16_t;\n"; 2319 2320 OS << "#ifdef __aarch64__\n"; 2321 OS << "typedef double float64_t;\n"; 2322 OS << "#endif\n\n"; 2323 2324 // For now, signedness of polynomial types depends on target 2325 OS << "#ifdef __aarch64__\n"; 2326 OS << "typedef uint8_t poly8_t;\n"; 2327 OS << "typedef uint16_t poly16_t;\n"; 2328 OS << "#else\n"; 2329 OS << "typedef int8_t poly8_t;\n"; 2330 OS << "typedef int16_t poly16_t;\n"; 2331 OS << "#endif\n"; 2332 2333 // Emit Neon vector typedefs. 2334 std::string TypedefTypes( 2335 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs"); 2336 SmallVector<StringRef, 24> TDTypeVec; 2337 ParseTypes(0, TypedefTypes, TDTypeVec); 2338 2339 // Emit vector typedefs. 2340 bool isA64 = false; 2341 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2342 bool dummy, quad = false, poly = false; 2343 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2344 bool preinsert = false; 2345 bool postinsert = false; 2346 2347 if (type == 'd') { 2348 preinsert = isA64? false: true; 2349 isA64 = true; 2350 } else { 2351 postinsert = isA64? 
true: false; 2352 isA64 = false; 2353 } 2354 if (postinsert) 2355 OS << "#endif\n"; 2356 if (preinsert) 2357 OS << "#ifdef __aarch64__\n"; 2358 2359 if (poly) 2360 OS << "typedef __attribute__((neon_polyvector_type("; 2361 else 2362 OS << "typedef __attribute__((neon_vector_type("; 2363 2364 unsigned nElts = GetNumElements(TDTypeVec[i], quad); 2365 OS << utostr(nElts) << "))) "; 2366 if (nElts < 10) 2367 OS << " "; 2368 2369 OS << TypeString('s', TDTypeVec[i]); 2370 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; 2371 2372 } 2373 OS << "\n"; 2374 2375 // Emit struct typedefs. 2376 isA64 = false; 2377 for (unsigned vi = 2; vi != 5; ++vi) { 2378 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2379 bool dummy, quad = false, poly = false; 2380 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2381 bool preinsert = false; 2382 bool postinsert = false; 2383 2384 if (type == 'd') { 2385 preinsert = isA64? false: true; 2386 isA64 = true; 2387 } else { 2388 postinsert = isA64? true: false; 2389 isA64 = false; 2390 } 2391 if (postinsert) 2392 OS << "#endif\n"; 2393 if (preinsert) 2394 OS << "#ifdef __aarch64__\n"; 2395 2396 std::string ts = TypeString('d', TDTypeVec[i]); 2397 std::string vs = TypeString('0' + vi, TDTypeVec[i]); 2398 OS << "typedef struct " << vs << " {\n"; 2399 OS << " " << ts << " val"; 2400 OS << "[" << utostr(vi) << "]"; 2401 OS << ";\n} "; 2402 OS << vs << ";\n"; 2403 OS << "\n"; 2404 } 2405 } 2406 2407 OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n"; 2408 2409 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 2410 2411 StringMap<ClassKind> EmittedMap; 2412 2413 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other 2414 // intrinsics. 
(Some of the saturating multiply instructions are also 2415 // used to implement the corresponding "_lane" variants, but tablegen 2416 // sorts the records into alphabetical order so that the "_lane" variants 2417 // come after the intrinsics they use.) 2418 emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap); 2419 emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap); 2420 emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap); 2421 emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap); 2422 2423 // ARM intrinsics must be emitted before AArch64 intrinsics to ensure 2424 // common intrinsics appear only once in the output stream. 2425 // The check for uniquiness is done in emitIntrinsic. 2426 // Emit ARM intrinsics. 2427 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2428 Record *R = RV[i]; 2429 2430 // Skip AArch64 intrinsics; they will be emitted at the end. 2431 bool isA64 = R->getValueAsBit("isA64"); 2432 if (isA64) 2433 continue; 2434 2435 if (R->getName() != "VMOVL" && R->getName() != "VMULL" && 2436 R->getName() != "VABD") 2437 emitIntrinsic(OS, R, EmittedMap); 2438 } 2439 2440 // Emit AArch64-specific intrinsics. 2441 OS << "#ifdef __aarch64__\n"; 2442 2443 emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap); 2444 emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap); 2445 emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap); 2446 2447 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2448 Record *R = RV[i]; 2449 2450 // Skip ARM intrinsics already included above. 2451 bool isA64 = R->getValueAsBit("isA64"); 2452 if (!isA64) 2453 continue; 2454 2455 emitIntrinsic(OS, R, EmittedMap); 2456 } 2457 2458 OS << "#endif\n\n"; 2459 2460 OS << "#undef __ai\n\n"; 2461 OS << "#endif /* __ARM_NEON_H */\n"; 2462} 2463 2464/// emitIntrinsic - Write out the arm_neon.h header file definitions for the 2465/// intrinsics specified by record R checking for intrinsic uniqueness. 
2466void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, 2467 StringMap<ClassKind> &EmittedMap) { 2468 std::string name = R->getValueAsString("Name"); 2469 std::string Proto = R->getValueAsString("Prototype"); 2470 std::string Types = R->getValueAsString("Types"); 2471 2472 SmallVector<StringRef, 16> TypeVec; 2473 ParseTypes(R, Types, TypeVec); 2474 2475 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2476 2477 ClassKind classKind = ClassNone; 2478 if (R->getSuperClasses().size() >= 2) 2479 classKind = ClassMap[R->getSuperClasses()[1]]; 2480 if (classKind == ClassNone && kind == OpNone) 2481 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2482 2483 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2484 if (kind == OpReinterpret) { 2485 bool outQuad = false; 2486 bool dummy = false; 2487 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2488 for (unsigned srcti = 0, srcte = TypeVec.size(); 2489 srcti != srcte; ++srcti) { 2490 bool inQuad = false; 2491 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2492 if (srcti == ti || inQuad != outQuad) 2493 continue; 2494 std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], 2495 OpCast, ClassS); 2496 if (EmittedMap.count(s)) 2497 continue; 2498 EmittedMap[s] = ClassS; 2499 OS << s; 2500 } 2501 } else { 2502 std::string s = 2503 GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); 2504 if (EmittedMap.count(s)) 2505 continue; 2506 EmittedMap[s] = classKind; 2507 OS << s; 2508 } 2509 } 2510 OS << "\n"; 2511} 2512 2513static unsigned RangeFromType(const char mod, StringRef typestr) { 2514 // base type to get the type string for. 
2515 bool quad = false, dummy = false; 2516 char type = ClassifyType(typestr, quad, dummy, dummy); 2517 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy); 2518 2519 switch (type) { 2520 case 'c': 2521 return (8 << (int)quad) - 1; 2522 case 'h': 2523 case 's': 2524 return (4 << (int)quad) - 1; 2525 case 'f': 2526 case 'i': 2527 return (2 << (int)quad) - 1; 2528 case 'd': 2529 case 'l': 2530 return (1 << (int)quad) - 1; 2531 default: 2532 PrintFatalError("unhandled type!"); 2533 } 2534} 2535 2536static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) { 2537 // base type to get the type string for. 2538 bool dummy = false; 2539 char type = ClassifyType(typestr, dummy, dummy, dummy); 2540 type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy); 2541 2542 switch (type) { 2543 case 'c': 2544 return 7; 2545 case 'h': 2546 case 's': 2547 return 15; 2548 case 'f': 2549 case 'i': 2550 return 31; 2551 case 'd': 2552 case 'l': 2553 return 63; 2554 default: 2555 PrintFatalError("unhandled type!"); 2556 } 2557} 2558 2559/// Generate the ARM and AArch64 intrinsic range checking code for 2560/// shift/lane immediates, checking for unique declarations. 2561void 2562NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2563 StringMap<ClassKind> &A64IntrinsicMap, 2564 bool isA64RangeCheck) { 2565 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2566 StringMap<OpKind> EmittedMap; 2567 2568 // Generate the intrinsic range checking code for shift/lane immediates. 
2569 if (isA64RangeCheck) 2570 OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n"; 2571 else 2572 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2573 2574 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2575 Record *R = RV[i]; 2576 2577 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2578 if (k != OpNone) 2579 continue; 2580 2581 std::string name = R->getValueAsString("Name"); 2582 std::string Proto = R->getValueAsString("Prototype"); 2583 std::string Types = R->getValueAsString("Types"); 2584 std::string Rename = name + "@" + Proto; 2585 2586 // Functions with 'a' (the splat code) in the type prototype should not get 2587 // their own builtin as they use the non-splat variant. 2588 if (Proto.find('a') != std::string::npos) 2589 continue; 2590 2591 // Functions which do not have an immediate do not need to have range 2592 // checking code emitted. 2593 size_t immPos = Proto.find('i'); 2594 if (immPos == std::string::npos) 2595 continue; 2596 2597 SmallVector<StringRef, 16> TypeVec; 2598 ParseTypes(R, Types, TypeVec); 2599 2600 if (R->getSuperClasses().size() < 2) 2601 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2602 2603 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2604 2605 // Do not include AArch64 range checks if not generating code for AArch64. 2606 bool isA64 = R->getValueAsBit("isA64"); 2607 if (!isA64RangeCheck && isA64) 2608 continue; 2609 2610 // Include ARM range checks in AArch64 but only if ARM intrinsics are not 2611 // redefined by AArch64 to handle new types. 
2612 if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2613 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2614 if (A64CK == ck && ck != ClassNone) 2615 continue; 2616 } 2617 2618 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2619 std::string namestr, shiftstr, rangestr; 2620 2621 if (R->getValueAsBit("isVCVT_N")) { 2622 // VCVT between floating- and fixed-point values takes an immediate 2623 // in the range [1, 32] for f32, or [1, 64] for f64. 2624 ck = ClassB; 2625 if (name.find("32") != std::string::npos) 2626 rangestr = "l = 1; u = 31"; // upper bound = l + u 2627 else if (name.find("64") != std::string::npos) 2628 rangestr = "l = 1; u = 63"; 2629 else 2630 PrintFatalError(R->getLoc(), 2631 "Fixed point convert name should contains \"32\" or \"64\""); 2632 2633 } else if (R->getValueAsBit("isScalarShift")) { 2634 // Right shifts have an 'r' in the name, left shifts do not. Convert 2635 // instructions have the same bounds and right shifts. 2636 if (name.find('r') != std::string::npos || 2637 name.find("cvt") != std::string::npos) 2638 rangestr = "l = 1; "; 2639 2640 rangestr += "u = " + 2641 utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti])); 2642 } else if (!ProtoHasScalar(Proto)) { 2643 // Builtins which are overloaded by type will need to have their upper 2644 // bound computed at Sema time based on the type constant. 2645 ck = ClassB; 2646 if (R->getValueAsBit("isShift")) { 2647 shiftstr = ", true"; 2648 2649 // Right shifts have an 'r' in the name, left shifts do not. 2650 if (name.find('r') != std::string::npos) 2651 rangestr = "l = 1; "; 2652 } 2653 rangestr += "u = RFT(TV" + shiftstr + ")"; 2654 } else { 2655 // The immediate generally refers to a lane in the preceding argument. 2656 assert(immPos > 0 && "unexpected immediate operand"); 2657 rangestr = 2658 "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); 2659 } 2660 // Make sure cases appear only once by uniquing them in a string map. 
2661 namestr = MangleName(name, TypeVec[ti], ck); 2662 if (EmittedMap.count(namestr)) 2663 continue; 2664 EmittedMap[namestr] = OpNone; 2665 2666 // Calculate the index of the immediate that should be range checked. 2667 unsigned immidx = 0; 2668 2669 // Builtins that return a struct of multiple vectors have an extra 2670 // leading arg for the struct return. 2671 if (IsMultiVecProto(Proto[0])) 2672 ++immidx; 2673 2674 // Add one to the index for each argument until we reach the immediate 2675 // to be checked. Structs of vectors are passed as multiple arguments. 2676 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { 2677 switch (Proto[ii]) { 2678 default: 2679 immidx += 1; 2680 break; 2681 case '2': 2682 case 'B': 2683 immidx += 2; 2684 break; 2685 case '3': 2686 case 'C': 2687 immidx += 3; 2688 break; 2689 case '4': 2690 case 'D': 2691 immidx += 4; 2692 break; 2693 case 'i': 2694 ie = ii + 1; 2695 break; 2696 } 2697 } 2698 if (isA64RangeCheck) 2699 OS << "case AArch64::BI__builtin_neon_"; 2700 else 2701 OS << "case ARM::BI__builtin_neon_"; 2702 OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " 2703 << rangestr << "; break;\n"; 2704 } 2705 } 2706 OS << "#endif\n\n"; 2707} 2708 2709/// Generate the ARM and AArch64 overloaded type checking code for 2710/// SemaChecking.cpp, checking for unique builtin declarations. 
2711void 2712NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 2713 StringMap<ClassKind> &A64IntrinsicMap, 2714 bool isA64TypeCheck) { 2715 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2716 StringMap<OpKind> EmittedMap; 2717 2718 // Generate the overloaded type checking code for SemaChecking.cpp 2719 if (isA64TypeCheck) 2720 OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n"; 2721 else 2722 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 2723 2724 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2725 Record *R = RV[i]; 2726 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2727 if (k != OpNone) 2728 continue; 2729 2730 std::string Proto = R->getValueAsString("Prototype"); 2731 std::string Types = R->getValueAsString("Types"); 2732 std::string name = R->getValueAsString("Name"); 2733 std::string Rename = name + "@" + Proto; 2734 2735 // Functions with 'a' (the splat code) in the type prototype should not get 2736 // their own builtin as they use the non-splat variant. 2737 if (Proto.find('a') != std::string::npos) 2738 continue; 2739 2740 // Functions which have a scalar argument cannot be overloaded, no need to 2741 // check them if we are emitting the type checking code. 2742 if (ProtoHasScalar(Proto)) 2743 continue; 2744 2745 SmallVector<StringRef, 16> TypeVec; 2746 ParseTypes(R, Types, TypeVec); 2747 2748 if (R->getSuperClasses().size() < 2) 2749 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2750 2751 // Do not include AArch64 type checks if not generating code for AArch64. 2752 bool isA64 = R->getValueAsBit("isA64"); 2753 if (!isA64TypeCheck && isA64) 2754 continue; 2755 2756 // Include ARM type check in AArch64 but only if ARM intrinsics 2757 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr 2758 // redefined in AArch64 to handle an additional 2 x f64 type. 
2759 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2760 if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2761 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2762 if (A64CK == ck && ck != ClassNone) 2763 continue; 2764 } 2765 2766 int si = -1, qi = -1; 2767 uint64_t mask = 0, qmask = 0; 2768 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2769 // Generate the switch case(s) for this builtin for the type validation. 2770 bool quad = false, poly = false, usgn = false; 2771 (void) ClassifyType(TypeVec[ti], quad, poly, usgn); 2772 2773 if (quad) { 2774 qi = ti; 2775 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2776 } else { 2777 si = ti; 2778 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 2779 } 2780 } 2781 2782 // Check if the builtin function has a pointer or const pointer argument. 2783 int PtrArgNum = -1; 2784 bool HasConstPtr = false; 2785 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) { 2786 char ArgType = Proto[arg]; 2787 if (ArgType == 'c') { 2788 HasConstPtr = true; 2789 PtrArgNum = arg - 1; 2790 break; 2791 } 2792 if (ArgType == 'p') { 2793 PtrArgNum = arg - 1; 2794 break; 2795 } 2796 } 2797 // For sret builtins, adjust the pointer argument index. 2798 if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0])) 2799 PtrArgNum += 1; 2800 2801 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2802 // and vst1_lane intrinsics. Using a pointer to the vector element 2803 // type with one of those operations causes codegen to select an aligned 2804 // load/store instruction. If you want an unaligned operation, 2805 // the pointer argument needs to have less alignment than element type, 2806 // so just accept any pointer type. 
2807 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") { 2808 PtrArgNum = -1; 2809 HasConstPtr = false; 2810 } 2811 2812 if (mask) { 2813 if (isA64TypeCheck) 2814 OS << "case AArch64::BI__builtin_neon_"; 2815 else 2816 OS << "case ARM::BI__builtin_neon_"; 2817 OS << MangleName(name, TypeVec[si], ClassB) << ": mask = " 2818 << "0x" << utohexstr(mask) << "ULL"; 2819 if (PtrArgNum >= 0) 2820 OS << "; PtrArgNum = " << PtrArgNum; 2821 if (HasConstPtr) 2822 OS << "; HasConstPtr = true"; 2823 OS << "; break;\n"; 2824 } 2825 if (qmask) { 2826 if (isA64TypeCheck) 2827 OS << "case AArch64::BI__builtin_neon_"; 2828 else 2829 OS << "case ARM::BI__builtin_neon_"; 2830 OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = " 2831 << "0x" << utohexstr(qmask) << "ULL"; 2832 if (PtrArgNum >= 0) 2833 OS << "; PtrArgNum = " << PtrArgNum; 2834 if (HasConstPtr) 2835 OS << "; HasConstPtr = true"; 2836 OS << "; break;\n"; 2837 } 2838 } 2839 OS << "#endif\n\n"; 2840} 2841 2842/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 2843/// declaration of builtins, checking for unique builtin declarations. 
2844void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 2845 StringMap<ClassKind> &A64IntrinsicMap, 2846 bool isA64GenBuiltinDef) { 2847 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2848 StringMap<OpKind> EmittedMap; 2849 2850 // Generate BuiltinsARM.def and BuiltinsAArch64.def 2851 if (isA64GenBuiltinDef) 2852 OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n"; 2853 else 2854 OS << "#ifdef GET_NEON_BUILTINS\n"; 2855 2856 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2857 Record *R = RV[i]; 2858 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2859 if (k != OpNone) 2860 continue; 2861 2862 std::string Proto = R->getValueAsString("Prototype"); 2863 std::string name = R->getValueAsString("Name"); 2864 std::string Rename = name + "@" + Proto; 2865 2866 // Functions with 'a' (the splat code) in the type prototype should not get 2867 // their own builtin as they use the non-splat variant. 2868 if (Proto.find('a') != std::string::npos) 2869 continue; 2870 2871 std::string Types = R->getValueAsString("Types"); 2872 SmallVector<StringRef, 16> TypeVec; 2873 ParseTypes(R, Types, TypeVec); 2874 2875 if (R->getSuperClasses().size() < 2) 2876 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2877 2878 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2879 2880 // Do not include AArch64 BUILTIN() macros if not generating 2881 // code for AArch64 2882 bool isA64 = R->getValueAsBit("isA64"); 2883 if (!isA64GenBuiltinDef && isA64) 2884 continue; 2885 2886 // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics 2887 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr 2888 // redefined in AArch64 to handle an additional 2 x f64 type. 
2889 if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) { 2890 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2891 if (A64CK == ck && ck != ClassNone) 2892 continue; 2893 } 2894 2895 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2896 // Generate the declaration for this builtin, ensuring 2897 // that each unique BUILTIN() macro appears only once in the output 2898 // stream. 2899 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); 2900 if (EmittedMap.count(bd)) 2901 continue; 2902 2903 EmittedMap[bd] = OpNone; 2904 OS << bd << "\n"; 2905 } 2906 } 2907 OS << "#endif\n\n"; 2908} 2909 2910/// runHeader - Emit a file with sections defining: 2911/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2912/// 2. the SemaChecking code for the type overload checking. 2913/// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2914void NeonEmitter::runHeader(raw_ostream &OS) { 2915 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2916 2917 // build a map of AArch64 intriniscs to be used in uniqueness checks. 
2918 StringMap<ClassKind> A64IntrinsicMap; 2919 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2920 Record *R = RV[i]; 2921 2922 bool isA64 = R->getValueAsBit("isA64"); 2923 if (!isA64) 2924 continue; 2925 2926 ClassKind CK = ClassNone; 2927 if (R->getSuperClasses().size() >= 2) 2928 CK = ClassMap[R->getSuperClasses()[1]]; 2929 2930 std::string Name = R->getValueAsString("Name"); 2931 std::string Proto = R->getValueAsString("Prototype"); 2932 std::string Rename = Name + "@" + Proto; 2933 if (A64IntrinsicMap.count(Rename)) 2934 continue; 2935 A64IntrinsicMap[Rename] = CK; 2936 } 2937 2938 // Generate BuiltinsARM.def for ARM 2939 genBuiltinsDef(OS, A64IntrinsicMap, false); 2940 2941 // Generate BuiltinsAArch64.def for AArch64 2942 genBuiltinsDef(OS, A64IntrinsicMap, true); 2943 2944 // Generate ARM overloaded type checking code for SemaChecking.cpp 2945 genOverloadTypeCheckCode(OS, A64IntrinsicMap, false); 2946 2947 // Generate AArch64 overloaded type checking code for SemaChecking.cpp 2948 genOverloadTypeCheckCode(OS, A64IntrinsicMap, true); 2949 2950 // Generate ARM range checking code for shift/lane immediates. 2951 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false); 2952 2953 // Generate the AArch64 range checking code for shift/lane immediates. 2954 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true); 2955} 2956 2957/// GenTest - Write out a test for the intrinsic specified by the name and 2958/// type strings, including the embedded patterns for FileCheck to match. 
2959static std::string GenTest(const std::string &name, 2960 const std::string &proto, 2961 StringRef outTypeStr, StringRef inTypeStr, 2962 bool isShift, bool isHiddenLOp, 2963 ClassKind ck, const std::string &InstName, 2964 bool isA64, 2965 std::string & testFuncProto) { 2966 assert(!proto.empty() && ""); 2967 std::string s; 2968 2969 // Function name with type suffix 2970 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2971 if (outTypeStr != inTypeStr) { 2972 // If the input type is different (e.g., for vreinterpret), append a suffix 2973 // for the input type. String off a "Q" (quad) prefix so that MangleName 2974 // does not insert another "q" in the name. 2975 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2976 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2977 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2978 } 2979 2980 // todo: GenerateChecksForIntrinsic does not generate CHECK 2981 // for aarch64 instructions yet 2982 std::vector<std::string> FileCheckPatterns; 2983 if (!isA64) { 2984 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 2985 isHiddenLOp, FileCheckPatterns); 2986 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 2987 } 2988 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 2989 2990 // Emit the FileCheck patterns. 2991 // If for any reason we do not want to emit a check, mangledInst 2992 // will be the empty string. 2993 if (FileCheckPatterns.size()) { 2994 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 2995 e = FileCheckPatterns.end(); 2996 i != e; 2997 ++i) { 2998 s += "// CHECK_ARM: " + *i + "\n"; 2999 } 3000 } 3001 3002 // Emit the start of the test function. 3003 3004 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 3005 char arg = 'a'; 3006 std::string comma; 3007 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3008 // Do not create arguments for values that must be immediate constants. 
3009 if (proto[i] == 'i') 3010 continue; 3011 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 3012 testFuncProto.push_back(arg); 3013 comma = ", "; 3014 } 3015 testFuncProto += ")"; 3016 3017 s+= testFuncProto; 3018 s+= " {\n "; 3019 3020 if (proto[0] != 'v') 3021 s += "return "; 3022 s += mangledName + "("; 3023 arg = 'a'; 3024 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3025 if (proto[i] == 'i') { 3026 // For immediate operands, test the maximum value. 3027 if (isShift) 3028 s += "1"; // FIXME 3029 else 3030 // The immediate generally refers to a lane in the preceding argument. 3031 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 3032 } else { 3033 s.push_back(arg); 3034 } 3035 if ((i + 1) < e) 3036 s += ", "; 3037 } 3038 s += ");\n}\n\n"; 3039 return s; 3040} 3041 3042/// Write out all intrinsic tests for the specified target, checking 3043/// for intrinsic test uniqueness. 3044void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 3045 bool isA64GenTest) { 3046 if (isA64GenTest) 3047 OS << "#ifdef __aarch64__\n"; 3048 3049 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3050 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 3051 Record *R = RV[i]; 3052 std::string name = R->getValueAsString("Name"); 3053 std::string Proto = R->getValueAsString("Prototype"); 3054 std::string Types = R->getValueAsString("Types"); 3055 bool isShift = R->getValueAsBit("isShift"); 3056 std::string InstName = R->getValueAsString("InstName"); 3057 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 3058 bool isA64 = R->getValueAsBit("isA64"); 3059 3060 // do not include AArch64 intrinsic test if not generating 3061 // code for AArch64 3062 if (!isA64GenTest && isA64) 3063 continue; 3064 3065 SmallVector<StringRef, 16> TypeVec; 3066 ParseTypes(R, Types, TypeVec); 3067 3068 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 3069 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 3070 if (kind 
== OpUnavailable) 3071 continue; 3072 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3073 if (kind == OpReinterpret) { 3074 bool outQuad = false; 3075 bool dummy = false; 3076 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 3077 for (unsigned srcti = 0, srcte = TypeVec.size(); 3078 srcti != srcte; ++srcti) { 3079 bool inQuad = false; 3080 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 3081 if (srcti == ti || inQuad != outQuad) 3082 continue; 3083 std::string testFuncProto; 3084 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 3085 isShift, isHiddenLOp, ck, InstName, isA64, 3086 testFuncProto); 3087 if (EmittedMap.count(testFuncProto)) 3088 continue; 3089 EmittedMap[testFuncProto] = kind; 3090 OS << s << "\n"; 3091 } 3092 } else { 3093 std::string testFuncProto; 3094 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 3095 isHiddenLOp, ck, InstName, isA64, testFuncProto); 3096 if (EmittedMap.count(testFuncProto)) 3097 continue; 3098 EmittedMap[testFuncProto] = kind; 3099 OS << s << "\n"; 3100 } 3101 } 3102 } 3103 3104 if (isA64GenTest) 3105 OS << "#endif\n"; 3106} 3107/// runTests - Write out a complete set of tests for all of the Neon 3108/// intrinsics. 3109void NeonEmitter::runTests(raw_ostream &OS) { 3110 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 3111 "apcs-gnu\\\n" 3112 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 3113 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 3114 "\n" 3115 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 3116 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 3117 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 3118 "\n" 3119 "// REQUIRES: long_tests\n" 3120 "\n" 3121 "#include <arm_neon.h>\n" 3122 "\n"; 3123 3124 // ARM tests must be emitted before AArch64 tests to ensure 3125 // tests for intrinsics that are common to ARM and AArch64 3126 // appear only once in the output stream. 
3127 // The check for uniqueness is done in genTargetTest. 3128 StringMap<OpKind> EmittedMap; 3129 3130 genTargetTest(OS, EmittedMap, false); 3131 3132 genTargetTest(OS, EmittedMap, true); 3133} 3134 3135namespace clang { 3136void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 3137 NeonEmitter(Records).run(OS); 3138} 3139void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 3140 NeonEmitter(Records).runHeader(OS); 3141} 3142void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 3143 NeonEmitter(Records).runTests(OS); 3144} 3145} // End namespace clang 3146