NeonEmitter.cpp revision 651f13cea278ec967336033dd032faef0e9fc2ec
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend is responsible for emitting arm_neon.h, which includes 11// a declaration and definition of each function specified by the ARM NEON 12// compiler interface. See ARM document DUI0348B. 13// 14// Each NEON instruction is implemented in terms of 1 or more functions which 15// are suffixed with the element type of the input vectors. Functions may be 16// implemented in terms of generic vector operations such as +, *, -, etc. or 17// by calling a __builtin_-prefixed function which will be handled by clang's 18// CodeGen library. 19// 20// Additional validation code can be generated by this file when runHeader() is 21// called, rather than the normal run() entry point. A complete set of tests 22// for Neon intrinsics can be generated by calling the runTests() entry point. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - The operations this emitter distinguishes. Each enumerator is
/// bound to an "OP_*" string key in the NeonEmitter constructor below; the
/// two lists must be kept in step when an operation is added.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMullHiP64,
  OpMullHiN,
  OpMlalHi,
  OpMlalHiN,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMlslHiN,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpFMlaN,
  OpFMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMullHiN,
  OpQDMlalHi,
  OpQDMlalHiN,
  OpQDMlslHi,
  OpQDMlslHiN,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ,
  OpScalarQDMullLane,
  OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane,
  OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane,
  OpScalarQRDMulHiLaneQ,
  OpScalarGetLane,
  OpScalarSetLane
};

/// ClassKind - The instruction class of an intrinsic. It selects which kind
/// of type suffix (if any) is attached to the instruction/intrinsic name.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: the low four bits hold the EltType, followed by one
  // bit for "unsigned" and one bit for "quad".
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  // Element type codes stored in the EltTypeMask bits. The numeric values
  // follow declaration order, so entries must not be reordered.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Poly128,
    Float16,
    Float32,
    Float64
  };

  // Wrap an already-encoded flags word.
  NeonTypeFlags(unsigned F) : Flags(F) {}
  // Encode an element type together with its unsigned/quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  // Return the raw encoded flags word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - The TableGen backend object. It holds the record keeper plus
/// two lookup tables filled in by the constructor: OpMap (operation name
/// string -> OpKind) and ClassMap (instruction class record -> ClassKind).
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Bind every operation name string to its OpKind enumerator.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"]  = OpMullHi;
    OpMap["OP_MULLHi_P64"]  = OpMullHiP64;
    OpMap["OP_MULLHi_N"]  = OpMullHiN;
    OpMap["OP_MLALHi"]  = OpMlalHi;
    OpMap["OP_MLALHi_N"]  = OpMlalHiN;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_FMLA_N"] = OpFMlaN;
    OpMap["OP_FMLS_N"] = OpFMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_TRN1"]  = OpTrn1;
    OpMap["OP_ZIP1"]  = OpZip1;
    OpMap["OP_UZP1"]  = OpUzp1;
    OpMap["OP_TRN2"]  = OpTrn2;
    OpMap["OP_ZIP2"]  = OpZip2;
    OpMap["OP_UZP2"]  = OpUzp2;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_XTN"] = OpXtnHi;
    OpMap["OP_SQXTUN"] = OpSqxtunHi;
    OpMap["OP_QXTN"] = OpQxtnHi;
    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY_LN"] = OpCopyLane;
    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
    OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
    OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
    OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
    OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
    OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
    OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
    OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
    OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
    OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
    OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
    OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
    OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
    OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
    OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;

    // Look up the instruction class records by name. The result of each
    // lookup is used as a ClassMap key without a null check.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    // The "Op" variant of each class shares the ClassKind of its plain
    // counterpart; only LOpInst and NoTestOpInst get kinds of their own.
    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  void emitGuardedIntrinsic(raw_ostream &OS, Record *R,
                            std::string &CurrentGuard, bool &InGuard,
                            StringMap<ClassKind> &EmittedMap);
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS);
  void genOverloadTypeCheckCode(raw_ostream &OS);
  void genIntrinsicRangeCheckCode(raw_ostream &OS);
  void genTargetTest(raw_ostream &OS);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// with each StringRef representing a single type declared in the string.
/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
/// 2xfloat and 4xfloat respectively.
391static void ParseTypes(Record *r, std::string &s, 392 SmallVectorImpl<StringRef> &TV) { 393 const char *data = s.data(); 394 int len = 0; 395 396 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 397 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' 398 || data[len] == 'H' || data[len] == 'S') 399 continue; 400 401 switch (data[len]) { 402 case 'c': 403 case 's': 404 case 'i': 405 case 'l': 406 case 'k': 407 case 'h': 408 case 'f': 409 case 'd': 410 break; 411 default: 412 PrintFatalError(r->getLoc(), 413 "Unexpected letter: " + std::string(data + len, 1)); 414 } 415 TV.push_back(StringRef(data, len + 1)); 416 data += len + 1; 417 len = -1; 418 } 419} 420 421/// Widen - Convert a type code into the next wider type. char -> short, 422/// short -> int, etc. 423static char Widen(const char t) { 424 switch (t) { 425 case 'c': 426 return 's'; 427 case 's': 428 return 'i'; 429 case 'i': 430 return 'l'; 431 case 'l': 432 return 'k'; 433 case 'h': 434 return 'f'; 435 case 'f': 436 return 'd'; 437 default: 438 PrintFatalError("unhandled type in widen!"); 439 } 440} 441 442/// Narrow - Convert a type code into the next smaller type. short -> char, 443/// float -> half float, etc. 
444static char Narrow(const char t) { 445 switch (t) { 446 case 's': 447 return 'c'; 448 case 'i': 449 return 's'; 450 case 'l': 451 return 'i'; 452 case 'k': 453 return 'l'; 454 case 'f': 455 return 'h'; 456 case 'd': 457 return 'f'; 458 default: 459 PrintFatalError("unhandled type in narrow!"); 460 } 461} 462 463static std::string GetNarrowTypestr(StringRef ty) 464{ 465 std::string s; 466 for (size_t i = 0, end = ty.size(); i < end; i++) { 467 switch (ty[i]) { 468 case 's': 469 s += 'c'; 470 break; 471 case 'i': 472 s += 's'; 473 break; 474 case 'l': 475 s += 'i'; 476 break; 477 case 'k': 478 s += 'l'; 479 break; 480 default: 481 s += ty[i]; 482 break; 483 } 484 } 485 486 return s; 487} 488 489/// For a particular StringRef, return the base type code, and whether it has 490/// the quad-vector, polynomial, or unsigned modifiers set. 491static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 492 unsigned off = 0; 493 // ignore scalar. 494 if (ty[off] == 'S') { 495 ++off; 496 } 497 // remember quad. 498 if (ty[off] == 'Q' || ty[off] == 'H') { 499 quad = true; 500 ++off; 501 } 502 503 // remember poly. 504 if (ty[off] == 'P') { 505 poly = true; 506 ++off; 507 } 508 509 // remember unsigned. 510 if (ty[off] == 'U') { 511 usgn = true; 512 ++off; 513 } 514 515 // base type to get the type string for. 516 return ty[off]; 517} 518 519/// ModType - Transform a type code and its modifiers based on a mod code. The 520/// mod code definitions may be found at the top of arm_neon.td. 
521static char ModType(const char mod, char type, bool &quad, bool &poly, 522 bool &usgn, bool &scal, bool &cnst, bool &pntr) { 523 switch (mod) { 524 case 't': 525 if (poly) { 526 poly = false; 527 usgn = true; 528 } 529 break; 530 case 'b': 531 scal = true; 532 case 'u': 533 usgn = true; 534 poly = false; 535 if (type == 'f') 536 type = 'i'; 537 if (type == 'd') 538 type = 'l'; 539 break; 540 case '$': 541 scal = true; 542 case 'x': 543 usgn = false; 544 poly = false; 545 if (type == 'f') 546 type = 'i'; 547 if (type == 'd') 548 type = 'l'; 549 break; 550 case 'o': 551 scal = true; 552 type = 'd'; 553 usgn = false; 554 break; 555 case 'y': 556 scal = true; 557 case 'f': 558 if (type == 'h') 559 quad = true; 560 type = 'f'; 561 usgn = false; 562 break; 563 case 'F': 564 type = 'd'; 565 usgn = false; 566 break; 567 case 'g': 568 quad = false; 569 break; 570 case 'B': 571 case 'C': 572 case 'D': 573 case 'j': 574 quad = true; 575 break; 576 case 'w': 577 type = Widen(type); 578 quad = true; 579 break; 580 case 'n': 581 type = Widen(type); 582 break; 583 case 'i': 584 type = 'i'; 585 scal = true; 586 break; 587 case 'l': 588 type = 'l'; 589 scal = true; 590 usgn = true; 591 break; 592 case 'z': 593 type = Narrow(type); 594 scal = true; 595 break; 596 case 'r': 597 type = Widen(type); 598 scal = true; 599 break; 600 case 's': 601 case 'a': 602 scal = true; 603 break; 604 case 'k': 605 quad = true; 606 break; 607 case 'c': 608 cnst = true; 609 case 'p': 610 pntr = true; 611 scal = true; 612 break; 613 case 'h': 614 type = Narrow(type); 615 if (type == 'h') 616 quad = false; 617 break; 618 case 'q': 619 type = Narrow(type); 620 quad = true; 621 break; 622 case 'e': 623 type = Narrow(type); 624 usgn = true; 625 break; 626 case 'm': 627 type = Narrow(type); 628 quad = false; 629 break; 630 default: 631 break; 632 } 633 return type; 634} 635 636static bool IsMultiVecProto(const char p) { 637 return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D')); 638} 639 640/// 
TypeString - for a modifier and type, generate the name of the typedef for 641/// that type. QUc -> uint8x8_t. 642static std::string TypeString(const char mod, StringRef typestr) { 643 bool quad = false; 644 bool poly = false; 645 bool usgn = false; 646 bool scal = false; 647 bool cnst = false; 648 bool pntr = false; 649 650 if (mod == 'v') 651 return "void"; 652 if (mod == 'i') 653 return "int"; 654 655 // base type to get the type string for. 656 char type = ClassifyType(typestr, quad, poly, usgn); 657 658 // Based on the modifying character, change the type and width if necessary. 659 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 660 661 SmallString<128> s; 662 663 if (usgn) 664 s.push_back('u'); 665 666 switch (type) { 667 case 'c': 668 s += poly ? "poly8" : "int8"; 669 if (scal) 670 break; 671 s += quad ? "x16" : "x8"; 672 break; 673 case 's': 674 s += poly ? "poly16" : "int16"; 675 if (scal) 676 break; 677 s += quad ? "x8" : "x4"; 678 break; 679 case 'i': 680 s += "int32"; 681 if (scal) 682 break; 683 s += quad ? "x4" : "x2"; 684 break; 685 case 'l': 686 s += (poly && !usgn)? "poly64" : "int64"; 687 if (scal) 688 break; 689 s += quad ? "x2" : "x1"; 690 break; 691 case 'k': 692 s += "poly128"; 693 break; 694 case 'h': 695 s += "float16"; 696 if (scal) 697 break; 698 s += quad ? "x8" : "x4"; 699 break; 700 case 'f': 701 s += "float32"; 702 if (scal) 703 break; 704 s += quad ? "x4" : "x2"; 705 break; 706 case 'd': 707 s += "float64"; 708 if (scal) 709 break; 710 s += quad ? "x2" : "x1"; 711 break; 712 713 default: 714 PrintFatalError("unhandled type!"); 715 } 716 717 if (mod == '2' || mod == 'B') 718 s += "x2"; 719 if (mod == '3' || mod == 'C') 720 s += "x3"; 721 if (mod == '4' || mod == 'D') 722 s += "x4"; 723 724 // Append _t, finishing the type string typedef type. 
725 s += "_t"; 726 727 if (cnst) 728 s += " const"; 729 730 if (pntr) 731 s += " *"; 732 733 return s.str(); 734} 735 736/// BuiltinTypeString - for a modifier and type, generate the clang 737/// BuiltinsARM.def prototype code for the function. See the top of clang's 738/// Builtins.def for a description of the type strings. 739static std::string BuiltinTypeString(const char mod, StringRef typestr, 740 ClassKind ck, bool ret) { 741 bool quad = false; 742 bool poly = false; 743 bool usgn = false; 744 bool scal = false; 745 bool cnst = false; 746 bool pntr = false; 747 748 if (mod == 'v') 749 return "v"; // void 750 if (mod == 'i') 751 return "i"; // int 752 753 // base type to get the type string for. 754 char type = ClassifyType(typestr, quad, poly, usgn); 755 756 // Based on the modifying character, change the type and width if necessary. 757 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 758 759 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && 760 scal && type != 'f' && type != 'd'); 761 762 // All pointers are void* pointers. Change type to 'v' now. 763 if (pntr) { 764 usgn = false; 765 poly = false; 766 type = 'v'; 767 } 768 // Treat half-float ('h') types as unsigned short ('s') types. 769 if (type == 'h') { 770 type = 's'; 771 usgn = true; 772 } 773 774 if (scal) { 775 SmallString<128> s; 776 777 if (usgn) 778 s.push_back('U'); 779 else if (type == 'c') 780 s.push_back('S'); // make chars explicitly signed 781 782 if (type == 'l') // 64-bit long 783 s += "Wi"; 784 else if (type == 'k') // 128-bit long 785 s = "LLLi"; 786 else 787 s.push_back(type); 788 789 if (cnst) 790 s.push_back('C'); 791 if (pntr) 792 s.push_back('*'); 793 return s.str(); 794 } 795 796 // Since the return value must be one type, return a vector type of the 797 // appropriate width which we will bitcast. An exception is made for 798 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 799 // fashion, storing them to a pointer arg. 
800 if (ret) { 801 if (IsMultiVecProto(mod)) 802 return "vv*"; // void result with void* first argument 803 if (mod == 'f' || (ck != ClassB && type == 'f')) 804 return quad ? "V4f" : "V2f"; 805 if (mod == 'F' || (ck != ClassB && type == 'd')) 806 return quad ? "V2d" : "V1d"; 807 if (ck != ClassB && type == 's') 808 return quad ? "V8s" : "V4s"; 809 if (ck != ClassB && type == 'i') 810 return quad ? "V4i" : "V2i"; 811 if (ck != ClassB && type == 'l') 812 return quad ? "V2Wi" : "V1Wi"; 813 814 return quad ? "V16Sc" : "V8Sc"; 815 } 816 817 // Non-return array types are passed as individual vectors. 818 if (mod == '2' || mod == 'B') 819 return quad ? "V16ScV16Sc" : "V8ScV8Sc"; 820 if (mod == '3' || mod == 'C') 821 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; 822 if (mod == '4' || mod == 'D') 823 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; 824 825 if (mod == 'f' || (ck != ClassB && type == 'f')) 826 return quad ? "V4f" : "V2f"; 827 if (mod == 'F' || (ck != ClassB && type == 'd')) 828 return quad ? "V2d" : "V1d"; 829 if (ck != ClassB && type == 's') 830 return quad ? "V8s" : "V4s"; 831 if (ck != ClassB && type == 'i') 832 return quad ? "V4i" : "V2i"; 833 if (ck != ClassB && type == 'l') 834 return quad ? "V2Wi" : "V1Wi"; 835 836 return quad ? "V16Sc" : "V8Sc"; 837} 838 839/// InstructionTypeCode - Computes the ARM argument character code and 840/// quad status for a specific type string and ClassKind. 841static void InstructionTypeCode(const StringRef &typeStr, 842 const ClassKind ck, 843 bool &quad, 844 std::string &typeCode) { 845 bool poly = false; 846 bool usgn = false; 847 char type = ClassifyType(typeStr, quad, poly, usgn); 848 849 switch (type) { 850 case 'c': 851 switch (ck) { 852 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break; 853 case ClassI: typeCode = "i8"; break; 854 case ClassW: typeCode = "8"; break; 855 default: break; 856 } 857 break; 858 case 's': 859 switch (ck) { 860 case ClassS: typeCode = poly ? "p16" : usgn ? 
"u16" : "s16"; break; 861 case ClassI: typeCode = "i16"; break; 862 case ClassW: typeCode = "16"; break; 863 default: break; 864 } 865 break; 866 case 'i': 867 switch (ck) { 868 case ClassS: typeCode = usgn ? "u32" : "s32"; break; 869 case ClassI: typeCode = "i32"; break; 870 case ClassW: typeCode = "32"; break; 871 default: break; 872 } 873 break; 874 case 'l': 875 switch (ck) { 876 case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break; 877 case ClassI: typeCode = "i64"; break; 878 case ClassW: typeCode = "64"; break; 879 default: break; 880 } 881 break; 882 case 'k': 883 assert(poly && "Unrecognized 128 bit integer."); 884 typeCode = "p128"; 885 break; 886 case 'h': 887 switch (ck) { 888 case ClassS: 889 case ClassI: typeCode = "f16"; break; 890 case ClassW: typeCode = "16"; break; 891 default: break; 892 } 893 break; 894 case 'f': 895 switch (ck) { 896 case ClassS: 897 case ClassI: typeCode = "f32"; break; 898 case ClassW: typeCode = "32"; break; 899 default: break; 900 } 901 break; 902 case 'd': 903 switch (ck) { 904 case ClassS: 905 case ClassI: 906 typeCode += "f64"; 907 break; 908 case ClassW: 909 PrintFatalError("unhandled type!"); 910 default: 911 break; 912 } 913 break; 914 default: 915 PrintFatalError("unhandled type!"); 916 } 917} 918 919static char Insert_BHSD_Suffix(StringRef typestr){ 920 unsigned off = 0; 921 if(typestr[off++] == 'S'){ 922 while(typestr[off] == 'Q' || typestr[off] == 'H'|| 923 typestr[off] == 'P' || typestr[off] == 'U') 924 ++off; 925 switch (typestr[off]){ 926 default : break; 927 case 'c' : return 'b'; 928 case 's' : return 'h'; 929 case 'i' : 930 case 'f' : return 's'; 931 case 'l' : 932 case 'd' : return 'd'; 933 } 934 } 935 return 0; 936} 937 938static bool endsWith_xN(std::string const &name) { 939 if (name.length() > 3) { 940 if (name.compare(name.length() - 3, 3, "_x2") == 0 || 941 name.compare(name.length() - 3, 3, "_x3") == 0 || 942 name.compare(name.length() - 3, 3, "_x4") == 0) 943 return true; 944 } 945 
return false; 946} 947 948/// MangleName - Append a type or width suffix to a base neon function name, 949/// and insert a 'q' in the appropriate location if type string starts with 'Q'. 950/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. 951/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used. 952static std::string MangleName(const std::string &name, StringRef typestr, 953 ClassKind ck) { 954 if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" || 955 name == "vcvt_f64_f32") 956 return name; 957 958 bool quad = false; 959 std::string typeCode = ""; 960 961 InstructionTypeCode(typestr, ck, quad, typeCode); 962 963 std::string s = name; 964 965 if (typeCode.size() > 0) { 966 // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN. 967 if (endsWith_xN(s)) 968 s.insert(s.length() - 3, "_" + typeCode); 969 else 970 s += "_" + typeCode; 971 } 972 973 if (ck == ClassB) 974 s += "_v"; 975 976 // Insert a 'q' before the first '_' character so that it ends up before 977 // _lane or _n on vector-scalar operations. 978 if (typestr.find("Q") != StringRef::npos) { 979 size_t pos = s.find('_'); 980 s = s.insert(pos, "q"); 981 } 982 char ins = Insert_BHSD_Suffix(typestr); 983 if(ins){ 984 size_t pos = s.find('_'); 985 s = s.insert(pos, &ins, 1); 986 } 987 988 return s; 989} 990 991static void PreprocessInstruction(const StringRef &Name, 992 const std::string &InstName, 993 std::string &Prefix, 994 bool &HasNPostfix, 995 bool &HasLanePostfix, 996 bool &HasDupPostfix, 997 bool &IsSpecialVCvt, 998 size_t &TBNumber) { 999 // All of our instruction name fields from arm_neon.td are of the form 1000 // <instructionname>_... 1001 // Thus we grab our instruction name via computation of said Prefix. 1002 const size_t PrefixEnd = Name.find_first_of('_'); 1003 // If InstName is passed in, we use that instead of our name Prefix. 1004 Prefix = InstName.size() == 0? 
Name.slice(0, PrefixEnd).str() : InstName; 1005 1006 const StringRef Postfix = Name.slice(PrefixEnd, Name.size()); 1007 1008 HasNPostfix = Postfix.count("_n"); 1009 HasLanePostfix = Postfix.count("_lane"); 1010 HasDupPostfix = Postfix.count("_dup"); 1011 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt"); 1012 1013 if (InstName.compare("vtbl") == 0 || 1014 InstName.compare("vtbx") == 0) { 1015 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII 1016 // encoding to get its true value. 1017 TBNumber = Name[Name.size()-1] - 48; 1018 } 1019} 1020 1021/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have 1022/// extracted, generate a FileCheck pattern for a Load Or Store 1023static void 1024GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef, 1025 const std::string& OutTypeCode, 1026 const bool &IsQuad, 1027 const bool &HasDupPostfix, 1028 const bool &HasLanePostfix, 1029 const size_t Count, 1030 std::string &RegisterSuffix) { 1031 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1"); 1032 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang 1033 // will output a series of v{ld,st}1s, so we have to handle it specially. 1034 if ((Count == 3 || Count == 4) && IsQuad) { 1035 RegisterSuffix += "{"; 1036 for (size_t i = 0; i < Count; i++) { 1037 RegisterSuffix += "d{{[0-9]+}}"; 1038 if (HasDupPostfix) { 1039 RegisterSuffix += "[]"; 1040 } 1041 if (HasLanePostfix) { 1042 RegisterSuffix += "[{{[0-9]+}}]"; 1043 } 1044 if (i < Count-1) { 1045 RegisterSuffix += ", "; 1046 } 1047 } 1048 RegisterSuffix += "}"; 1049 } else { 1050 1051 // Handle normal loads and stores. 
1052 RegisterSuffix += "{"; 1053 for (size_t i = 0; i < Count; i++) { 1054 RegisterSuffix += "d{{[0-9]+}}"; 1055 if (HasDupPostfix) { 1056 RegisterSuffix += "[]"; 1057 } 1058 if (HasLanePostfix) { 1059 RegisterSuffix += "[{{[0-9]+}}]"; 1060 } 1061 if (IsQuad && !HasLanePostfix) { 1062 RegisterSuffix += ", d{{[0-9]+}}"; 1063 if (HasDupPostfix) { 1064 RegisterSuffix += "[]"; 1065 } 1066 } 1067 if (i < Count-1) { 1068 RegisterSuffix += ", "; 1069 } 1070 } 1071 RegisterSuffix += "}, [r{{[0-9]+}}"; 1072 1073 // We only include the alignment hint if we have a vld1.*64 or 1074 // a dup/lane instruction. 1075 if (IsLDSTOne) { 1076 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") { 1077 RegisterSuffix += ":" + OutTypeCode; 1078 } 1079 } 1080 1081 RegisterSuffix += "]"; 1082 } 1083} 1084 1085static bool HasNPostfixAndScalarArgs(const StringRef &NameRef, 1086 const bool &HasNPostfix) { 1087 return (NameRef.count("vmla") || 1088 NameRef.count("vmlal") || 1089 NameRef.count("vmlsl") || 1090 NameRef.count("vmull") || 1091 NameRef.count("vqdmlal") || 1092 NameRef.count("vqdmlsl") || 1093 NameRef.count("vqdmulh") || 1094 NameRef.count("vqdmull") || 1095 NameRef.count("vqrdmulh")) && HasNPostfix; 1096} 1097 1098static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef, 1099 const bool &HasLanePostfix) { 1100 return (NameRef.count("vmla") || 1101 NameRef.count("vmls") || 1102 NameRef.count("vmlal") || 1103 NameRef.count("vmlsl") || 1104 (NameRef.count("vmul") && NameRef.size() == 3)|| 1105 NameRef.count("vqdmlal") || 1106 NameRef.count("vqdmlsl") || 1107 NameRef.count("vqdmulh") || 1108 NameRef.count("vqrdmulh")) && HasLanePostfix; 1109} 1110 1111static bool IsSpecialLaneMultiply(const StringRef &NameRef, 1112 const bool &HasLanePostfix, 1113 const bool &IsQuad) { 1114 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh")) 1115 && IsQuad; 1116 const bool IsVMull = NameRef.count("mull") && !IsQuad; 1117 return (IsVMulOrMulh || IsVMull) && 
HasLanePostfix; 1118} 1119 1120static void NormalizeProtoForRegisterPatternCreation(const std::string &Name, 1121 const std::string &Proto, 1122 const bool &HasNPostfix, 1123 const bool &IsQuad, 1124 const bool &HasLanePostfix, 1125 const bool &HasDupPostfix, 1126 std::string &NormedProto) { 1127 // Handle generic case. 1128 const StringRef NameRef(Name); 1129 for (size_t i = 0, end = Proto.size(); i < end; i++) { 1130 switch (Proto[i]) { 1131 case 'u': 1132 case 'f': 1133 case 'F': 1134 case 'd': 1135 case 's': 1136 case 'x': 1137 case 't': 1138 case 'n': 1139 NormedProto += IsQuad? 'q' : 'd'; 1140 break; 1141 case 'w': 1142 case 'k': 1143 NormedProto += 'q'; 1144 break; 1145 case 'g': 1146 case 'j': 1147 case 'h': 1148 case 'e': 1149 NormedProto += 'd'; 1150 break; 1151 case 'i': 1152 NormedProto += HasLanePostfix? 'a' : 'i'; 1153 break; 1154 case 'a': 1155 if (HasLanePostfix) { 1156 NormedProto += 'a'; 1157 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) { 1158 NormedProto += IsQuad? 'q' : 'd'; 1159 } else { 1160 NormedProto += 'i'; 1161 } 1162 break; 1163 } 1164 } 1165 1166 // Handle Special Cases. 1167 const bool IsNotVExt = !NameRef.count("vext"); 1168 const bool IsVPADAL = NameRef.count("vpadal"); 1169 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef, 1170 HasLanePostfix); 1171 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix, 1172 IsQuad); 1173 1174 if (IsSpecialLaneMul) { 1175 // If 1176 NormedProto[2] = NormedProto[3]; 1177 NormedProto.erase(3); 1178 } else if (NormedProto.size() == 4 && 1179 NormedProto[0] == NormedProto[1] && 1180 IsNotVExt) { 1181 // If NormedProto.size() == 4 and the first two proto characters are the 1182 // same, ignore the first. 
1183 NormedProto = NormedProto.substr(1, 3); 1184 } else if (Is5OpLaneAccum) { 1185 // If we have a 5 op lane accumulator operation, we take characters 1,2,4 1186 std::string tmp = NormedProto.substr(1,2); 1187 tmp += NormedProto[4]; 1188 NormedProto = tmp; 1189 } else if (IsVPADAL) { 1190 // If we have VPADAL, ignore the first character. 1191 NormedProto = NormedProto.substr(0, 2); 1192 } else if (NameRef.count("vdup") && NormedProto.size() > 2) { 1193 // If our instruction is a dup instruction, keep only the first and 1194 // last characters. 1195 std::string tmp = ""; 1196 tmp += NormedProto[0]; 1197 tmp += NormedProto[NormedProto.size()-1]; 1198 NormedProto = tmp; 1199 } 1200} 1201 1202/// GenerateRegisterCheckPatterns - Given a bunch of data we have 1203/// extracted, generate a FileCheck pattern to check that an 1204/// instruction's arguments are correct. 1205static void GenerateRegisterCheckPattern(const std::string &Name, 1206 const std::string &Proto, 1207 const std::string &OutTypeCode, 1208 const bool &HasNPostfix, 1209 const bool &IsQuad, 1210 const bool &HasLanePostfix, 1211 const bool &HasDupPostfix, 1212 const size_t &TBNumber, 1213 std::string &RegisterSuffix) { 1214 1215 RegisterSuffix = ""; 1216 1217 const StringRef NameRef(Name); 1218 1219 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) { 1220 return; 1221 } 1222 1223 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst"); 1224 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx"); 1225 1226 if (IsLoadStore) { 1227 // Grab N value from v{ld,st}N using its ascii representation. 
1228 const size_t Count = NameRef[3] - 48; 1229 1230 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad, 1231 HasDupPostfix, HasLanePostfix, 1232 Count, RegisterSuffix); 1233 } else if (IsTBXOrTBL) { 1234 RegisterSuffix += "d{{[0-9]+}}, {"; 1235 for (size_t i = 0; i < TBNumber-1; i++) { 1236 RegisterSuffix += "d{{[0-9]+}}, "; 1237 } 1238 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}"; 1239 } else { 1240 // Handle a normal instruction. 1241 if (NameRef.count("vget") || NameRef.count("vset")) 1242 return; 1243 1244 // We first normalize our proto, since we only need to emit 4 1245 // different types of checks, yet have more than 4 proto types 1246 // that map onto those 4 patterns. 1247 std::string NormalizedProto(""); 1248 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad, 1249 HasLanePostfix, HasDupPostfix, 1250 NormalizedProto); 1251 1252 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) { 1253 const char &c = NormalizedProto[i]; 1254 switch (c) { 1255 case 'q': 1256 RegisterSuffix += "q{{[0-9]+}}, "; 1257 break; 1258 1259 case 'd': 1260 RegisterSuffix += "d{{[0-9]+}}, "; 1261 break; 1262 1263 case 'i': 1264 RegisterSuffix += "#{{[0-9]+}}, "; 1265 break; 1266 1267 case 'a': 1268 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], "; 1269 break; 1270 } 1271 } 1272 1273 // Remove extra ", ". 1274 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2); 1275 } 1276} 1277 1278/// GenerateChecksForIntrinsic - Given a specific instruction name + 1279/// typestr + class kind, generate the proper set of FileCheck 1280/// Patterns to check for. We could just return a string, but instead 1281/// use a vector since it provides us with the extra flexibility of 1282/// emitting multiple checks, which comes in handy for certain cases 1283/// like mla where we want to check for 2 different instructions. 
1284static void GenerateChecksForIntrinsic(const std::string &Name, 1285 const std::string &Proto, 1286 StringRef &OutTypeStr, 1287 StringRef &InTypeStr, 1288 ClassKind Ck, 1289 const std::string &InstName, 1290 bool IsHiddenLOp, 1291 std::vector<std::string>& Result) { 1292 1293 // If Ck is a ClassNoTest instruction, just return so no test is 1294 // emitted. 1295 if(Ck == ClassNoTest) 1296 return; 1297 1298 if (Name == "vcvt_f32_f16") { 1299 Result.push_back("vcvt.f32.f16"); 1300 return; 1301 } 1302 1303 1304 // Now we preprocess our instruction given the data we have to get the 1305 // data that we need. 1306 // Create a StringRef for String Manipulation of our Name. 1307 const StringRef NameRef(Name); 1308 // Instruction Prefix. 1309 std::string Prefix; 1310 // The type code for our out type string. 1311 std::string OutTypeCode; 1312 // To handle our different cases, we need to check for different postfixes. 1313 // Is our instruction a quad instruction. 1314 bool IsQuad = false; 1315 // Our instruction is of the form <instructionname>_n. 1316 bool HasNPostfix = false; 1317 // Our instruction is of the form <instructionname>_lane. 1318 bool HasLanePostfix = false; 1319 // Our instruction is of the form <instructionname>_dup. 1320 bool HasDupPostfix = false; 1321 // Our instruction is a vcvt instruction which requires special handling. 1322 bool IsSpecialVCvt = false; 1323 // If we have a vtbxN or vtblN instruction, this is set to N. 1324 size_t TBNumber = -1; 1325 // Register Suffix 1326 std::string RegisterSuffix; 1327 1328 PreprocessInstruction(NameRef, InstName, Prefix, 1329 HasNPostfix, HasLanePostfix, HasDupPostfix, 1330 IsSpecialVCvt, TBNumber); 1331 1332 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode); 1333 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad, 1334 HasLanePostfix, HasDupPostfix, TBNumber, 1335 RegisterSuffix); 1336 1337 // In the following section, we handle a bunch of special cases. 
You can tell 1338 // a special case by the fact we are returning early. 1339 1340 // If our instruction is a logical instruction without postfix or a 1341 // hidden LOp just return the current Prefix. 1342 if (Ck == ClassL || IsHiddenLOp) { 1343 Result.push_back(Prefix + " " + RegisterSuffix); 1344 return; 1345 } 1346 1347 // If we have a vmov, due to the many different cases, some of which 1348 // vary within the different intrinsics generated for a single 1349 // instruction type, just output a vmov. (e.g. given an instruction 1350 // A, A.u32 might be vmov and A.u8 might be vmov.8). 1351 // 1352 // FIXME: Maybe something can be done about this. The two cases that we care 1353 // about are vmov as an LType and vmov as a WType. 1354 if (Prefix == "vmov") { 1355 Result.push_back(Prefix + " " + RegisterSuffix); 1356 return; 1357 } 1358 1359 // In the following section, we handle special cases. 1360 1361 if (OutTypeCode == "64") { 1362 // If we have a 64 bit vdup/vext and are handling an uint64x1_t 1363 // type, the intrinsic will be optimized away, so just return 1364 // nothing. On the other hand if we are handling an uint64x2_t 1365 // (i.e. quad instruction), vdup/vmov instructions should be 1366 // emitted. 1367 if (Prefix == "vdup" || Prefix == "vext") { 1368 if (IsQuad) { 1369 Result.push_back("{{vmov|vdup}}"); 1370 } 1371 return; 1372 } 1373 1374 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with 1375 // multiple register operands. 1376 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3" 1377 || Prefix == "vld4"; 1378 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3" 1379 || Prefix == "vst4"; 1380 if (MultiLoadPrefix || MultiStorePrefix) { 1381 Result.push_back(NameRef.slice(0, 3).str() + "1.64"); 1382 return; 1383 } 1384 1385 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of 1386 // emitting said instructions. So return a check for 1387 // vldr/vstr/vmov/str instead. 
1388 if (HasLanePostfix || HasDupPostfix) { 1389 if (Prefix == "vst1") { 1390 Result.push_back("{{str|vstr|vmov}}"); 1391 return; 1392 } else if (Prefix == "vld1") { 1393 Result.push_back("{{ldr|vldr|vmov}}"); 1394 return; 1395 } 1396 } 1397 } 1398 1399 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are 1400 // sometimes disassembled as vtrn.32. We use a regex to handle both 1401 // cases. 1402 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") { 1403 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix); 1404 return; 1405 } 1406 1407 // Currently on most ARM processors, we do not use vmla/vmls for 1408 // quad floating point operations. Instead we output vmul + vadd. So 1409 // check if we have one of those instructions and just output a 1410 // check for vmul. 1411 if (OutTypeCode == "f32") { 1412 if (Prefix == "vmls") { 1413 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1414 Result.push_back("vsub." + OutTypeCode); 1415 return; 1416 } else if (Prefix == "vmla") { 1417 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); 1418 Result.push_back("vadd." + OutTypeCode); 1419 return; 1420 } 1421 } 1422 1423 // If we have vcvt, get the input type from the instruction name 1424 // (which should be of the form instname_inputtype) and append it 1425 // before the output type. 1426 if (Prefix == "vcvt") { 1427 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1); 1428 Prefix += "." + inTypeCode; 1429 } 1430 1431 // Append output type code to get our final mangled instruction. 1432 Prefix += "." + OutTypeCode; 1433 1434 Result.push_back(Prefix + " " + RegisterSuffix); 1435} 1436 1437/// UseMacro - Examine the prototype string to determine if the intrinsic 1438/// should be defined as a preprocessor macro instead of an inline function. 
1439static bool UseMacro(const std::string &proto, StringRef typestr) { 1440 // If this builtin takes an immediate argument, we need to #define it rather 1441 // than use a standard declaration, so that SemaChecking can range check 1442 // the immediate passed by the user. 1443 if (proto.find('i') != std::string::npos) 1444 return true; 1445 1446 // Pointer arguments need to use macros to avoid hiding aligned attributes 1447 // from the pointer type. 1448 if (proto.find('p') != std::string::npos || 1449 proto.find('c') != std::string::npos) 1450 return true; 1451 1452 // It is not permitted to pass or return an __fp16 by value, so intrinsics 1453 // taking a scalar float16_t must be implemented as macros. 1454 if (typestr.find('h') != std::string::npos && 1455 proto.find('s') != std::string::npos) 1456 return true; 1457 1458 return false; 1459} 1460 1461/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 1462/// defined as a macro should be accessed directly instead of being first 1463/// assigned to a local temporary. 1464static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1465 // True for constant ints (i), pointers (p) and const pointers (c). 1466 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1467} 1468 1469// Generate the string "(argtype a, argtype b, ...)" 1470static std::string GenArgs(const std::string &proto, StringRef typestr, 1471 const std::string &name) { 1472 bool define = UseMacro(proto, typestr); 1473 char arg = 'a'; 1474 1475 std::string s; 1476 s += "("; 1477 1478 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1479 if (define) { 1480 // Some macro arguments are used directly instead of being assigned 1481 // to local temporaries; prepend an underscore prefix to make their 1482 // names consistent with the local temporaries. 
1483 if (MacroArgUsedDirectly(proto, i)) 1484 s += "__"; 1485 } else { 1486 s += TypeString(proto[i], typestr) + " __"; 1487 } 1488 s.push_back(arg); 1489 if ((i + 1) < e) 1490 s += ", "; 1491 } 1492 1493 s += ")"; 1494 return s; 1495} 1496 1497// Macro arguments are not type-checked like inline function arguments, so 1498// assign them to local temporaries to get the right type checking. 1499static std::string GenMacroLocals(const std::string &proto, StringRef typestr, 1500 const std::string &name ) { 1501 char arg = 'a'; 1502 std::string s; 1503 bool generatedLocal = false; 1504 1505 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1506 // Do not create a temporary for an immediate argument. 1507 // That would defeat the whole point of using a macro! 1508 if (MacroArgUsedDirectly(proto, i)) 1509 continue; 1510 generatedLocal = true; 1511 s += TypeString(proto[i], typestr) + " __"; 1512 s.push_back(arg); 1513 s += " = ("; 1514 s.push_back(arg); 1515 s += "); "; 1516 } 1517 1518 if (generatedLocal) 1519 s += "\\\n "; 1520 return s; 1521} 1522 1523// Use the vmovl builtin to sign-extend or zero-extend a vector. 1524static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1525 std::string s, high; 1526 high = h ? "_high" : ""; 1527 s = MangleName("vmovl" + high, typestr, ClassS); 1528 s += "(" + a + ")"; 1529 return s; 1530} 1531 1532// Get the high 64-bit part of a vector 1533static std::string GetHigh(const std::string &a, StringRef typestr) { 1534 std::string s; 1535 s = MangleName("vget_high", typestr, ClassS); 1536 s += "(" + a + ")"; 1537 return s; 1538} 1539 1540// Gen operation with two operands and get high 64-bit for both of two operands. 
1541static std::string Gen2OpWith2High(StringRef typestr, 1542 const std::string &op, 1543 const std::string &a, 1544 const std::string &b) { 1545 std::string s; 1546 std::string Op1 = GetHigh(a, typestr); 1547 std::string Op2 = GetHigh(b, typestr); 1548 s = MangleName(op, typestr, ClassS); 1549 s += "(" + Op1 + ", " + Op2 + ");"; 1550 return s; 1551} 1552 1553// Gen operation with three operands and get high 64-bit of the latter 1554// two operands. 1555static std::string Gen3OpWith2High(StringRef typestr, 1556 const std::string &op, 1557 const std::string &a, 1558 const std::string &b, 1559 const std::string &c) { 1560 std::string s; 1561 std::string Op1 = GetHigh(b, typestr); 1562 std::string Op2 = GetHigh(c, typestr); 1563 s = MangleName(op, typestr, ClassS); 1564 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1565 return s; 1566} 1567 1568// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 1569static std::string GenCombine(std::string typestr, 1570 const std::string &a, 1571 const std::string &b) { 1572 std::string s; 1573 s = MangleName("vcombine", typestr, ClassS); 1574 s += "(" + a + ", " + b + ")"; 1575 return s; 1576} 1577 1578static std::string Duplicate(unsigned nElts, StringRef typestr, 1579 const std::string &a) { 1580 std::string s; 1581 1582 s = "(" + TypeString('d', typestr) + "){ "; 1583 for (unsigned i = 0; i != nElts; ++i) { 1584 s += a; 1585 if ((i + 1) < nElts) 1586 s += ", "; 1587 } 1588 s += " }"; 1589 1590 return s; 1591} 1592 1593static std::string SplatLane(unsigned nElts, const std::string &vec, 1594 const std::string &lane) { 1595 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1596 for (unsigned i = 0; i < nElts; ++i) 1597 s += ", " + lane; 1598 s += ")"; 1599 return s; 1600} 1601 1602static std::string RemoveHigh(const std::string &name) { 1603 std::string s = name; 1604 std::size_t found = s.find("_high_"); 1605 if (found == std::string::npos) 1606 PrintFatalError("name should contain \"_high_\" 
for high intrinsics"); 1607 s.replace(found, 5, ""); 1608 return s; 1609} 1610 1611static unsigned GetNumElements(StringRef typestr, bool &quad) { 1612 quad = false; 1613 bool dummy = false; 1614 char type = ClassifyType(typestr, quad, dummy, dummy); 1615 unsigned nElts = 0; 1616 switch (type) { 1617 case 'c': nElts = 8; break; 1618 case 's': nElts = 4; break; 1619 case 'i': nElts = 2; break; 1620 case 'l': nElts = 1; break; 1621 case 'k': nElts = 1; break; 1622 case 'h': nElts = 4; break; 1623 case 'f': nElts = 2; break; 1624 case 'd': 1625 nElts = 1; 1626 break; 1627 default: 1628 PrintFatalError("unhandled type!"); 1629 } 1630 if (quad) nElts <<= 1; 1631 return nElts; 1632} 1633 1634// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 1635// 1636// Note that some intrinsic definitions around 'lane' are being implemented 1637// with macros, because they all contain constant integer argument, and we 1638// statically check the range of the lane index to meet the semantic 1639// requirement of different intrinsics. 1640// 1641// For the intrinsics implemented with macro, if they contain another intrinsic 1642// implemented with maco, we have to avoid using the same argument names for 1643// the nested instrinsics. For example, macro vfms_lane is being implemented 1644// with another macor vfma_lane, so we rename all arguments for vfms_lane by 1645// adding a suffix '1'. 
1646 1647static std::string GenOpString(const std::string &name, OpKind op, 1648 const std::string &proto, StringRef typestr) { 1649 bool quad; 1650 unsigned nElts = GetNumElements(typestr, quad); 1651 bool define = UseMacro(proto, typestr); 1652 1653 std::string ts = TypeString(proto[0], typestr); 1654 std::string s; 1655 if (!define) { 1656 s = "return "; 1657 } 1658 1659 switch(op) { 1660 case OpAdd: 1661 s += "__a + __b;"; 1662 break; 1663 case OpAddl: 1664 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; 1665 break; 1666 case OpAddlHi: 1667 s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; 1668 break; 1669 case OpAddw: 1670 s += "__a + " + Extend(typestr, "__b") + ";"; 1671 break; 1672 case OpAddwHi: 1673 s += "__a + " + Extend(typestr, "__b", 1) + ";"; 1674 break; 1675 case OpSub: 1676 s += "__a - __b;"; 1677 break; 1678 case OpSubl: 1679 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; 1680 break; 1681 case OpSublHi: 1682 s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; 1683 break; 1684 case OpSubw: 1685 s += "__a - " + Extend(typestr, "__b") + ";"; 1686 break; 1687 case OpSubwHi: 1688 s += "__a - " + Extend(typestr, "__b", 1) + ";"; 1689 break; 1690 case OpMulN: 1691 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; 1692 break; 1693 case OpMulLane: 1694 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; 1695 break; 1696 case OpMulXLane: 1697 s += MangleName("vmulx", typestr, ClassS) + "(__a, " + 1698 SplatLane(nElts, "__b", "__c") + ");"; 1699 break; 1700 case OpMul: 1701 s += "__a * __b;"; 1702 break; 1703 case OpFMlaN: 1704 s += MangleName("vfma", typestr, ClassS); 1705 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; 1706 break; 1707 case OpFMlsN: 1708 s += MangleName("vfms", typestr, ClassS); 1709 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; 1710 break; 1711 case OpMullLane: 1712 s += MangleName("vmull", typestr, ClassS) + "(__a, " 
+ 1713 SplatLane(nElts, "__b", "__c") + ");"; 1714 break; 1715 case OpMullHiLane: 1716 s += MangleName("vmull", typestr, ClassS) + "(" + 1717 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1718 break; 1719 case OpMlaN: 1720 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1721 break; 1722 case OpMlaLane: 1723 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1724 break; 1725 case OpMla: 1726 s += "__a + (__b * __c);"; 1727 break; 1728 case OpMlalN: 1729 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1730 Duplicate(nElts, typestr, "__c") + ");"; 1731 break; 1732 case OpMlalLane: 1733 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1734 SplatLane(nElts, "__c", "__d") + ");"; 1735 break; 1736 case OpMlalHiLane: 1737 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" + 1738 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1739 break; 1740 case OpMlal: 1741 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1742 break; 1743 case OpMullHi: 1744 s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); 1745 break; 1746 case OpMullHiP64: { 1747 std::string Op1 = GetHigh("__a", typestr); 1748 std::string Op2 = GetHigh("__b", typestr); 1749 s += MangleName("vmull", typestr, ClassS); 1750 s += "((poly64_t)" + Op1 + ", (poly64_t)" + Op2 + ");"; 1751 break; 1752 } 1753 case OpMullHiN: 1754 s += MangleName("vmull_n", typestr, ClassS); 1755 s += "(" + GetHigh("__a", typestr) + ", __b);"; 1756 return s; 1757 case OpMlalHi: 1758 s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); 1759 break; 1760 case OpMlalHiN: 1761 s += MangleName("vmlal_n", typestr, ClassS); 1762 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 1763 return s; 1764 case OpMlsN: 1765 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1766 break; 1767 case OpMlsLane: 1768 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1769 break; 1770 case 
OpFMSLane: 1771 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1772 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1773 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1774 s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1775 break; 1776 case OpFMSLaneQ: 1777 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1778 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1779 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1780 s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1781 break; 1782 case OpMls: 1783 s += "__a - (__b * __c);"; 1784 break; 1785 case OpMlslN: 1786 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1787 Duplicate(nElts, typestr, "__c") + ");"; 1788 break; 1789 case OpMlslLane: 1790 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1791 SplatLane(nElts, "__c", "__d") + ");"; 1792 break; 1793 case OpMlslHiLane: 1794 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" + 1795 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1796 break; 1797 case OpMlsl: 1798 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1799 break; 1800 case OpMlslHi: 1801 s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); 1802 break; 1803 case OpMlslHiN: 1804 s += MangleName("vmlsl_n", typestr, ClassS); 1805 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 1806 break; 1807 case OpQDMullLane: 1808 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 1809 SplatLane(nElts, "__b", "__c") + ");"; 1810 break; 1811 case OpQDMullHiLane: 1812 s += MangleName("vqdmull", typestr, ClassS) + "(" + 1813 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1814 break; 1815 case OpQDMlalLane: 1816 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + 1817 SplatLane(nElts, "__c", "__d") + ");"; 1818 break; 1819 case OpQDMlalHiLane: 1820 s += 
MangleName("vqdmlal", typestr, ClassS) + "(__a, " + 1821 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1822 break; 1823 case OpQDMlslLane: 1824 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + 1825 SplatLane(nElts, "__c", "__d") + ");"; 1826 break; 1827 case OpQDMlslHiLane: 1828 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " + 1829 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1830 break; 1831 case OpQDMulhLane: 1832 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 1833 SplatLane(nElts, "__b", "__c") + ");"; 1834 break; 1835 case OpQRDMulhLane: 1836 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 1837 SplatLane(nElts, "__b", "__c") + ");"; 1838 break; 1839 case OpEq: 1840 s += "(" + ts + ")(__a == __b);"; 1841 break; 1842 case OpGe: 1843 s += "(" + ts + ")(__a >= __b);"; 1844 break; 1845 case OpLe: 1846 s += "(" + ts + ")(__a <= __b);"; 1847 break; 1848 case OpGt: 1849 s += "(" + ts + ")(__a > __b);"; 1850 break; 1851 case OpLt: 1852 s += "(" + ts + ")(__a < __b);"; 1853 break; 1854 case OpNeg: 1855 s += " -__a;"; 1856 break; 1857 case OpNot: 1858 s += " ~__a;"; 1859 break; 1860 case OpAnd: 1861 s += "__a & __b;"; 1862 break; 1863 case OpOr: 1864 s += "__a | __b;"; 1865 break; 1866 case OpXor: 1867 s += "__a ^ __b;"; 1868 break; 1869 case OpAndNot: 1870 s += "__a & ~__b;"; 1871 break; 1872 case OpOrNot: 1873 s += "__a | ~__b;"; 1874 break; 1875 case OpCast: 1876 s += "(" + ts + ")__a;"; 1877 break; 1878 case OpConcat: 1879 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; 1880 s += ", (int64x1_t)__b, 0, 1);"; 1881 break; 1882 case OpHi: 1883 // nElts is for the result vector, so the source is twice that number. 
1884 s += "__builtin_shufflevector(__a, __a"; 1885 for (unsigned i = nElts; i < nElts * 2; ++i) 1886 s += ", " + utostr(i); 1887 s+= ");"; 1888 break; 1889 case OpLo: 1890 s += "__builtin_shufflevector(__a, __a"; 1891 for (unsigned i = 0; i < nElts; ++i) 1892 s += ", " + utostr(i); 1893 s+= ");"; 1894 break; 1895 case OpDup: 1896 s += Duplicate(nElts, typestr, "__a") + ";"; 1897 break; 1898 case OpDupLane: 1899 s += SplatLane(nElts, "__a", "__b") + ";"; 1900 break; 1901 case OpSelect: 1902 // ((0 & 1) | (~0 & 2)) 1903 s += "(" + ts + ")"; 1904 ts = TypeString(proto[1], typestr); 1905 s += "((__a & (" + ts + ")__b) | "; 1906 s += "(~__a & (" + ts + ")__c));"; 1907 break; 1908 case OpRev16: 1909 s += "__builtin_shufflevector(__a, __a"; 1910 for (unsigned i = 2; i <= nElts; i += 2) 1911 for (unsigned j = 0; j != 2; ++j) 1912 s += ", " + utostr(i - j - 1); 1913 s += ");"; 1914 break; 1915 case OpRev32: { 1916 unsigned WordElts = nElts >> (1 + (int)quad); 1917 s += "__builtin_shufflevector(__a, __a"; 1918 for (unsigned i = WordElts; i <= nElts; i += WordElts) 1919 for (unsigned j = 0; j != WordElts; ++j) 1920 s += ", " + utostr(i - j - 1); 1921 s += ");"; 1922 break; 1923 } 1924 case OpRev64: { 1925 unsigned DblWordElts = nElts >> (int)quad; 1926 s += "__builtin_shufflevector(__a, __a"; 1927 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) 1928 for (unsigned j = 0; j != DblWordElts; ++j) 1929 s += ", " + utostr(i - j - 1); 1930 s += ");"; 1931 break; 1932 } 1933 case OpXtnHi: { 1934 s = TypeString(proto[1], typestr) + " __a1 = " + 1935 MangleName("vmovn", typestr, ClassS) + "(__b);\n " + 1936 "return __builtin_shufflevector(__a, __a1"; 1937 for (unsigned i = 0; i < nElts * 4; ++i) 1938 s += ", " + utostr(i); 1939 s += ");"; 1940 break; 1941 } 1942 case OpSqxtunHi: { 1943 s = TypeString(proto[1], typestr) + " __a1 = " + 1944 MangleName("vqmovun", typestr, ClassS) + "(__b);\n " + 1945 "return __builtin_shufflevector(__a, __a1"; 1946 for (unsigned i = 0; i < 
nElts * 4; ++i) 1947 s += ", " + utostr(i); 1948 s += ");"; 1949 break; 1950 } 1951 case OpQxtnHi: { 1952 s = TypeString(proto[1], typestr) + " __a1 = " + 1953 MangleName("vqmovn", typestr, ClassS) + "(__b);\n " + 1954 "return __builtin_shufflevector(__a, __a1"; 1955 for (unsigned i = 0; i < nElts * 4; ++i) 1956 s += ", " + utostr(i); 1957 s += ");"; 1958 break; 1959 } 1960 case OpFcvtnHi: { 1961 std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16"; 1962 s = TypeString(proto[1], typestr) + " __a1 = " + 1963 MangleName(FName, typestr, ClassS) + "(__b);\n " + 1964 "return __builtin_shufflevector(__a, __a1"; 1965 for (unsigned i = 0; i < nElts * 4; ++i) 1966 s += ", " + utostr(i); 1967 s += ");"; 1968 break; 1969 } 1970 case OpFcvtlHi: { 1971 std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32"; 1972 s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) + 1973 ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);"; 1974 break; 1975 } 1976 case OpFcvtxnHi: { 1977 s = TypeString(proto[1], typestr) + " __a1 = " + 1978 MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " + 1979 "return __builtin_shufflevector(__a, __a1"; 1980 for (unsigned i = 0; i < nElts * 4; ++i) 1981 s += ", " + utostr(i); 1982 s += ");"; 1983 break; 1984 } 1985 case OpUzp1: 1986 s += "__builtin_shufflevector(__a, __b"; 1987 for (unsigned i = 0; i < nElts; i++) 1988 s += ", " + utostr(2*i); 1989 s += ");"; 1990 break; 1991 case OpUzp2: 1992 s += "__builtin_shufflevector(__a, __b"; 1993 for (unsigned i = 0; i < nElts; i++) 1994 s += ", " + utostr(2*i+1); 1995 s += ");"; 1996 break; 1997 case OpZip1: 1998 s += "__builtin_shufflevector(__a, __b"; 1999 for (unsigned i = 0; i < (nElts/2); i++) 2000 s += ", " + utostr(i) + ", " + utostr(i+nElts); 2001 s += ");"; 2002 break; 2003 case OpZip2: 2004 s += "__builtin_shufflevector(__a, __b"; 2005 for (unsigned i = nElts/2; i < nElts; i++) 2006 s += ", " + utostr(i) + ", " + utostr(i+nElts); 2007 s += ");"; 2008 break; 2009 
case OpTrn1: 2010 s += "__builtin_shufflevector(__a, __b"; 2011 for (unsigned i = 0; i < (nElts/2); i++) 2012 s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts); 2013 s += ");"; 2014 break; 2015 case OpTrn2: 2016 s += "__builtin_shufflevector(__a, __b"; 2017 for (unsigned i = 0; i < (nElts/2); i++) 2018 s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts); 2019 s += ");"; 2020 break; 2021 case OpAbdl: { 2022 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; 2023 if (typestr[0] != 'U') { 2024 // vabd results are always unsigned and must be zero-extended. 2025 std::string utype = "U" + typestr.str(); 2026 s += "(" + TypeString(proto[0], typestr) + ")"; 2027 abd = "(" + TypeString('d', utype) + ")" + abd; 2028 s += Extend(utype, abd) + ";"; 2029 } else { 2030 s += Extend(typestr, abd) + ";"; 2031 } 2032 break; 2033 } 2034 case OpAbdlHi: 2035 s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); 2036 break; 2037 case OpAddhnHi: { 2038 std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; 2039 s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); 2040 s += ";"; 2041 break; 2042 } 2043 case OpRAddhnHi: { 2044 std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)"; 2045 s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); 2046 s += ";"; 2047 break; 2048 } 2049 case OpSubhnHi: { 2050 std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; 2051 s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); 2052 s += ";"; 2053 break; 2054 } 2055 case OpRSubhnHi: { 2056 std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; 2057 s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); 2058 s += ";"; 2059 break; 2060 } 2061 case OpAba: 2062 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; 2063 break; 2064 case OpAbal: 2065 s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; 2066 break; 2067 case OpAbalHi: 2068 s += 
Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); 2069 break; 2070 case OpQDMullHi: 2071 s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); 2072 break; 2073 case OpQDMullHiN: 2074 s += MangleName("vqdmull_n", typestr, ClassS); 2075 s += "(" + GetHigh("__a", typestr) + ", __b);"; 2076 return s; 2077 case OpQDMlalHi: 2078 s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); 2079 break; 2080 case OpQDMlalHiN: 2081 s += MangleName("vqdmlal_n", typestr, ClassS); 2082 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 2083 return s; 2084 case OpQDMlslHi: 2085 s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); 2086 break; 2087 case OpQDMlslHiN: 2088 s += MangleName("vqdmlsl_n", typestr, ClassS); 2089 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 2090 return s; 2091 case OpDiv: 2092 s += "__a / __b;"; 2093 break; 2094 case OpMovlHi: { 2095 s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 2096 MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; 2097 s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS); 2098 s += "(__a1, 0);"; 2099 break; 2100 } 2101 case OpLongHi: { 2102 // Another local variable __a1 is needed for calling a Macro, 2103 // or using __a will have naming conflict when Macro expanding. 
2104 s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 2105 MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; 2106 s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + 2107 "(__a1, __b);"; 2108 break; 2109 } 2110 case OpNarrowHi: { 2111 s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + 2112 MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; 2113 break; 2114 } 2115 case OpCopyLane: { 2116 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2117 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 2118 s += TypeString('s', typestr) + " __c2 = " + 2119 MangleName("vget_lane", typestr, ClassS) + "(__c1, __d); \\\n " + 2120 MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b);"; 2121 break; 2122 } 2123 case OpCopyQLane: { 2124 std::string typeCode = ""; 2125 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2126 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2127 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 2128 s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode + 2129 "(__c1, __d); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b);"; 2130 break; 2131 } 2132 case OpCopyLaneQ: { 2133 std::string typeCode = ""; 2134 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2135 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2136 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 2137 s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode + 2138 "(__c1, __d); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b);"; 2139 break; 2140 } 2141 case OpScalarMulLane: { 2142 std::string typeCode = ""; 2143 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2144 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + 2145 "(__b, __c);\\\n __a * __d1;"; 2146 break; 2147 } 2148 case OpScalarMulLaneQ: { 2149 std::string typeCode = ""; 2150 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2151 s += 
TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2152 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2153 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode + 2154 "(__b1, __c);\\\n __a1 * __d1;"; 2155 break; 2156 } 2157 case OpScalarMulXLane: { 2158 bool dummy = false; 2159 char type = ClassifyType(typestr, dummy, dummy, dummy); 2160 if (type == 'f') type = 's'; 2161 std::string typeCode = ""; 2162 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2163 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2164 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2165 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + 2166 "(__b1, __c);\\\n vmulx" + type + "_" + 2167 typeCode + "(__a1, __d1);"; 2168 break; 2169 } 2170 case OpScalarMulXLaneQ: { 2171 bool dummy = false; 2172 char type = ClassifyType(typestr, dummy, dummy, dummy); 2173 if (type == 'f') type = 's'; 2174 std::string typeCode = ""; 2175 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2176 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2177 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2178 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + 2179 typeCode + "(__b1, __c);\\\n vmulx" + type + 2180 "_" + typeCode + "(__a1, __d1);"; 2181 break; 2182 } 2183 2184 case OpScalarVMulXLane: { 2185 bool dummy = false; 2186 char type = ClassifyType(typestr, dummy, dummy, dummy); 2187 if (type == 'f') type = 's'; 2188 std::string typeCode = ""; 2189 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2190 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2191 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2192 s += TypeString('s', typestr) + " __d1 = vget_lane_" + 2193 typeCode + "(__a1, 0);\\\n" + 2194 " " + TypeString('s', typestr) + " __e1 = vget_lane_" + 2195 typeCode + "(__b1, __c);\\\n" + 2196 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + 2197 typeCode + "(__d1, __e1);\\\n" + 
2198 " " + TypeString('d', typestr) + " __g1;\\\n" + 2199 " vset_lane_" + typeCode + "(__f1, __g1, __c);"; 2200 break; 2201 } 2202 2203 case OpScalarVMulXLaneQ: { 2204 bool dummy = false; 2205 char type = ClassifyType(typestr, dummy, dummy, dummy); 2206 if (type == 'f') type = 's'; 2207 std::string typeCode = ""; 2208 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2209 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2210 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2211 s += TypeString('s', typestr) + " __d1 = vget_lane_" + 2212 typeCode + "(__a1, 0);\\\n" + 2213 " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" + 2214 typeCode + "(__b1, __c);\\\n" + 2215 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + 2216 typeCode + "(__d1, __e1);\\\n" + 2217 " " + TypeString('d', typestr) + " __g1;\\\n" + 2218 " vset_lane_" + typeCode + "(__f1, __g1, 0);"; 2219 break; 2220 } 2221 case OpScalarQDMullLane: { 2222 std::string typeCode = ""; 2223 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2224 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2225 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2226 s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " + 2227 "vget_lane_" + typeCode + "(__b1, __c));"; 2228 break; 2229 } 2230 case OpScalarQDMullLaneQ: { 2231 std::string typeCode = ""; 2232 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2233 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2234 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2235 s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " + 2236 "vgetq_lane_" + typeCode + "(__b1, __c));"; 2237 break; 2238 } 2239 case OpScalarQDMulHiLane: { 2240 std::string typeCode = ""; 2241 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2242 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2243 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2244 s += MangleName("vqdmulh", typestr, ClassS) + 
"(__a1, " + 2245 "vget_lane_" + typeCode + "(__b1, __c));"; 2246 break; 2247 } 2248 case OpScalarQDMulHiLaneQ: { 2249 std::string typeCode = ""; 2250 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2251 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2252 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2253 s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " + 2254 "vgetq_lane_" + typeCode + "(__b1, __c));"; 2255 break; 2256 } 2257 case OpScalarQRDMulHiLane: { 2258 std::string typeCode = ""; 2259 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2260 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2261 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2262 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " + 2263 "vget_lane_" + typeCode + "(__b1, __c));"; 2264 break; 2265 } 2266 case OpScalarQRDMulHiLaneQ: { 2267 std::string typeCode = ""; 2268 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2269 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2270 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 2271 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " + 2272 "vgetq_lane_" + typeCode + "(__b1, __c));"; 2273 break; 2274 } 2275 case OpScalarGetLane:{ 2276 std::string typeCode = ""; 2277 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2278 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 2279 2280 std::string intType = quad ? "int16x8_t" : "int16x4_t"; 2281 std::string intName = quad ? 
"vgetq" : "vget"; 2282 2283 // reinterpret float16 vector as int16 vector 2284 s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n"; 2285 2286 s += " int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n"; 2287 2288 // reinterpret int16 vector as float16 vector 2289 s += " float16_t __a4 = *(float16_t *)(&__a3);\\\n"; 2290 s += " __a4;"; 2291 break; 2292 } 2293 case OpScalarSetLane:{ 2294 std::string typeCode = ""; 2295 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2296 s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n "; 2297 2298 std::string origType = quad ? "float16x8_t" : "float16x4_t"; 2299 std::string intType = quad ? "int16x8_t" : "int16x4_t"; 2300 std::string intName = quad ? "vsetq" : "vset"; 2301 2302 // reinterpret float16_t as int16_t 2303 s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n"; 2304 // reinterpret float16 vector as int16 vector 2305 s += " " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n"; 2306 2307 s += " " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n"; 2308 2309 // reinterpret int16 vector as float16 vector 2310 s += " " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n"; 2311 s += "__b4;"; 2312 break; 2313 } 2314 2315 default: 2316 PrintFatalError("unknown OpKind!"); 2317 } 2318 return s; 2319} 2320 2321static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 2322 unsigned mod = proto[0]; 2323 2324 if (mod == 'v' || mod == 'f' || mod == 'F') 2325 mod = proto[1]; 2326 2327 bool quad = false; 2328 bool poly = false; 2329 bool usgn = false; 2330 bool scal = false; 2331 bool cnst = false; 2332 bool pntr = false; 2333 2334 // Base type to get the type string for. 2335 char type = ClassifyType(typestr, quad, poly, usgn); 2336 2337 // Based on the modifying character, change the type and width if necessary. 2338 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 2339 2340 NeonTypeFlags::EltType ET; 2341 switch (type) { 2342 case 'c': 2343 ET = poly ? 
NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 2344 break; 2345 case 's': 2346 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 2347 break; 2348 case 'i': 2349 ET = NeonTypeFlags::Int32; 2350 break; 2351 case 'l': 2352 ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64; 2353 break; 2354 case 'k': 2355 ET = NeonTypeFlags::Poly128; 2356 break; 2357 case 'h': 2358 ET = NeonTypeFlags::Float16; 2359 break; 2360 case 'f': 2361 ET = NeonTypeFlags::Float32; 2362 break; 2363 case 'd': 2364 ET = NeonTypeFlags::Float64; 2365 break; 2366 default: 2367 PrintFatalError("unhandled type!"); 2368 } 2369 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 2370 return Flags.getFlags(); 2371} 2372 2373// We don't check 'a' in this function, because for builtin function the 2374// argument matching to 'a' uses a vector type splatted from a scalar type. 2375static bool ProtoHasScalar(const std::string proto) 2376{ 2377 return (proto.find('s') != std::string::npos 2378 || proto.find('z') != std::string::npos 2379 || proto.find('r') != std::string::npos 2380 || proto.find('b') != std::string::npos 2381 || proto.find('$') != std::string::npos 2382 || proto.find('y') != std::string::npos 2383 || proto.find('o') != std::string::npos); 2384} 2385 2386// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 2387static std::string GenBuiltin(const std::string &name, const std::string &proto, 2388 StringRef typestr, ClassKind ck) { 2389 std::string s; 2390 2391 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 2392 // sret-like argument. 2393 bool sret = IsMultiVecProto(proto[0]); 2394 2395 bool define = UseMacro(proto, typestr); 2396 2397 // Check if the prototype has a scalar operand with the type of the vector 2398 // elements. If not, bitcasting the args will take care of arg checking. 2399 // The actual signedness etc. will be taken care of with special enums. 
2400 if (!ProtoHasScalar(proto)) 2401 ck = ClassB; 2402 2403 if (proto[0] != 'v') { 2404 std::string ts = TypeString(proto[0], typestr); 2405 2406 if (define) { 2407 if (sret) 2408 s += ts + " r; "; 2409 else 2410 s += "(" + ts + ")"; 2411 } else if (sret) { 2412 s += ts + " r; "; 2413 } else { 2414 s += "return (" + ts + ")"; 2415 } 2416 } 2417 2418 bool splat = proto.find('a') != std::string::npos; 2419 2420 s += "__builtin_neon_"; 2421 if (splat) { 2422 // Call the non-splat builtin: chop off the "_n" suffix from the name. 2423 std::string vname(name, 0, name.size()-2); 2424 s += MangleName(vname, typestr, ck); 2425 } else { 2426 s += MangleName(name, typestr, ck); 2427 } 2428 s += "("; 2429 2430 // Pass the address of the return variable as the first argument to sret-like 2431 // builtins. 2432 if (sret) 2433 s += "&r, "; 2434 2435 char arg = 'a'; 2436 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2437 std::string args = std::string(&arg, 1); 2438 2439 // Use the local temporaries instead of the macro arguments. 2440 args = "__" + args; 2441 2442 bool argQuad = false; 2443 bool argPoly = false; 2444 bool argUsgn = false; 2445 bool argScalar = false; 2446 bool dummy = false; 2447 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); 2448 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, 2449 dummy, dummy); 2450 2451 // Handle multiple-vector values specially, emitting each subvector as an 2452 // argument to the __builtin. 2453 unsigned NumOfVec = 0; 2454 if (proto[i] >= '2' && proto[i] <= '4') { 2455 NumOfVec = proto[i] - '0'; 2456 } else if (proto[i] >= 'B' && proto[i] <= 'D') { 2457 NumOfVec = proto[i] - 'A' + 1; 2458 } 2459 2460 if (NumOfVec > 0) { 2461 // Check if an explicit cast is needed. 2462 if (argType != 'c' || argPoly || argUsgn) 2463 args = (argQuad ? 
"(int8x16_t)" : "(int8x8_t)") + args; 2464 2465 for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) { 2466 s += args + ".val[" + utostr(vi) + "]"; 2467 if ((vi + 1) < ve) 2468 s += ", "; 2469 } 2470 if ((i + 1) < e) 2471 s += ", "; 2472 2473 continue; 2474 } 2475 2476 if (splat && (i + 1) == e) 2477 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); 2478 2479 // Check if an explicit cast is needed. 2480 if ((splat || !argScalar) && 2481 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { 2482 std::string argTypeStr = "c"; 2483 if (ck != ClassB) 2484 argTypeStr = argType; 2485 if (argQuad) 2486 argTypeStr = "Q" + argTypeStr; 2487 args = "(" + TypeString('d', argTypeStr) + ")" + args; 2488 } 2489 2490 s += args; 2491 if ((i + 1) < e) 2492 s += ", "; 2493 } 2494 2495 // Extra constant integer to hold type class enum for this function, e.g. s8 2496 if (ck == ClassB) 2497 s += ", " + utostr(GetNeonEnum(proto, typestr)); 2498 2499 s += ");"; 2500 2501 if (proto[0] != 'v' && sret) { 2502 if (define) 2503 s += " r;"; 2504 else 2505 s += " return r;"; 2506 } 2507 return s; 2508} 2509 2510static std::string GenBuiltinDef(const std::string &name, 2511 const std::string &proto, 2512 StringRef typestr, ClassKind ck) { 2513 std::string s("BUILTIN(__builtin_neon_"); 2514 2515 // If all types are the same size, bitcasting the args will take care 2516 // of arg checking. The actual signedness etc. will be taken care of with 2517 // special enums. 2518 if (!ProtoHasScalar(proto)) 2519 ck = ClassB; 2520 2521 s += MangleName(name, typestr, ck); 2522 s += ", \""; 2523 2524 for (unsigned i = 0, e = proto.size(); i != e; ++i) 2525 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 2526 2527 // Extra constant integer to hold type class enum for this function, e.g. 
s8 2528 if (ck == ClassB) 2529 s += "i"; 2530 2531 s += "\", \"n\")"; 2532 return s; 2533} 2534 2535static std::string GenIntrinsic(const std::string &name, 2536 const std::string &proto, 2537 StringRef outTypeStr, StringRef inTypeStr, 2538 OpKind kind, ClassKind classKind) { 2539 assert(!proto.empty() && ""); 2540 bool define = UseMacro(proto, outTypeStr) && kind != OpUnavailable; 2541 std::string s; 2542 2543 // static always inline + return type 2544 if (define) 2545 s += "#define "; 2546 else 2547 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 2548 2549 // Function name with type suffix 2550 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2551 if (outTypeStr != inTypeStr) { 2552 // If the input type is different (e.g., for vreinterpret), append a suffix 2553 // for the input type. String off a "Q" (quad) prefix so that MangleName 2554 // does not insert another "q" in the name. 2555 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2556 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2557 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2558 } 2559 s += mangledName; 2560 2561 // Function arguments 2562 s += GenArgs(proto, inTypeStr, name); 2563 2564 // Definition. 2565 if (define) { 2566 s += " __extension__ ({ \\\n "; 2567 s += GenMacroLocals(proto, inTypeStr, name); 2568 } else if (kind == OpUnavailable) { 2569 s += " __attribute__((unavailable));\n"; 2570 return s; 2571 } else 2572 s += " {\n "; 2573 2574 if (kind != OpNone) 2575 s += GenOpString(name, kind, proto, outTypeStr); 2576 else 2577 s += GenBuiltin(name, proto, outTypeStr, classKind); 2578 if (define) 2579 s += " })"; 2580 else 2581 s += " }"; 2582 s += "\n"; 2583 return s; 2584} 2585 2586/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2587/// is comprised of type definitions and function declarations. 
2588void NeonEmitter::run(raw_ostream &OS) { 2589 OS << 2590 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------" 2591 "---===\n" 2592 " *\n" 2593 " * Permission is hereby granted, free of charge, to any person obtaining " 2594 "a copy\n" 2595 " * of this software and associated documentation files (the \"Software\")," 2596 " to deal\n" 2597 " * in the Software without restriction, including without limitation the " 2598 "rights\n" 2599 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2600 "and/or sell\n" 2601 " * copies of the Software, and to permit persons to whom the Software is\n" 2602 " * furnished to do so, subject to the following conditions:\n" 2603 " *\n" 2604 " * The above copyright notice and this permission notice shall be " 2605 "included in\n" 2606 " * all copies or substantial portions of the Software.\n" 2607 " *\n" 2608 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2609 "EXPRESS OR\n" 2610 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2611 "MERCHANTABILITY,\n" 2612 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2613 "SHALL THE\n" 2614 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2615 "OTHER\n" 2616 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2617 "ARISING FROM,\n" 2618 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2619 "DEALINGS IN\n" 2620 " * THE SOFTWARE.\n" 2621 " *\n" 2622 " *===--------------------------------------------------------------------" 2623 "---===\n" 2624 " */\n\n"; 2625 2626 OS << "#ifndef __ARM_NEON_H\n"; 2627 OS << "#define __ARM_NEON_H\n\n"; 2628 2629 OS << "#if !defined(__ARM_NEON)\n"; 2630 OS << "#error \"NEON support not enabled\"\n"; 2631 OS << "#endif\n\n"; 2632 2633 OS << "#include <stdint.h>\n\n"; 2634 2635 // Emit NEON-specific scalar typedefs. 
2636 OS << "typedef float float32_t;\n"; 2637 OS << "typedef __fp16 float16_t;\n"; 2638 2639 OS << "#ifdef __aarch64__\n"; 2640 OS << "typedef double float64_t;\n"; 2641 OS << "#endif\n\n"; 2642 2643 // For now, signedness of polynomial types depends on target 2644 OS << "#ifdef __aarch64__\n"; 2645 OS << "typedef uint8_t poly8_t;\n"; 2646 OS << "typedef uint16_t poly16_t;\n"; 2647 OS << "typedef uint64_t poly64_t;\n"; 2648 OS << "typedef __uint128_t poly128_t;\n"; 2649 OS << "#else\n"; 2650 OS << "typedef int8_t poly8_t;\n"; 2651 OS << "typedef int16_t poly16_t;\n"; 2652 OS << "#endif\n"; 2653 2654 // Emit Neon vector typedefs. 2655 std::string TypedefTypes( 2656 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2657 SmallVector<StringRef, 24> TDTypeVec; 2658 ParseTypes(0, TypedefTypes, TDTypeVec); 2659 2660 // Emit vector typedefs. 2661 bool isA64 = false; 2662 bool preinsert; 2663 bool postinsert; 2664 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2665 bool dummy, quad = false, poly = false; 2666 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2667 preinsert = false; 2668 postinsert = false; 2669 2670 if (type == 'd' || (type == 'l' && poly)) { 2671 preinsert = isA64? false: true; 2672 isA64 = true; 2673 } else { 2674 postinsert = isA64? true: false; 2675 isA64 = false; 2676 } 2677 if (postinsert) 2678 OS << "#endif\n"; 2679 if (preinsert) 2680 OS << "#ifdef __aarch64__\n"; 2681 2682 if (poly) 2683 OS << "typedef __attribute__((neon_polyvector_type("; 2684 else 2685 OS << "typedef __attribute__((neon_vector_type("; 2686 2687 unsigned nElts = GetNumElements(TDTypeVec[i], quad); 2688 OS << utostr(nElts) << "))) "; 2689 if (nElts < 10) 2690 OS << " "; 2691 2692 OS << TypeString('s', TDTypeVec[i]); 2693 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; 2694 2695 } 2696 postinsert = isA64? true: false; 2697 if (postinsert) 2698 OS << "#endif\n"; 2699 OS << "\n"; 2700 2701 // Emit struct typedefs. 
2702 isA64 = false; 2703 for (unsigned vi = 2; vi != 5; ++vi) { 2704 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2705 bool dummy, quad = false, poly = false; 2706 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2707 preinsert = false; 2708 postinsert = false; 2709 2710 if (type == 'd' || (type == 'l' && poly)) { 2711 preinsert = isA64? false: true; 2712 isA64 = true; 2713 } else { 2714 postinsert = isA64? true: false; 2715 isA64 = false; 2716 } 2717 if (postinsert) 2718 OS << "#endif\n"; 2719 if (preinsert) 2720 OS << "#ifdef __aarch64__\n"; 2721 2722 std::string ts = TypeString('d', TDTypeVec[i]); 2723 std::string vs = TypeString('0' + vi, TDTypeVec[i]); 2724 OS << "typedef struct " << vs << " {\n"; 2725 OS << " " << ts << " val"; 2726 OS << "[" << utostr(vi) << "]"; 2727 OS << ";\n} "; 2728 OS << vs << ";\n"; 2729 OS << "\n"; 2730 } 2731 } 2732 postinsert = isA64? true: false; 2733 if (postinsert) 2734 OS << "#endif\n"; 2735 OS << "\n"; 2736 2737 OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n"; 2738 2739 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 2740 2741 StringMap<ClassKind> EmittedMap; 2742 std::string CurrentGuard = ""; 2743 bool InGuard = false; 2744 2745 // Some intrinsics are used to express others. These need to be emitted near 2746 // the beginning so that the declarations are present when needed. This is 2747 // rather an ugly, arbitrary list, but probably simpler than actually tracking 2748 // dependency info. 
2749 static const char *EarlyDefsArr[] = 2750 { "VFMA", "VQMOVN", "VQMOVUN", "VABD", "VMOVL", 2751 "VABDL", "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH", 2752 "VMULL", "VMLAL_N", "VMLSL_N", "VMULL_N", "VMULL_P64", 2753 "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" }; 2754 ArrayRef<const char *> EarlyDefs(EarlyDefsArr); 2755 2756 for (unsigned i = 0; i < EarlyDefs.size(); ++i) { 2757 Record *R = Records.getDef(EarlyDefs[i]); 2758 emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap); 2759 } 2760 2761 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2762 Record *R = RV[i]; 2763 if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) != 2764 EarlyDefs.end()) 2765 continue; 2766 2767 emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap); 2768 } 2769 2770 if (InGuard) 2771 OS << "#endif\n\n"; 2772 2773 OS << "#undef __ai\n\n"; 2774 OS << "#endif /* __ARM_NEON_H */\n"; 2775} 2776 2777void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R, 2778 std::string &CurrentGuard, bool &InGuard, 2779 StringMap<ClassKind> &EmittedMap) { 2780 2781 std::string NewGuard = R->getValueAsString("ArchGuard"); 2782 if (NewGuard != CurrentGuard) { 2783 if (InGuard) 2784 OS << "#endif\n\n"; 2785 if (NewGuard.size()) 2786 OS << "#if " << NewGuard << '\n'; 2787 2788 CurrentGuard = NewGuard; 2789 InGuard = NewGuard.size() != 0; 2790 } 2791 2792 emitIntrinsic(OS, R, EmittedMap); 2793} 2794 2795/// emitIntrinsic - Write out the arm_neon.h header file definitions for the 2796/// intrinsics specified by record R checking for intrinsic uniqueness. 
2797void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, 2798 StringMap<ClassKind> &EmittedMap) { 2799 std::string name = R->getValueAsString("Name"); 2800 std::string Proto = R->getValueAsString("Prototype"); 2801 std::string Types = R->getValueAsString("Types"); 2802 2803 SmallVector<StringRef, 16> TypeVec; 2804 ParseTypes(R, Types, TypeVec); 2805 2806 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2807 2808 ClassKind classKind = ClassNone; 2809 if (R->getSuperClasses().size() >= 2) 2810 classKind = ClassMap[R->getSuperClasses()[1]]; 2811 if (classKind == ClassNone && kind == OpNone) 2812 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2813 2814 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2815 if (kind == OpReinterpret) { 2816 bool outQuad = false; 2817 bool dummy = false; 2818 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2819 for (unsigned srcti = 0, srcte = TypeVec.size(); 2820 srcti != srcte; ++srcti) { 2821 bool inQuad = false; 2822 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2823 if (srcti == ti || inQuad != outQuad) 2824 continue; 2825 std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], 2826 OpCast, ClassS); 2827 if (EmittedMap.count(s)) 2828 continue; 2829 EmittedMap[s] = ClassS; 2830 OS << s; 2831 } 2832 } else { 2833 std::string s = 2834 GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); 2835 if (EmittedMap.count(s)) { 2836 errs() << "warning: duplicate definition: " << name 2837 << " (type: " << TypeString('d', TypeVec[ti]) << ")\n"; 2838 continue; 2839 } 2840 EmittedMap[s] = classKind; 2841 OS << s; 2842 } 2843 } 2844 OS << "\n"; 2845} 2846 2847static unsigned RangeFromType(const char mod, StringRef typestr) { 2848 // base type to get the type string for. 
2849 bool quad = false, dummy = false; 2850 char type = ClassifyType(typestr, quad, dummy, dummy); 2851 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy); 2852 2853 switch (type) { 2854 case 'c': 2855 return (8 << (int)quad) - 1; 2856 case 'h': 2857 case 's': 2858 return (4 << (int)quad) - 1; 2859 case 'f': 2860 case 'i': 2861 return (2 << (int)quad) - 1; 2862 case 'd': 2863 case 'l': 2864 return (1 << (int)quad) - 1; 2865 case 'k': 2866 return 0; 2867 default: 2868 PrintFatalError("unhandled type!"); 2869 } 2870} 2871 2872static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) { 2873 // base type to get the type string for. 2874 bool dummy = false; 2875 char type = ClassifyType(typestr, dummy, dummy, dummy); 2876 type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy); 2877 2878 switch (type) { 2879 case 'c': 2880 return 7; 2881 case 'h': 2882 case 's': 2883 return 15; 2884 case 'f': 2885 case 'i': 2886 return 31; 2887 case 'd': 2888 case 'l': 2889 return 63; 2890 case 'k': 2891 return 127; 2892 default: 2893 PrintFatalError("unhandled type!"); 2894 } 2895} 2896 2897/// Generate the ARM and AArch64 intrinsic range checking code for 2898/// shift/lane immediates, checking for unique declarations. 2899void 2900NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS) { 2901 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2902 StringMap<OpKind> EmittedMap; 2903 2904 // Generate the intrinsic range checking code for shift/lane immediates. 
2905 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2906 2907 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2908 Record *R = RV[i]; 2909 2910 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2911 if (k != OpNone) 2912 continue; 2913 2914 std::string name = R->getValueAsString("Name"); 2915 std::string Proto = R->getValueAsString("Prototype"); 2916 std::string Types = R->getValueAsString("Types"); 2917 std::string Rename = name + "@" + Proto; 2918 2919 // Functions with 'a' (the splat code) in the type prototype should not get 2920 // their own builtin as they use the non-splat variant. 2921 if (Proto.find('a') != std::string::npos) 2922 continue; 2923 2924 // Functions which do not have an immediate do not need to have range 2925 // checking code emitted. 2926 size_t immPos = Proto.find('i'); 2927 if (immPos == std::string::npos) 2928 continue; 2929 2930 SmallVector<StringRef, 16> TypeVec; 2931 ParseTypes(R, Types, TypeVec); 2932 2933 if (R->getSuperClasses().size() < 2) 2934 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2935 2936 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2937 if (!ProtoHasScalar(Proto)) 2938 ck = ClassB; 2939 2940 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2941 std::string namestr, shiftstr, rangestr; 2942 2943 if (R->getValueAsBit("isVCVT_N")) { 2944 // VCVT between floating- and fixed-point values takes an immediate 2945 // in the range [1, 32] for f32, or [1, 64] for f64. 2946 ck = ClassB; 2947 if (name.find("32") != std::string::npos) 2948 rangestr = "l = 1; u = 31"; // upper bound = l + u 2949 else if (name.find("64") != std::string::npos) 2950 rangestr = "l = 1; u = 63"; 2951 else 2952 PrintFatalError(R->getLoc(), 2953 "Fixed point convert name should contains \"32\" or \"64\""); 2954 2955 } else if (R->getValueAsBit("isScalarShift")) { 2956 // Right shifts have an 'r' in the name, left shifts do not. Convert 2957 // instructions have the same bounds and right shifts. 
2958 if (name.find('r') != std::string::npos || 2959 name.find("cvt") != std::string::npos) 2960 rangestr = "l = 1; "; 2961 2962 unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]); 2963 // Narrow shift has half the upper bound 2964 if (R->getValueAsBit("isScalarNarrowShift")) 2965 upBound /= 2; 2966 2967 rangestr += "u = " + utostr(upBound); 2968 } else if (R->getValueAsBit("isShift")) { 2969 // Builtins which are overloaded by type will need to have their upper 2970 // bound computed at Sema time based on the type constant. 2971 shiftstr = ", true"; 2972 2973 // Right shifts have an 'r' in the name, left shifts do not. 2974 if (name.find('r') != std::string::npos) 2975 rangestr = "l = 1; "; 2976 2977 rangestr += "u = RFT(TV" + shiftstr + ")"; 2978 } else if (ck == ClassB) { 2979 // ClassB intrinsics have a type (and hence lane number) that is only 2980 // known at runtime. 2981 assert(immPos > 0 && "unexpected immediate operand"); 2982 if (R->getValueAsBit("isLaneQ")) 2983 rangestr = "u = RFT(TV, false, true)"; 2984 else 2985 rangestr = "u = RFT(TV, false, false)"; 2986 } else { 2987 // The immediate generally refers to a lane in the preceding argument. 2988 assert(immPos > 0 && "unexpected immediate operand"); 2989 rangestr = 2990 "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); 2991 } 2992 // Make sure cases appear only once by uniquing them in a string map. 2993 namestr = MangleName(name, TypeVec[ti], ck); 2994 if (EmittedMap.count(namestr)) 2995 continue; 2996 EmittedMap[namestr] = OpNone; 2997 2998 // Calculate the index of the immediate that should be range checked. 2999 unsigned immidx = 0; 3000 3001 // Builtins that return a struct of multiple vectors have an extra 3002 // leading arg for the struct return. 3003 if (IsMultiVecProto(Proto[0])) 3004 ++immidx; 3005 3006 // Add one to the index for each argument until we reach the immediate 3007 // to be checked. Structs of vectors are passed as multiple arguments. 
3008 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { 3009 switch (Proto[ii]) { 3010 default: 3011 immidx += 1; 3012 break; 3013 case '2': 3014 case 'B': 3015 immidx += 2; 3016 break; 3017 case '3': 3018 case 'C': 3019 immidx += 3; 3020 break; 3021 case '4': 3022 case 'D': 3023 immidx += 4; 3024 break; 3025 case 'i': 3026 ie = ii + 1; 3027 break; 3028 } 3029 } 3030 OS << "case NEON::BI__builtin_neon_"; 3031 OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " 3032 << rangestr << "; break;\n"; 3033 } 3034 } 3035 OS << "#endif\n\n"; 3036} 3037 3038struct OverloadInfo { 3039 uint64_t Mask; 3040 int PtrArgNum; 3041 bool HasConstPtr; 3042}; 3043/// Generate the ARM and AArch64 overloaded type checking code for 3044/// SemaChecking.cpp, checking for unique builtin declarations. 3045void 3046NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS) { 3047 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3048 3049 // Generate the overloaded type checking code for SemaChecking.cpp 3050 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 3051 3052 // We record each overload check line before emitting because subsequent Inst 3053 // definitions may extend the number of permitted types (i.e. augment the 3054 // Mask). Use std::map to avoid sorting the table by hash number. 3055 std::map<std::string, OverloadInfo> OverloadMap; 3056 typedef std::map<std::string, OverloadInfo>::iterator OverloadIterator; 3057 3058 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 3059 Record *R = RV[i]; 3060 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 3061 if (k != OpNone) 3062 continue; 3063 3064 std::string Proto = R->getValueAsString("Prototype"); 3065 std::string Types = R->getValueAsString("Types"); 3066 std::string name = R->getValueAsString("Name"); 3067 std::string Rename = name + "@" + Proto; 3068 3069 // Functions with 'a' (the splat code) in the type prototype should not get 3070 // their own builtin as they use the non-splat variant. 
3071 if (Proto.find('a') != std::string::npos) 3072 continue; 3073 3074 // Functions which have a scalar argument cannot be overloaded, no need to 3075 // check them if we are emitting the type checking code. 3076 if (ProtoHasScalar(Proto)) 3077 continue; 3078 3079 SmallVector<StringRef, 16> TypeVec; 3080 ParseTypes(R, Types, TypeVec); 3081 3082 if (R->getSuperClasses().size() < 2) 3083 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 3084 3085 int si = -1, qi = -1; 3086 uint64_t mask = 0, qmask = 0; 3087 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3088 // Generate the switch case(s) for this builtin for the type validation. 3089 bool quad = false, poly = false, usgn = false; 3090 (void) ClassifyType(TypeVec[ti], quad, poly, usgn); 3091 3092 if (quad) { 3093 qi = ti; 3094 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 3095 } else { 3096 si = ti; 3097 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); 3098 } 3099 } 3100 3101 // Check if the builtin function has a pointer or const pointer argument. 3102 int PtrArgNum = -1; 3103 bool HasConstPtr = false; 3104 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) { 3105 char ArgType = Proto[arg]; 3106 if (ArgType == 'c') { 3107 HasConstPtr = true; 3108 PtrArgNum = arg - 1; 3109 break; 3110 } 3111 if (ArgType == 'p') { 3112 PtrArgNum = arg - 1; 3113 break; 3114 } 3115 } 3116 // For sret builtins, adjust the pointer argument index. 3117 if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0])) 3118 PtrArgNum += 1; 3119 3120 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 3121 // and vst1_lane intrinsics. Using a pointer to the vector element 3122 // type with one of those operations causes codegen to select an aligned 3123 // load/store instruction. If you want an unaligned operation, 3124 // the pointer argument needs to have less alignment than element type, 3125 // so just accept any pointer type. 
3126 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") { 3127 PtrArgNum = -1; 3128 HasConstPtr = false; 3129 } 3130 3131 if (mask) { 3132 std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair( 3133 MangleName(name, TypeVec[si], ClassB), OverloadInfo())); 3134 OverloadInfo &Record = I.first->second; 3135 if (!I.second) 3136 assert(Record.PtrArgNum == PtrArgNum && 3137 Record.HasConstPtr == HasConstPtr); 3138 Record.Mask |= mask; 3139 Record.PtrArgNum = PtrArgNum; 3140 Record.HasConstPtr = HasConstPtr; 3141 } 3142 if (qmask) { 3143 std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair( 3144 MangleName(name, TypeVec[qi], ClassB), OverloadInfo())); 3145 OverloadInfo &Record = I.first->second; 3146 if (!I.second) 3147 assert(Record.PtrArgNum == PtrArgNum && 3148 Record.HasConstPtr == HasConstPtr); 3149 Record.Mask |= qmask; 3150 Record.PtrArgNum = PtrArgNum; 3151 Record.HasConstPtr = HasConstPtr; 3152 } 3153 } 3154 3155 for (OverloadIterator I = OverloadMap.begin(), E = OverloadMap.end(); I != E; 3156 ++I) { 3157 OverloadInfo &BuiltinOverloads = I->second; 3158 OS << "case NEON::BI__builtin_neon_" << I->first << ": "; 3159 OS << "mask = " << "0x" << utohexstr(BuiltinOverloads.Mask) << "ULL"; 3160 if (BuiltinOverloads.PtrArgNum >= 0) 3161 OS << "; PtrArgNum = " << BuiltinOverloads.PtrArgNum; 3162 if (BuiltinOverloads.HasConstPtr) 3163 OS << "; HasConstPtr = true"; 3164 OS << "; break;\n"; 3165 } 3166 3167 OS << "#endif\n\n"; 3168} 3169 3170/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 3171/// declaration of builtins, checking for unique builtin declarations. 3172void NeonEmitter::genBuiltinsDef(raw_ostream &OS) { 3173 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3174 3175 // We want to emit the intrinsics in alphabetical order, so use the more 3176 // expensive std::map to gather them together first. 
3177 std::map<std::string, OpKind> EmittedMap; 3178 3179 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 3180 Record *R = RV[i]; 3181 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 3182 if (k != OpNone) 3183 continue; 3184 3185 std::string Proto = R->getValueAsString("Prototype"); 3186 std::string name = R->getValueAsString("Name"); 3187 std::string Rename = name + "@" + Proto; 3188 3189 // Functions with 'a' (the splat code) in the type prototype should not get 3190 // their own builtin as they use the non-splat variant. 3191 if (Proto.find('a') != std::string::npos) 3192 continue; 3193 3194 std::string Types = R->getValueAsString("Types"); 3195 SmallVector<StringRef, 16> TypeVec; 3196 ParseTypes(R, Types, TypeVec); 3197 3198 if (R->getSuperClasses().size() < 2) 3199 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 3200 3201 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 3202 3203 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3204 // Generate the declaration for this builtin, ensuring 3205 // that each unique BUILTIN() macro appears only once in the output 3206 // stream. 3207 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); 3208 if (EmittedMap.count(bd)) 3209 continue; 3210 3211 EmittedMap[bd] = OpNone; 3212 } 3213 } 3214 3215 // Generate BuiltinsNEON. 3216 OS << "#ifdef GET_NEON_BUILTINS\n"; 3217 3218 for (std::map<std::string, OpKind>::iterator I = EmittedMap.begin(), 3219 E = EmittedMap.end(); 3220 I != E; ++I) 3221 OS << I->first << "\n"; 3222 3223 OS << "#endif\n\n"; 3224} 3225 3226/// runHeader - Emit a file with sections defining: 3227/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 3228/// 2. the SemaChecking code for the type overload checking. 3229/// 3. the SemaChecking code for validation of intrinsic immediate arguments. 
3230void NeonEmitter::runHeader(raw_ostream &OS) { 3231 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3232 3233 // Generate shared BuiltinsXXX.def 3234 genBuiltinsDef(OS); 3235 3236 // Generate ARM overloaded type checking code for SemaChecking.cpp 3237 genOverloadTypeCheckCode(OS); 3238 3239 // Generate ARM range checking code for shift/lane immediates. 3240 genIntrinsicRangeCheckCode(OS); 3241} 3242 3243/// GenTest - Write out a test for the intrinsic specified by the name and 3244/// type strings, including the embedded patterns for FileCheck to match. 3245static std::string GenTest(const std::string &name, 3246 const std::string &proto, 3247 StringRef outTypeStr, StringRef inTypeStr, 3248 bool isShift, bool isHiddenLOp, 3249 ClassKind ck, const std::string &InstName, 3250 bool isA64, 3251 std::string & testFuncProto) { 3252 assert(!proto.empty() && ""); 3253 std::string s; 3254 3255 // Function name with type suffix 3256 std::string mangledName = MangleName(name, outTypeStr, ClassS); 3257 if (outTypeStr != inTypeStr) { 3258 // If the input type is different (e.g., for vreinterpret), append a suffix 3259 // for the input type. String off a "Q" (quad) prefix so that MangleName 3260 // does not insert another "q" in the name. 3261 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 3262 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 3263 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 3264 } 3265 3266 // todo: GenerateChecksForIntrinsic does not generate CHECK 3267 // for aarch64 instructions yet 3268 std::vector<std::string> FileCheckPatterns; 3269 if (!isA64) { 3270 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 3271 isHiddenLOp, FileCheckPatterns); 3272 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 3273 } 3274 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 3275 3276 // Emit the FileCheck patterns. 
3277 // If for any reason we do not want to emit a check, mangledInst 3278 // will be the empty string. 3279 if (FileCheckPatterns.size()) { 3280 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 3281 e = FileCheckPatterns.end(); 3282 i != e; 3283 ++i) { 3284 s += "// CHECK_ARM: " + *i + "\n"; 3285 } 3286 } 3287 3288 // Emit the start of the test function. 3289 3290 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 3291 char arg = 'a'; 3292 std::string comma; 3293 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3294 // Do not create arguments for values that must be immediate constants. 3295 if (proto[i] == 'i') 3296 continue; 3297 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 3298 testFuncProto.push_back(arg); 3299 comma = ", "; 3300 } 3301 testFuncProto += ")"; 3302 3303 s+= testFuncProto; 3304 s+= " {\n "; 3305 3306 if (proto[0] != 'v') 3307 s += "return "; 3308 s += mangledName + "("; 3309 arg = 'a'; 3310 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3311 if (proto[i] == 'i') { 3312 // For immediate operands, test the maximum value. 3313 if (isShift) 3314 s += "1"; // FIXME 3315 else 3316 // The immediate generally refers to a lane in the preceding argument. 3317 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 3318 } else { 3319 s.push_back(arg); 3320 } 3321 if ((i + 1) < e) 3322 s += ", "; 3323 } 3324 s += ");\n}\n\n"; 3325 return s; 3326} 3327 3328/// Write out all intrinsic tests for the specified target, checking 3329/// for intrinsic test uniqueness. 
3330void NeonEmitter::genTargetTest(raw_ostream &OS) { 3331 StringMap<OpKind> EmittedMap; 3332 std::string CurrentGuard = ""; 3333 bool InGuard = false; 3334 3335 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3336 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 3337 Record *R = RV[i]; 3338 std::string name = R->getValueAsString("Name"); 3339 std::string Proto = R->getValueAsString("Prototype"); 3340 std::string Types = R->getValueAsString("Types"); 3341 bool isShift = R->getValueAsBit("isShift"); 3342 std::string InstName = R->getValueAsString("InstName"); 3343 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 3344 3345 std::string NewGuard = R->getValueAsString("ArchGuard"); 3346 if (NewGuard != CurrentGuard) { 3347 if (InGuard) 3348 OS << "#endif\n\n"; 3349 if (NewGuard.size()) 3350 OS << "#if " << NewGuard << '\n'; 3351 3352 CurrentGuard = NewGuard; 3353 InGuard = NewGuard.size() != 0; 3354 } 3355 3356 SmallVector<StringRef, 16> TypeVec; 3357 ParseTypes(R, Types, TypeVec); 3358 3359 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 3360 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 3361 if (kind == OpUnavailable) 3362 continue; 3363 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3364 if (kind == OpReinterpret) { 3365 bool outQuad = false; 3366 bool dummy = false; 3367 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 3368 for (unsigned srcti = 0, srcte = TypeVec.size(); 3369 srcti != srcte; ++srcti) { 3370 bool inQuad = false; 3371 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 3372 if (srcti == ti || inQuad != outQuad) 3373 continue; 3374 std::string testFuncProto; 3375 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 3376 isShift, isHiddenLOp, ck, InstName, 3377 CurrentGuard.size(), testFuncProto); 3378 if (EmittedMap.count(testFuncProto)) 3379 continue; 3380 EmittedMap[testFuncProto] = kind; 3381 OS << s << "\n"; 3382 } 3383 } else { 3384 std::string testFuncProto; 
3385 std::string s = 3386 GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp, 3387 ck, InstName, CurrentGuard.size(), testFuncProto); 3388 OS << s << "\n"; 3389 } 3390 } 3391 } 3392 3393 if (InGuard) 3394 OS << "#endif\n"; 3395} 3396/// runTests - Write out a complete set of tests for all of the Neon 3397/// intrinsics. 3398void NeonEmitter::runTests(raw_ostream &OS) { 3399 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 3400 "apcs-gnu\\\n" 3401 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 3402 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 3403 "\n" 3404 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 3405 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 3406 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 3407 "\n" 3408 "// REQUIRES: long_tests\n" 3409 "\n" 3410 "#include <arm_neon.h>\n" 3411 "\n"; 3412 3413 genTargetTest(OS); 3414} 3415 3416namespace clang { 3417void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 3418 NeonEmitter(Records).run(OS); 3419} 3420void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 3421 NeonEmitter(Records).runHeader(OS); 3422} 3423void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 3424 NeonEmitter(Records).runTests(OS); 3425} 3426} // End namespace clang 3427