NeonEmitter.cpp revision 66981c7ca1fcef529d7d5e5c53b07020ff23d8e3
1//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend is responsible for emitting arm_neon.h, which includes 11// a declaration and definition of each function specified by the ARM NEON 12// compiler interface. See ARM document DUI0348B. 13// 14// Each NEON instruction is implemented in terms of 1 or more functions which 15// are suffixed with the element type of the input vectors. Functions may be 16// implemented in terms of generic vector operations such as +, *, -, etc. or 17// by calling a __builtin_-prefixed function which will be handled by clang's 18// CodeGen library. 19// 20// Additional validation code can be generated by this file when runHeader() is 21// called, rather than the normal run() entry point. A complete set of tests 22// for Neon intrinsics can be generated by calling the runTests() entry point. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - Selects how the body of an intrinsic is generated.  The
/// NeonEmitter constructor maps the "OP_*" names onto these values.
/// GenIntrinsic emits a __builtin_neon_* call (GenBuiltin) for OpNone, an
/// __attribute__((unavailable)) declaration for OpUnavailable, and the inline
/// expression produced by GenOpString for every other kind.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddw,
  OpSub,
  OpSubl,
  OpSubw,
  OpMul,
  OpMla,
  OpMlal,
  OpMls,
  OpMlsl,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAbdl,
  OpAba,
  OpAbal
};

/// ClassKind - Selects the suffix MangleName appends to an intrinsic or
/// builtin name.  The NeonEmitter constructor associates the SInst, IInst and
/// WInst record classes with ClassS, ClassI and ClassW respectively.
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB      // bitcast arguments with enum argument to specify type
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
107namespace { 108class NeonTypeFlags { 109 enum { 110 EltTypeMask = 0xf, 111 UnsignedFlag = 0x10, 112 QuadFlag = 0x20 113 }; 114 uint32_t Flags; 115 116public: 117 enum EltType { 118 Int8, 119 Int16, 120 Int32, 121 Int64, 122 Poly8, 123 Poly16, 124 Float16, 125 Float32 126 }; 127 128 NeonTypeFlags(unsigned F) : Flags(F) {} 129 NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) { 130 if (IsUnsigned) 131 Flags |= UnsignedFlag; 132 if (IsQuad) 133 Flags |= QuadFlag; 134 } 135 136 uint32_t getFlags() const { return Flags; } 137}; 138} // end anonymous namespace 139 140namespace { 141class NeonEmitter { 142 RecordKeeper &Records; 143 StringMap<OpKind> OpMap; 144 DenseMap<Record*, ClassKind> ClassMap; 145 146public: 147 NeonEmitter(RecordKeeper &R) : Records(R) { 148 OpMap["OP_NONE"] = OpNone; 149 OpMap["OP_UNAVAILABLE"] = OpUnavailable; 150 OpMap["OP_ADD"] = OpAdd; 151 OpMap["OP_ADDL"] = OpAddl; 152 OpMap["OP_ADDW"] = OpAddw; 153 OpMap["OP_SUB"] = OpSub; 154 OpMap["OP_SUBL"] = OpSubl; 155 OpMap["OP_SUBW"] = OpSubw; 156 OpMap["OP_MUL"] = OpMul; 157 OpMap["OP_MLA"] = OpMla; 158 OpMap["OP_MLAL"] = OpMlal; 159 OpMap["OP_MLS"] = OpMls; 160 OpMap["OP_MLSL"] = OpMlsl; 161 OpMap["OP_MUL_N"] = OpMulN; 162 OpMap["OP_MLA_N"] = OpMlaN; 163 OpMap["OP_MLS_N"] = OpMlsN; 164 OpMap["OP_MLAL_N"] = OpMlalN; 165 OpMap["OP_MLSL_N"] = OpMlslN; 166 OpMap["OP_MUL_LN"]= OpMulLane; 167 OpMap["OP_MULL_LN"] = OpMullLane; 168 OpMap["OP_MLA_LN"]= OpMlaLane; 169 OpMap["OP_MLS_LN"]= OpMlsLane; 170 OpMap["OP_MLAL_LN"] = OpMlalLane; 171 OpMap["OP_MLSL_LN"] = OpMlslLane; 172 OpMap["OP_QDMULL_LN"] = OpQDMullLane; 173 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane; 174 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane; 175 OpMap["OP_QDMULH_LN"] = OpQDMulhLane; 176 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; 177 OpMap["OP_EQ"] = OpEq; 178 OpMap["OP_GE"] = OpGe; 179 OpMap["OP_LE"] = OpLe; 180 OpMap["OP_GT"] = OpGt; 181 OpMap["OP_LT"] = OpLt; 182 OpMap["OP_NEG"] = OpNeg; 183 OpMap["OP_NOT"] = OpNot; 184 
OpMap["OP_AND"] = OpAnd; 185 OpMap["OP_OR"] = OpOr; 186 OpMap["OP_XOR"] = OpXor; 187 OpMap["OP_ANDN"] = OpAndNot; 188 OpMap["OP_ORN"] = OpOrNot; 189 OpMap["OP_CAST"] = OpCast; 190 OpMap["OP_CONC"] = OpConcat; 191 OpMap["OP_HI"] = OpHi; 192 OpMap["OP_LO"] = OpLo; 193 OpMap["OP_DUP"] = OpDup; 194 OpMap["OP_DUP_LN"] = OpDupLane; 195 OpMap["OP_SEL"] = OpSelect; 196 OpMap["OP_REV16"] = OpRev16; 197 OpMap["OP_REV32"] = OpRev32; 198 OpMap["OP_REV64"] = OpRev64; 199 OpMap["OP_REINT"] = OpReinterpret; 200 OpMap["OP_ABDL"] = OpAbdl; 201 OpMap["OP_ABA"] = OpAba; 202 OpMap["OP_ABAL"] = OpAbal; 203 204 Record *SI = R.getClass("SInst"); 205 Record *II = R.getClass("IInst"); 206 Record *WI = R.getClass("WInst"); 207 ClassMap[SI] = ClassS; 208 ClassMap[II] = ClassI; 209 ClassMap[WI] = ClassW; 210 } 211 212 // run - Emit arm_neon.h.inc 213 void run(raw_ostream &o); 214 215 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 216 void runHeader(raw_ostream &o); 217 218 // runTests - Emit tests for all the Neon intrinsics. 219 void runTests(raw_ostream &o); 220 221private: 222 void emitIntrinsic(raw_ostream &OS, Record *R); 223}; 224} // end anonymous namespace 225 226/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs, 227/// which each StringRef representing a single type declared in the string. 228/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing 229/// 2xfloat and 4xfloat respectively. 
230static void ParseTypes(Record *r, std::string &s, 231 SmallVectorImpl<StringRef> &TV) { 232 const char *data = s.data(); 233 int len = 0; 234 235 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 236 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U') 237 continue; 238 239 switch (data[len]) { 240 case 'c': 241 case 's': 242 case 'i': 243 case 'l': 244 case 'h': 245 case 'f': 246 break; 247 default: 248 throw TGError(r->getLoc(), 249 "Unexpected letter: " + std::string(data + len, 1)); 250 } 251 TV.push_back(StringRef(data, len + 1)); 252 data += len + 1; 253 len = -1; 254 } 255} 256 257/// Widen - Convert a type code into the next wider type. char -> short, 258/// short -> int, etc. 259static char Widen(const char t) { 260 switch (t) { 261 case 'c': 262 return 's'; 263 case 's': 264 return 'i'; 265 case 'i': 266 return 'l'; 267 case 'h': 268 return 'f'; 269 default: throw "unhandled type in widen!"; 270 } 271} 272 273/// Narrow - Convert a type code into the next smaller type. short -> char, 274/// float -> half float, etc. 275static char Narrow(const char t) { 276 switch (t) { 277 case 's': 278 return 'c'; 279 case 'i': 280 return 's'; 281 case 'l': 282 return 'i'; 283 case 'f': 284 return 'h'; 285 default: throw "unhandled type in narrow!"; 286 } 287} 288 289/// For a particular StringRef, return the base type code, and whether it has 290/// the quad-vector, polynomial, or unsigned modifiers set. 291static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 292 unsigned off = 0; 293 294 // remember quad. 295 if (ty[off] == 'Q') { 296 quad = true; 297 ++off; 298 } 299 300 // remember poly. 301 if (ty[off] == 'P') { 302 poly = true; 303 ++off; 304 } 305 306 // remember unsigned. 307 if (ty[off] == 'U') { 308 usgn = true; 309 ++off; 310 } 311 312 // base type to get the type string for. 313 return ty[off]; 314} 315 316/// ModType - Transform a type code and its modifiers based on a mod code. 
The 317/// mod code definitions may be found at the top of arm_neon.td. 318static char ModType(const char mod, char type, bool &quad, bool &poly, 319 bool &usgn, bool &scal, bool &cnst, bool &pntr) { 320 switch (mod) { 321 case 't': 322 if (poly) { 323 poly = false; 324 usgn = true; 325 } 326 break; 327 case 'u': 328 usgn = true; 329 poly = false; 330 if (type == 'f') 331 type = 'i'; 332 break; 333 case 'x': 334 usgn = false; 335 poly = false; 336 if (type == 'f') 337 type = 'i'; 338 break; 339 case 'f': 340 if (type == 'h') 341 quad = true; 342 type = 'f'; 343 usgn = false; 344 break; 345 case 'g': 346 quad = false; 347 break; 348 case 'w': 349 type = Widen(type); 350 quad = true; 351 break; 352 case 'n': 353 type = Widen(type); 354 break; 355 case 'i': 356 type = 'i'; 357 scal = true; 358 break; 359 case 'l': 360 type = 'l'; 361 scal = true; 362 usgn = true; 363 break; 364 case 's': 365 case 'a': 366 scal = true; 367 break; 368 case 'k': 369 quad = true; 370 break; 371 case 'c': 372 cnst = true; 373 case 'p': 374 pntr = true; 375 scal = true; 376 break; 377 case 'h': 378 type = Narrow(type); 379 if (type == 'h') 380 quad = false; 381 break; 382 case 'e': 383 type = Narrow(type); 384 usgn = true; 385 break; 386 default: 387 break; 388 } 389 return type; 390} 391 392/// TypeString - for a modifier and type, generate the name of the typedef for 393/// that type. QUc -> uint8x8_t. 394static std::string TypeString(const char mod, StringRef typestr) { 395 bool quad = false; 396 bool poly = false; 397 bool usgn = false; 398 bool scal = false; 399 bool cnst = false; 400 bool pntr = false; 401 402 if (mod == 'v') 403 return "void"; 404 if (mod == 'i') 405 return "int"; 406 407 // base type to get the type string for. 408 char type = ClassifyType(typestr, quad, poly, usgn); 409 410 // Based on the modifying character, change the type and width if necessary. 
411 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 412 413 SmallString<128> s; 414 415 if (usgn) 416 s.push_back('u'); 417 418 switch (type) { 419 case 'c': 420 s += poly ? "poly8" : "int8"; 421 if (scal) 422 break; 423 s += quad ? "x16" : "x8"; 424 break; 425 case 's': 426 s += poly ? "poly16" : "int16"; 427 if (scal) 428 break; 429 s += quad ? "x8" : "x4"; 430 break; 431 case 'i': 432 s += "int32"; 433 if (scal) 434 break; 435 s += quad ? "x4" : "x2"; 436 break; 437 case 'l': 438 s += "int64"; 439 if (scal) 440 break; 441 s += quad ? "x2" : "x1"; 442 break; 443 case 'h': 444 s += "float16"; 445 if (scal) 446 break; 447 s += quad ? "x8" : "x4"; 448 break; 449 case 'f': 450 s += "float32"; 451 if (scal) 452 break; 453 s += quad ? "x4" : "x2"; 454 break; 455 default: 456 throw "unhandled type!"; 457 } 458 459 if (mod == '2') 460 s += "x2"; 461 if (mod == '3') 462 s += "x3"; 463 if (mod == '4') 464 s += "x4"; 465 466 // Append _t, finishing the type string typedef type. 467 s += "_t"; 468 469 if (cnst) 470 s += " const"; 471 472 if (pntr) 473 s += " *"; 474 475 return s.str(); 476} 477 478/// BuiltinTypeString - for a modifier and type, generate the clang 479/// BuiltinsARM.def prototype code for the function. See the top of clang's 480/// Builtins.def for a description of the type strings. 481static std::string BuiltinTypeString(const char mod, StringRef typestr, 482 ClassKind ck, bool ret) { 483 bool quad = false; 484 bool poly = false; 485 bool usgn = false; 486 bool scal = false; 487 bool cnst = false; 488 bool pntr = false; 489 490 if (mod == 'v') 491 return "v"; // void 492 if (mod == 'i') 493 return "i"; // int 494 495 // base type to get the type string for. 496 char type = ClassifyType(typestr, quad, poly, usgn); 497 498 // Based on the modifying character, change the type and width if necessary. 499 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 500 501 // All pointers are void* pointers. Change type to 'v' now. 
502 if (pntr) { 503 usgn = false; 504 poly = false; 505 type = 'v'; 506 } 507 // Treat half-float ('h') types as unsigned short ('s') types. 508 if (type == 'h') { 509 type = 's'; 510 usgn = true; 511 } 512 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f'); 513 514 if (scal) { 515 SmallString<128> s; 516 517 if (usgn) 518 s.push_back('U'); 519 else if (type == 'c') 520 s.push_back('S'); // make chars explicitly signed 521 522 if (type == 'l') // 64-bit long 523 s += "LLi"; 524 else 525 s.push_back(type); 526 527 if (cnst) 528 s.push_back('C'); 529 if (pntr) 530 s.push_back('*'); 531 return s.str(); 532 } 533 534 // Since the return value must be one type, return a vector type of the 535 // appropriate width which we will bitcast. An exception is made for 536 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 537 // fashion, storing them to a pointer arg. 538 if (ret) { 539 if (mod >= '2' && mod <= '4') 540 return "vv*"; // void result with void* first argument 541 if (mod == 'f' || (ck != ClassB && type == 'f')) 542 return quad ? "V4f" : "V2f"; 543 if (ck != ClassB && type == 's') 544 return quad ? "V8s" : "V4s"; 545 if (ck != ClassB && type == 'i') 546 return quad ? "V4i" : "V2i"; 547 if (ck != ClassB && type == 'l') 548 return quad ? "V2LLi" : "V1LLi"; 549 550 return quad ? "V16Sc" : "V8Sc"; 551 } 552 553 // Non-return array types are passed as individual vectors. 554 if (mod == '2') 555 return quad ? "V16ScV16Sc" : "V8ScV8Sc"; 556 if (mod == '3') 557 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; 558 if (mod == '4') 559 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; 560 561 if (mod == 'f' || (ck != ClassB && type == 'f')) 562 return quad ? "V4f" : "V2f"; 563 if (ck != ClassB && type == 's') 564 return quad ? "V8s" : "V4s"; 565 if (ck != ClassB && type == 'i') 566 return quad ? "V4i" : "V2i"; 567 if (ck != ClassB && type == 'l') 568 return quad ? "V2LLi" : "V1LLi"; 569 570 return quad ? 
"V16Sc" : "V8Sc"; 571} 572 573/// MangleName - Append a type or width suffix to a base neon function name, 574/// and insert a 'q' in the appropriate location if the operation works on 575/// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. 576static std::string MangleName(const std::string &name, StringRef typestr, 577 ClassKind ck) { 578 if (name == "vcvt_f32_f16") 579 return name; 580 581 bool quad = false; 582 bool poly = false; 583 bool usgn = false; 584 char type = ClassifyType(typestr, quad, poly, usgn); 585 586 std::string s = name; 587 588 switch (type) { 589 case 'c': 590 switch (ck) { 591 case ClassS: s += poly ? "_p8" : usgn ? "_u8" : "_s8"; break; 592 case ClassI: s += "_i8"; break; 593 case ClassW: s += "_8"; break; 594 default: break; 595 } 596 break; 597 case 's': 598 switch (ck) { 599 case ClassS: s += poly ? "_p16" : usgn ? "_u16" : "_s16"; break; 600 case ClassI: s += "_i16"; break; 601 case ClassW: s += "_16"; break; 602 default: break; 603 } 604 break; 605 case 'i': 606 switch (ck) { 607 case ClassS: s += usgn ? "_u32" : "_s32"; break; 608 case ClassI: s += "_i32"; break; 609 case ClassW: s += "_32"; break; 610 default: break; 611 } 612 break; 613 case 'l': 614 switch (ck) { 615 case ClassS: s += usgn ? "_u64" : "_s64"; break; 616 case ClassI: s += "_i64"; break; 617 case ClassW: s += "_64"; break; 618 default: break; 619 } 620 break; 621 case 'h': 622 switch (ck) { 623 case ClassS: 624 case ClassI: s += "_f16"; break; 625 case ClassW: s += "_16"; break; 626 default: break; 627 } 628 break; 629 case 'f': 630 switch (ck) { 631 case ClassS: 632 case ClassI: s += "_f32"; break; 633 case ClassW: s += "_32"; break; 634 default: break; 635 } 636 break; 637 default: 638 throw "unhandled type!"; 639 } 640 if (ck == ClassB) 641 s += "_v"; 642 643 // Insert a 'q' before the first '_' character so that it ends up before 644 // _lane or _n on vector-scalar operations. 
645 if (quad) { 646 size_t pos = s.find('_'); 647 s = s.insert(pos, "q"); 648 } 649 return s; 650} 651 652/// UseMacro - Examine the prototype string to determine if the intrinsic 653/// should be defined as a preprocessor macro instead of an inline function. 654static bool UseMacro(const std::string &proto) { 655 // If this builtin takes an immediate argument, we need to #define it rather 656 // than use a standard declaration, so that SemaChecking can range check 657 // the immediate passed by the user. 658 if (proto.find('i') != std::string::npos) 659 return true; 660 661 // Pointer arguments need to use macros to avoid hiding aligned attributes 662 // from the pointer type. 663 if (proto.find('p') != std::string::npos || 664 proto.find('c') != std::string::npos) 665 return true; 666 667 return false; 668} 669 670/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 671/// defined as a macro should be accessed directly instead of being first 672/// assigned to a local temporary. 673static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 674 // True for constant ints (i), pointers (p) and const pointers (c). 675 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 676} 677 678// Generate the string "(argtype a, argtype b, ...)" 679static std::string GenArgs(const std::string &proto, StringRef typestr) { 680 bool define = UseMacro(proto); 681 char arg = 'a'; 682 683 std::string s; 684 s += "("; 685 686 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 687 if (define) { 688 // Some macro arguments are used directly instead of being assigned 689 // to local temporaries; prepend an underscore prefix to make their 690 // names consistent with the local temporaries. 
691 if (MacroArgUsedDirectly(proto, i)) 692 s += "__"; 693 } else { 694 s += TypeString(proto[i], typestr) + " __"; 695 } 696 s.push_back(arg); 697 if ((i + 1) < e) 698 s += ", "; 699 } 700 701 s += ")"; 702 return s; 703} 704 705// Macro arguments are not type-checked like inline function arguments, so 706// assign them to local temporaries to get the right type checking. 707static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { 708 char arg = 'a'; 709 std::string s; 710 bool generatedLocal = false; 711 712 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 713 // Do not create a temporary for an immediate argument. 714 // That would defeat the whole point of using a macro! 715 if (MacroArgUsedDirectly(proto, i)) 716 continue; 717 generatedLocal = true; 718 719 s += TypeString(proto[i], typestr) + " __"; 720 s.push_back(arg); 721 s += " = ("; 722 s.push_back(arg); 723 s += "); "; 724 } 725 726 if (generatedLocal) 727 s += "\\\n "; 728 return s; 729} 730 731// Use the vmovl builtin to sign-extend or zero-extend a vector. 
732static std::string Extend(StringRef typestr, const std::string &a) { 733 std::string s; 734 s = MangleName("vmovl", typestr, ClassS); 735 s += "(" + a + ")"; 736 return s; 737} 738 739static std::string Duplicate(unsigned nElts, StringRef typestr, 740 const std::string &a) { 741 std::string s; 742 743 s = "(" + TypeString('d', typestr) + "){ "; 744 for (unsigned i = 0; i != nElts; ++i) { 745 s += a; 746 if ((i + 1) < nElts) 747 s += ", "; 748 } 749 s += " }"; 750 751 return s; 752} 753 754static std::string SplatLane(unsigned nElts, const std::string &vec, 755 const std::string &lane) { 756 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 757 for (unsigned i = 0; i < nElts; ++i) 758 s += ", " + lane; 759 s += ")"; 760 return s; 761} 762 763static unsigned GetNumElements(StringRef typestr, bool &quad) { 764 quad = false; 765 bool dummy = false; 766 char type = ClassifyType(typestr, quad, dummy, dummy); 767 unsigned nElts = 0; 768 switch (type) { 769 case 'c': nElts = 8; break; 770 case 's': nElts = 4; break; 771 case 'i': nElts = 2; break; 772 case 'l': nElts = 1; break; 773 case 'h': nElts = 4; break; 774 case 'f': nElts = 2; break; 775 default: 776 throw "unhandled type!"; 777 } 778 if (quad) nElts <<= 1; 779 return nElts; 780} 781 782// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 
783static std::string GenOpString(OpKind op, const std::string &proto, 784 StringRef typestr) { 785 bool quad; 786 unsigned nElts = GetNumElements(typestr, quad); 787 bool define = UseMacro(proto); 788 789 std::string ts = TypeString(proto[0], typestr); 790 std::string s; 791 if (!define) { 792 s = "return "; 793 } 794 795 switch(op) { 796 case OpAdd: 797 s += "__a + __b;"; 798 break; 799 case OpAddl: 800 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; 801 break; 802 case OpAddw: 803 s += "__a + " + Extend(typestr, "__b") + ";"; 804 break; 805 case OpSub: 806 s += "__a - __b;"; 807 break; 808 case OpSubl: 809 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; 810 break; 811 case OpSubw: 812 s += "__a - " + Extend(typestr, "__b") + ";"; 813 break; 814 case OpMulN: 815 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; 816 break; 817 case OpMulLane: 818 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; 819 break; 820 case OpMul: 821 s += "__a * __b;"; 822 break; 823 case OpMullLane: 824 s += MangleName("vmull", typestr, ClassS) + "(__a, " + 825 SplatLane(nElts, "__b", "__c") + ");"; 826 break; 827 case OpMlaN: 828 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 829 break; 830 case OpMlaLane: 831 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 832 break; 833 case OpMla: 834 s += "__a + (__b * __c);"; 835 break; 836 case OpMlalN: 837 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 838 Duplicate(nElts, typestr, "__c") + ");"; 839 break; 840 case OpMlalLane: 841 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 842 SplatLane(nElts, "__c", "__d") + ");"; 843 break; 844 case OpMlal: 845 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 846 break; 847 case OpMlsN: 848 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 849 break; 850 case OpMlsLane: 851 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 852 break; 853 case 
OpMls: 854 s += "__a - (__b * __c);"; 855 break; 856 case OpMlslN: 857 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 858 Duplicate(nElts, typestr, "__c") + ");"; 859 break; 860 case OpMlslLane: 861 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 862 SplatLane(nElts, "__c", "__d") + ");"; 863 break; 864 case OpMlsl: 865 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 866 break; 867 case OpQDMullLane: 868 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 869 SplatLane(nElts, "__b", "__c") + ");"; 870 break; 871 case OpQDMlalLane: 872 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + 873 SplatLane(nElts, "__c", "__d") + ");"; 874 break; 875 case OpQDMlslLane: 876 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + 877 SplatLane(nElts, "__c", "__d") + ");"; 878 break; 879 case OpQDMulhLane: 880 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 881 SplatLane(nElts, "__b", "__c") + ");"; 882 break; 883 case OpQRDMulhLane: 884 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 885 SplatLane(nElts, "__b", "__c") + ");"; 886 break; 887 case OpEq: 888 s += "(" + ts + ")(__a == __b);"; 889 break; 890 case OpGe: 891 s += "(" + ts + ")(__a >= __b);"; 892 break; 893 case OpLe: 894 s += "(" + ts + ")(__a <= __b);"; 895 break; 896 case OpGt: 897 s += "(" + ts + ")(__a > __b);"; 898 break; 899 case OpLt: 900 s += "(" + ts + ")(__a < __b);"; 901 break; 902 case OpNeg: 903 s += " -__a;"; 904 break; 905 case OpNot: 906 s += " ~__a;"; 907 break; 908 case OpAnd: 909 s += "__a & __b;"; 910 break; 911 case OpOr: 912 s += "__a | __b;"; 913 break; 914 case OpXor: 915 s += "__a ^ __b;"; 916 break; 917 case OpAndNot: 918 s += "__a & ~__b;"; 919 break; 920 case OpOrNot: 921 s += "__a | ~__b;"; 922 break; 923 case OpCast: 924 s += "(" + ts + ")__a;"; 925 break; 926 case OpConcat: 927 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; 928 s += ", (int64x1_t)__b, 0, 1);"; 929 break; 930 
case OpHi: 931 s += "(" + ts + 932 ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);"; 933 break; 934 case OpLo: 935 s += "(" + ts + 936 ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);"; 937 break; 938 case OpDup: 939 s += Duplicate(nElts, typestr, "__a") + ";"; 940 break; 941 case OpDupLane: 942 s += SplatLane(nElts, "__a", "__b") + ";"; 943 break; 944 case OpSelect: 945 // ((0 & 1) | (~0 & 2)) 946 s += "(" + ts + ")"; 947 ts = TypeString(proto[1], typestr); 948 s += "((__a & (" + ts + ")__b) | "; 949 s += "(~__a & (" + ts + ")__c));"; 950 break; 951 case OpRev16: 952 s += "__builtin_shufflevector(__a, __a"; 953 for (unsigned i = 2; i <= nElts; i += 2) 954 for (unsigned j = 0; j != 2; ++j) 955 s += ", " + utostr(i - j - 1); 956 s += ");"; 957 break; 958 case OpRev32: { 959 unsigned WordElts = nElts >> (1 + (int)quad); 960 s += "__builtin_shufflevector(__a, __a"; 961 for (unsigned i = WordElts; i <= nElts; i += WordElts) 962 for (unsigned j = 0; j != WordElts; ++j) 963 s += ", " + utostr(i - j - 1); 964 s += ");"; 965 break; 966 } 967 case OpRev64: { 968 unsigned DblWordElts = nElts >> (int)quad; 969 s += "__builtin_shufflevector(__a, __a"; 970 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) 971 for (unsigned j = 0; j != DblWordElts; ++j) 972 s += ", " + utostr(i - j - 1); 973 s += ");"; 974 break; 975 } 976 case OpAbdl: { 977 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; 978 if (typestr[0] != 'U') { 979 // vabd results are always unsigned and must be zero-extended. 
980 std::string utype = "U" + typestr.str(); 981 s += "(" + TypeString(proto[0], typestr) + ")"; 982 abd = "(" + TypeString('d', utype) + ")" + abd; 983 s += Extend(utype, abd) + ";"; 984 } else { 985 s += Extend(typestr, abd) + ";"; 986 } 987 break; 988 } 989 case OpAba: 990 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; 991 break; 992 case OpAbal: { 993 s += "__a + "; 994 std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)"; 995 if (typestr[0] != 'U') { 996 // vabd results are always unsigned and must be zero-extended. 997 std::string utype = "U" + typestr.str(); 998 s += "(" + TypeString(proto[0], typestr) + ")"; 999 abd = "(" + TypeString('d', utype) + ")" + abd; 1000 s += Extend(utype, abd) + ";"; 1001 } else { 1002 s += Extend(typestr, abd) + ";"; 1003 } 1004 break; 1005 } 1006 default: 1007 throw "unknown OpKind!"; 1008 } 1009 return s; 1010} 1011 1012static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 1013 unsigned mod = proto[0]; 1014 1015 if (mod == 'v' || mod == 'f') 1016 mod = proto[1]; 1017 1018 bool quad = false; 1019 bool poly = false; 1020 bool usgn = false; 1021 bool scal = false; 1022 bool cnst = false; 1023 bool pntr = false; 1024 1025 // Base type to get the type string for. 1026 char type = ClassifyType(typestr, quad, poly, usgn); 1027 1028 // Based on the modifying character, change the type and width if necessary. 1029 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 1030 1031 NeonTypeFlags::EltType ET; 1032 switch (type) { 1033 case 'c': 1034 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 1035 break; 1036 case 's': 1037 ET = poly ? 
NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 1038 break; 1039 case 'i': 1040 ET = NeonTypeFlags::Int32; 1041 break; 1042 case 'l': 1043 ET = NeonTypeFlags::Int64; 1044 break; 1045 case 'h': 1046 ET = NeonTypeFlags::Float16; 1047 break; 1048 case 'f': 1049 ET = NeonTypeFlags::Float32; 1050 break; 1051 default: 1052 throw "unhandled type!"; 1053 } 1054 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 1055 return Flags.getFlags(); 1056} 1057 1058// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 1059static std::string GenBuiltin(const std::string &name, const std::string &proto, 1060 StringRef typestr, ClassKind ck) { 1061 std::string s; 1062 1063 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1064 // sret-like argument. 1065 bool sret = (proto[0] >= '2' && proto[0] <= '4'); 1066 1067 bool define = UseMacro(proto); 1068 1069 // Check if the prototype has a scalar operand with the type of the vector 1070 // elements. If not, bitcasting the args will take care of arg checking. 1071 // The actual signedness etc. will be taken care of with special enums. 1072 if (proto.find('s') == std::string::npos) 1073 ck = ClassB; 1074 1075 if (proto[0] != 'v') { 1076 std::string ts = TypeString(proto[0], typestr); 1077 1078 if (define) { 1079 if (sret) 1080 s += ts + " r; "; 1081 else 1082 s += "(" + ts + ")"; 1083 } else if (sret) { 1084 s += ts + " r; "; 1085 } else { 1086 s += "return (" + ts + ")"; 1087 } 1088 } 1089 1090 bool splat = proto.find('a') != std::string::npos; 1091 1092 s += "__builtin_neon_"; 1093 if (splat) { 1094 // Call the non-splat builtin: chop off the "_n" suffix from the name. 1095 std::string vname(name, 0, name.size()-2); 1096 s += MangleName(vname, typestr, ck); 1097 } else { 1098 s += MangleName(name, typestr, ck); 1099 } 1100 s += "("; 1101 1102 // Pass the address of the return variable as the first argument to sret-like 1103 // builtins. 
1104 if (sret) 1105 s += "&r, "; 1106 1107 char arg = 'a'; 1108 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1109 std::string args = std::string(&arg, 1); 1110 1111 // Use the local temporaries instead of the macro arguments. 1112 args = "__" + args; 1113 1114 bool argQuad = false; 1115 bool argPoly = false; 1116 bool argUsgn = false; 1117 bool argScalar = false; 1118 bool dummy = false; 1119 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); 1120 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, 1121 dummy, dummy); 1122 1123 // Handle multiple-vector values specially, emitting each subvector as an 1124 // argument to the __builtin. 1125 if (proto[i] >= '2' && proto[i] <= '4') { 1126 // Check if an explicit cast is needed. 1127 if (argType != 'c' || argPoly || argUsgn) 1128 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; 1129 1130 for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) { 1131 s += args + ".val[" + utostr(vi) + "]"; 1132 if ((vi + 1) < ve) 1133 s += ", "; 1134 } 1135 if ((i + 1) < e) 1136 s += ", "; 1137 1138 continue; 1139 } 1140 1141 if (splat && (i + 1) == e) 1142 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); 1143 1144 // Check if an explicit cast is needed. 1145 if ((splat || !argScalar) && 1146 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { 1147 std::string argTypeStr = "c"; 1148 if (ck != ClassB) 1149 argTypeStr = argType; 1150 if (argQuad) 1151 argTypeStr = "Q" + argTypeStr; 1152 args = "(" + TypeString('d', argTypeStr) + ")" + args; 1153 } 1154 1155 s += args; 1156 if ((i + 1) < e) 1157 s += ", "; 1158 } 1159 1160 // Extra constant integer to hold type class enum for this function, e.g. 
s8 1161 if (ck == ClassB) 1162 s += ", " + utostr(GetNeonEnum(proto, typestr)); 1163 1164 s += ");"; 1165 1166 if (proto[0] != 'v' && sret) { 1167 if (define) 1168 s += " r;"; 1169 else 1170 s += " return r;"; 1171 } 1172 return s; 1173} 1174 1175static std::string GenBuiltinDef(const std::string &name, 1176 const std::string &proto, 1177 StringRef typestr, ClassKind ck) { 1178 std::string s("BUILTIN(__builtin_neon_"); 1179 1180 // If all types are the same size, bitcasting the args will take care 1181 // of arg checking. The actual signedness etc. will be taken care of with 1182 // special enums. 1183 if (proto.find('s') == std::string::npos) 1184 ck = ClassB; 1185 1186 s += MangleName(name, typestr, ck); 1187 s += ", \""; 1188 1189 for (unsigned i = 0, e = proto.size(); i != e; ++i) 1190 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 1191 1192 // Extra constant integer to hold type class enum for this function, e.g. s8 1193 if (ck == ClassB) 1194 s += "i"; 1195 1196 s += "\", \"n\")"; 1197 return s; 1198} 1199 1200static std::string GenIntrinsic(const std::string &name, 1201 const std::string &proto, 1202 StringRef outTypeStr, StringRef inTypeStr, 1203 OpKind kind, ClassKind classKind) { 1204 assert(!proto.empty() && ""); 1205 bool define = UseMacro(proto) && kind != OpUnavailable; 1206 std::string s; 1207 1208 // static always inline + return type 1209 if (define) 1210 s += "#define "; 1211 else 1212 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 1213 1214 // Function name with type suffix 1215 std::string mangledName = MangleName(name, outTypeStr, ClassS); 1216 if (outTypeStr != inTypeStr) { 1217 // If the input type is different (e.g., for vreinterpret), append a suffix 1218 // for the input type. String off a "Q" (quad) prefix so that MangleName 1219 // does not insert another "q" in the name. 1220 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 
1 : 0); 1221 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 1222 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 1223 } 1224 s += mangledName; 1225 1226 // Function arguments 1227 s += GenArgs(proto, inTypeStr); 1228 1229 // Definition. 1230 if (define) { 1231 s += " __extension__ ({ \\\n "; 1232 s += GenMacroLocals(proto, inTypeStr); 1233 } else if (kind == OpUnavailable) { 1234 s += " __attribute__((unavailable));\n"; 1235 return s; 1236 } else 1237 s += " {\n "; 1238 1239 if (kind != OpNone) 1240 s += GenOpString(kind, proto, outTypeStr); 1241 else 1242 s += GenBuiltin(name, proto, outTypeStr, classKind); 1243 if (define) 1244 s += " })"; 1245 else 1246 s += " }"; 1247 s += "\n"; 1248 return s; 1249} 1250 1251/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 1252/// is comprised of type definitions and function declarations. 1253void NeonEmitter::run(raw_ostream &OS) { 1254 OS << 1255 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------" 1256 "---===\n" 1257 " *\n" 1258 " * Permission is hereby granted, free of charge, to any person obtaining " 1259 "a copy\n" 1260 " * of this software and associated documentation files (the \"Software\")," 1261 " to deal\n" 1262 " * in the Software without restriction, including without limitation the " 1263 "rights\n" 1264 " * to use, copy, modify, merge, publish, distribute, sublicense, " 1265 "and/or sell\n" 1266 " * copies of the Software, and to permit persons to whom the Software is\n" 1267 " * furnished to do so, subject to the following conditions:\n" 1268 " *\n" 1269 " * The above copyright notice and this permission notice shall be " 1270 "included in\n" 1271 " * all copies or substantial portions of the Software.\n" 1272 " *\n" 1273 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 1274 "EXPRESS OR\n" 1275 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 1276 "MERCHANTABILITY,\n" 1277 " * FITNESS FOR A PARTICULAR 
PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 1278 "SHALL THE\n" 1279 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 1280 "OTHER\n" 1281 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 1282 "ARISING FROM,\n" 1283 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 1284 "DEALINGS IN\n" 1285 " * THE SOFTWARE.\n" 1286 " *\n" 1287 " *===--------------------------------------------------------------------" 1288 "---===\n" 1289 " */\n\n"; 1290 1291 OS << "#ifndef __ARM_NEON_H\n"; 1292 OS << "#define __ARM_NEON_H\n\n"; 1293 1294 OS << "#ifndef __ARM_NEON__\n"; 1295 OS << "#error \"NEON support not enabled\"\n"; 1296 OS << "#endif\n\n"; 1297 1298 OS << "#include <stdint.h>\n\n"; 1299 1300 // Emit NEON-specific scalar typedefs. 1301 OS << "typedef float float32_t;\n"; 1302 OS << "typedef int8_t poly8_t;\n"; 1303 OS << "typedef int16_t poly16_t;\n"; 1304 OS << "typedef uint16_t float16_t;\n"; 1305 1306 // Emit Neon vector typedefs. 1307 std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs"); 1308 SmallVector<StringRef, 24> TDTypeVec; 1309 ParseTypes(0, TypedefTypes, TDTypeVec); 1310 1311 // Emit vector typedefs. 1312 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 1313 bool dummy, quad = false, poly = false; 1314 (void) ClassifyType(TDTypeVec[i], quad, poly, dummy); 1315 if (poly) 1316 OS << "typedef __attribute__((neon_polyvector_type("; 1317 else 1318 OS << "typedef __attribute__((neon_vector_type("; 1319 1320 unsigned nElts = GetNumElements(TDTypeVec[i], quad); 1321 OS << utostr(nElts) << "))) "; 1322 if (nElts < 10) 1323 OS << " "; 1324 1325 OS << TypeString('s', TDTypeVec[i]); 1326 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; 1327 } 1328 OS << "\n"; 1329 1330 // Emit struct typedefs. 
1331 for (unsigned vi = 2; vi != 5; ++vi) { 1332 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 1333 std::string ts = TypeString('d', TDTypeVec[i]); 1334 std::string vs = TypeString('0' + vi, TDTypeVec[i]); 1335 OS << "typedef struct " << vs << " {\n"; 1336 OS << " " << ts << " val"; 1337 OS << "[" << utostr(vi) << "]"; 1338 OS << ";\n} "; 1339 OS << vs << ";\n\n"; 1340 } 1341 } 1342 1343 OS<<"#define __ai static __attribute__((__always_inline__, __nodebug__))\n\n"; 1344 1345 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 1346 1347 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other 1348 // intrinsics. (Some of the saturating multiply instructions are also 1349 // used to implement the corresponding "_lane" variants, but tablegen 1350 // sorts the records into alphabetical order so that the "_lane" variants 1351 // come after the intrinsics they use.) 1352 emitIntrinsic(OS, Records.getDef("VMOVL")); 1353 emitIntrinsic(OS, Records.getDef("VMULL")); 1354 emitIntrinsic(OS, Records.getDef("VABD")); 1355 1356 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 1357 Record *R = RV[i]; 1358 if (R->getName() != "VMOVL" && 1359 R->getName() != "VMULL" && 1360 R->getName() != "VABD") 1361 emitIntrinsic(OS, R); 1362 } 1363 1364 OS << "#undef __ai\n\n"; 1365 OS << "#endif /* __ARM_NEON_H */\n"; 1366} 1367 1368/// emitIntrinsic - Write out the arm_neon.h header file definitions for the 1369/// intrinsics specified by record R. 
1370void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) { 1371 std::string name = R->getValueAsString("Name"); 1372 std::string Proto = R->getValueAsString("Prototype"); 1373 std::string Types = R->getValueAsString("Types"); 1374 1375 SmallVector<StringRef, 16> TypeVec; 1376 ParseTypes(R, Types, TypeVec); 1377 1378 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 1379 1380 ClassKind classKind = ClassNone; 1381 if (R->getSuperClasses().size() >= 2) 1382 classKind = ClassMap[R->getSuperClasses()[1]]; 1383 if (classKind == ClassNone && kind == OpNone) 1384 throw TGError(R->getLoc(), "Builtin has no class kind"); 1385 1386 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 1387 if (kind == OpReinterpret) { 1388 bool outQuad = false; 1389 bool dummy = false; 1390 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 1391 for (unsigned srcti = 0, srcte = TypeVec.size(); 1392 srcti != srcte; ++srcti) { 1393 bool inQuad = false; 1394 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 1395 if (srcti == ti || inQuad != outQuad) 1396 continue; 1397 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], 1398 OpCast, ClassS); 1399 } 1400 } else { 1401 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], 1402 kind, classKind); 1403 } 1404 } 1405 OS << "\n"; 1406} 1407 1408static unsigned RangeFromType(const char mod, StringRef typestr) { 1409 // base type to get the type string for. 1410 bool quad = false, dummy = false; 1411 char type = ClassifyType(typestr, quad, dummy, dummy); 1412 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy); 1413 1414 switch (type) { 1415 case 'c': 1416 return (8 << (int)quad) - 1; 1417 case 'h': 1418 case 's': 1419 return (4 << (int)quad) - 1; 1420 case 'f': 1421 case 'i': 1422 return (2 << (int)quad) - 1; 1423 case 'l': 1424 return (1 << (int)quad) - 1; 1425 default: 1426 throw "unhandled type!"; 1427 } 1428} 1429 1430/// runHeader - Emit a file with sections defining: 1431/// 1. 
the NEON section of BuiltinsARM.def. 1432/// 2. the SemaChecking code for the type overload checking. 1433/// 3. the SemaChecking code for validation of intrinsic immediate arguments. 1434void NeonEmitter::runHeader(raw_ostream &OS) { 1435 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 1436 1437 StringMap<OpKind> EmittedMap; 1438 1439 // Generate BuiltinsARM.def for NEON 1440 OS << "#ifdef GET_NEON_BUILTINS\n"; 1441 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 1442 Record *R = RV[i]; 1443 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 1444 if (k != OpNone) 1445 continue; 1446 1447 std::string Proto = R->getValueAsString("Prototype"); 1448 1449 // Functions with 'a' (the splat code) in the type prototype should not get 1450 // their own builtin as they use the non-splat variant. 1451 if (Proto.find('a') != std::string::npos) 1452 continue; 1453 1454 std::string Types = R->getValueAsString("Types"); 1455 SmallVector<StringRef, 16> TypeVec; 1456 ParseTypes(R, Types, TypeVec); 1457 1458 if (R->getSuperClasses().size() < 2) 1459 throw TGError(R->getLoc(), "Builtin has no class kind"); 1460 1461 std::string name = R->getValueAsString("Name"); 1462 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 1463 1464 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 1465 // Generate the BuiltinsARM.def declaration for this builtin, ensuring 1466 // that each unique BUILTIN() macro appears only once in the output 1467 // stream. 
1468 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); 1469 if (EmittedMap.count(bd)) 1470 continue; 1471 1472 EmittedMap[bd] = OpNone; 1473 OS << bd << "\n"; 1474 } 1475 } 1476 OS << "#endif\n\n"; 1477 1478 // Generate the overloaded type checking code for SemaChecking.cpp 1479 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 1480 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 1481 Record *R = RV[i]; 1482 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 1483 if (k != OpNone) 1484 continue; 1485 1486 std::string Proto = R->getValueAsString("Prototype"); 1487 std::string Types = R->getValueAsString("Types"); 1488 std::string name = R->getValueAsString("Name"); 1489 1490 // Functions with 'a' (the splat code) in the type prototype should not get 1491 // their own builtin as they use the non-splat variant. 1492 if (Proto.find('a') != std::string::npos) 1493 continue; 1494 1495 // Functions which have a scalar argument cannot be overloaded, no need to 1496 // check them if we are emitting the type checking code. 1497 if (Proto.find('s') != std::string::npos) 1498 continue; 1499 1500 SmallVector<StringRef, 16> TypeVec; 1501 ParseTypes(R, Types, TypeVec); 1502 1503 if (R->getSuperClasses().size() < 2) 1504 throw TGError(R->getLoc(), "Builtin has no class kind"); 1505 1506 int si = -1, qi = -1; 1507 unsigned mask = 0, qmask = 0; 1508 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 1509 // Generate the switch case(s) for this builtin for the type validation. 1510 bool quad = false, poly = false, usgn = false; 1511 (void) ClassifyType(TypeVec[ti], quad, poly, usgn); 1512 1513 if (quad) { 1514 qi = ti; 1515 qmask |= 1 << GetNeonEnum(Proto, TypeVec[ti]); 1516 } else { 1517 si = ti; 1518 mask |= 1 << GetNeonEnum(Proto, TypeVec[ti]); 1519 } 1520 } 1521 1522 // Check if the builtin function has a pointer or const pointer argument. 
1523 int PtrArgNum = -1; 1524 bool HasConstPtr = false; 1525 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) { 1526 char ArgType = Proto[arg]; 1527 if (ArgType == 'c') { 1528 HasConstPtr = true; 1529 PtrArgNum = arg - 1; 1530 break; 1531 } 1532 if (ArgType == 'p') { 1533 PtrArgNum = arg - 1; 1534 break; 1535 } 1536 } 1537 // For sret builtins, adjust the pointer argument index. 1538 if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4')) 1539 PtrArgNum += 1; 1540 1541 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 1542 // and vst1_lane intrinsics. Using a pointer to the vector element 1543 // type with one of those operations causes codegen to select an aligned 1544 // load/store instruction. If you want an unaligned operation, 1545 // the pointer argument needs to have less alignment than element type, 1546 // so just accept any pointer type. 1547 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") { 1548 PtrArgNum = -1; 1549 HasConstPtr = false; 1550 } 1551 1552 if (mask) { 1553 OS << "case ARM::BI__builtin_neon_" 1554 << MangleName(name, TypeVec[si], ClassB) 1555 << ": mask = " << "0x" << utohexstr(mask); 1556 if (PtrArgNum >= 0) 1557 OS << "; PtrArgNum = " << PtrArgNum; 1558 if (HasConstPtr) 1559 OS << "; HasConstPtr = true"; 1560 OS << "; break;\n"; 1561 } 1562 if (qmask) { 1563 OS << "case ARM::BI__builtin_neon_" 1564 << MangleName(name, TypeVec[qi], ClassB) 1565 << ": mask = " << "0x" << utohexstr(qmask); 1566 if (PtrArgNum >= 0) 1567 OS << "; PtrArgNum = " << PtrArgNum; 1568 if (HasConstPtr) 1569 OS << "; HasConstPtr = true"; 1570 OS << "; break;\n"; 1571 } 1572 } 1573 OS << "#endif\n\n"; 1574 1575 // Generate the intrinsic range checking code for shift/lane immediates. 
1576 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 1577 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 1578 Record *R = RV[i]; 1579 1580 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 1581 if (k != OpNone) 1582 continue; 1583 1584 std::string name = R->getValueAsString("Name"); 1585 std::string Proto = R->getValueAsString("Prototype"); 1586 std::string Types = R->getValueAsString("Types"); 1587 1588 // Functions with 'a' (the splat code) in the type prototype should not get 1589 // their own builtin as they use the non-splat variant. 1590 if (Proto.find('a') != std::string::npos) 1591 continue; 1592 1593 // Functions which do not have an immediate do not need to have range 1594 // checking code emitted. 1595 size_t immPos = Proto.find('i'); 1596 if (immPos == std::string::npos) 1597 continue; 1598 1599 SmallVector<StringRef, 16> TypeVec; 1600 ParseTypes(R, Types, TypeVec); 1601 1602 if (R->getSuperClasses().size() < 2) 1603 throw TGError(R->getLoc(), "Builtin has no class kind"); 1604 1605 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 1606 1607 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 1608 std::string namestr, shiftstr, rangestr; 1609 1610 if (R->getValueAsBit("isVCVT_N")) { 1611 // VCVT between floating- and fixed-point values takes an immediate 1612 // in the range 1 to 32. 1613 ck = ClassB; 1614 rangestr = "l = 1; u = 31"; // upper bound = l + u 1615 } else if (Proto.find('s') == std::string::npos) { 1616 // Builtins which are overloaded by type will need to have their upper 1617 // bound computed at Sema time based on the type constant. 1618 ck = ClassB; 1619 if (R->getValueAsBit("isShift")) { 1620 shiftstr = ", true"; 1621 1622 // Right shifts have an 'r' in the name, left shifts do not. 1623 if (name.find('r') != std::string::npos) 1624 rangestr = "l = 1; "; 1625 } 1626 rangestr += "u = RFT(TV" + shiftstr + ")"; 1627 } else { 1628 // The immediate generally refers to a lane in the preceding argument. 
1629 assert(immPos > 0 && "unexpected immediate operand"); 1630 rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti])); 1631 } 1632 // Make sure cases appear only once by uniquing them in a string map. 1633 namestr = MangleName(name, TypeVec[ti], ck); 1634 if (EmittedMap.count(namestr)) 1635 continue; 1636 EmittedMap[namestr] = OpNone; 1637 1638 // Calculate the index of the immediate that should be range checked. 1639 unsigned immidx = 0; 1640 1641 // Builtins that return a struct of multiple vectors have an extra 1642 // leading arg for the struct return. 1643 if (Proto[0] >= '2' && Proto[0] <= '4') 1644 ++immidx; 1645 1646 // Add one to the index for each argument until we reach the immediate 1647 // to be checked. Structs of vectors are passed as multiple arguments. 1648 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { 1649 switch (Proto[ii]) { 1650 default: immidx += 1; break; 1651 case '2': immidx += 2; break; 1652 case '3': immidx += 3; break; 1653 case '4': immidx += 4; break; 1654 case 'i': ie = ii + 1; break; 1655 } 1656 } 1657 OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck) 1658 << ": i = " << immidx << "; " << rangestr << "; break;\n"; 1659 } 1660 } 1661 OS << "#endif\n\n"; 1662} 1663 1664/// GenTest - Write out a test for the intrinsic specified by the name and 1665/// type strings, including the embedded patterns for FileCheck to match. 1666static std::string GenTest(const std::string &name, 1667 const std::string &proto, 1668 StringRef outTypeStr, StringRef inTypeStr, 1669 bool isShift) { 1670 assert(!proto.empty() && ""); 1671 std::string s; 1672 1673 // Function name with type suffix 1674 std::string mangledName = MangleName(name, outTypeStr, ClassS); 1675 if (outTypeStr != inTypeStr) { 1676 // If the input type is different (e.g., for vreinterpret), append a suffix 1677 // for the input type. String off a "Q" (quad) prefix so that MangleName 1678 // does not insert another "q" in the name. 
1679 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 1680 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 1681 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 1682 } 1683 1684 // Emit the FileCheck patterns. 1685 s += "// CHECK: test_" + mangledName + "\n"; 1686 // s += "// CHECK: \n"; // FIXME: + expected instruction opcode. 1687 1688 // Emit the start of the test function. 1689 s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 1690 char arg = 'a'; 1691 std::string comma; 1692 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1693 // Do not create arguments for values that must be immediate constants. 1694 if (proto[i] == 'i') 1695 continue; 1696 s += comma + TypeString(proto[i], inTypeStr) + " "; 1697 s.push_back(arg); 1698 comma = ", "; 1699 } 1700 s += ") {\n "; 1701 1702 if (proto[0] != 'v') 1703 s += "return "; 1704 s += mangledName + "("; 1705 arg = 'a'; 1706 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1707 if (proto[i] == 'i') { 1708 // For immediate operands, test the maximum value. 1709 if (isShift) 1710 s += "1"; // FIXME 1711 else 1712 // The immediate generally refers to a lane in the preceding argument. 1713 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 1714 } else { 1715 s.push_back(arg); 1716 } 1717 if ((i + 1) < e) 1718 s += ", "; 1719 } 1720 s += ");\n}\n\n"; 1721 return s; 1722} 1723 1724/// runTests - Write out a complete set of tests for all of the Neon 1725/// intrinsics. 
1726void NeonEmitter::runTests(raw_ostream &OS) { 1727 OS << 1728 "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n" 1729 "// RUN: -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n" 1730 "\n" 1731 "#include <arm_neon.h>\n" 1732 "\n"; 1733 1734 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 1735 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 1736 Record *R = RV[i]; 1737 std::string name = R->getValueAsString("Name"); 1738 std::string Proto = R->getValueAsString("Prototype"); 1739 std::string Types = R->getValueAsString("Types"); 1740 bool isShift = R->getValueAsBit("isShift"); 1741 1742 SmallVector<StringRef, 16> TypeVec; 1743 ParseTypes(R, Types, TypeVec); 1744 1745 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 1746 if (kind == OpUnavailable) 1747 continue; 1748 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 1749 if (kind == OpReinterpret) { 1750 bool outQuad = false; 1751 bool dummy = false; 1752 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 1753 for (unsigned srcti = 0, srcte = TypeVec.size(); 1754 srcti != srcte; ++srcti) { 1755 bool inQuad = false; 1756 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 1757 if (srcti == ti || inQuad != outQuad) 1758 continue; 1759 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift); 1760 } 1761 } else { 1762 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift); 1763 } 1764 } 1765 OS << "\n"; 1766 } 1767} 1768 1769namespace clang { 1770void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 1771 NeonEmitter(Records).run(OS); 1772} 1773void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 1774 NeonEmitter(Records).runHeader(OS); 1775} 1776void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 1777 NeonEmitter(Records).runTests(OS); 1778} 1779} // End namespace clang 1780