X86ISelLowering.h revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that X86 uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef X86ISELLOWERING_H 16#define X86ISELLOWERING_H 17 18#include "X86Subtarget.h" 19#include "llvm/CodeGen/CallingConvLower.h" 20#include "llvm/CodeGen/SelectionDAG.h" 21#include "llvm/Target/TargetLowering.h" 22#include "llvm/Target/TargetOptions.h" 23 24namespace llvm { 25 class X86TargetMachine; 26 27 namespace X86ISD { 28 // X86 Specific DAG Nodes 29 enum NodeType { 30 // Start the numbering where the builtin ops leave off. 31 FIRST_NUMBER = ISD::BUILTIN_OP_END, 32 33 /// BSF - Bit scan forward. 34 /// BSR - Bit scan reverse. 35 BSF, 36 BSR, 37 38 /// SHLD, SHRD - Double shift instructions. These correspond to 39 /// X86::SHLDxx and X86::SHRDxx instructions. 40 SHLD, 41 SHRD, 42 43 /// FAND - Bitwise logical AND of floating point values. This corresponds 44 /// to X86::ANDPS or X86::ANDPD. 45 FAND, 46 47 /// FOR - Bitwise logical OR of floating point values. This corresponds 48 /// to X86::ORPS or X86::ORPD. 49 FOR, 50 51 /// FXOR - Bitwise logical XOR of floating point values. This corresponds 52 /// to X86::XORPS or X86::XORPD. 53 FXOR, 54 55 /// FANDN - Bitwise logical ANDNOT of floating point values. This 56 /// corresponds to X86::ANDNPS or X86::ANDNPD. 57 FANDN, 58 59 /// FSRL - Bitwise logical right shift of floating point values. These 60 /// corresponds to X86::PSRLDQ. 61 FSRL, 62 63 /// CALL - These operations represent an abstract X86 call 64 /// instruction, which includes a bunch of information. In particular the 65 /// operands of these node are: 66 /// 67 /// #0 - The incoming token chain 68 /// #1 - The callee 69 /// #2 - The number of arg bytes the caller pushes on the stack. 70 /// #3 - The number of arg bytes the callee pops off the stack. 71 /// #4 - The value to pass in AL/AX/EAX (optional) 72 /// #5 - The value to pass in DL/DX/EDX (optional) 73 /// 74 /// The result values of these nodes are: 75 /// 76 /// #0 - The outgoing token chain 77 /// #1 - The first register result value (optional) 78 /// #2 - The second register result value (optional) 79 /// 80 CALL, 81 82 /// RDTSC_DAG - This operation implements the lowering for 83 /// readcyclecounter 84 RDTSC_DAG, 85 86 /// X86 compare and logical compare instructions. 87 CMP, COMI, UCOMI, 88 89 /// X86 bit-test instructions. 90 BT, 91 92 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS 93 /// operand, usually produced by a CMP instruction. 94 SETCC, 95 96 /// X86 Select 97 SELECT, 98 99 // Same as SETCC except it's materialized with a sbb and the value is all 100 // one's or all zero's. 101 SETCC_CARRY, // R = carry_bit ? ~0 : 0 102 103 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. 104 /// Operands are two FP values to compare; result is a mask of 105 /// 0s or 1s. Generally DTRT for C/C++ with NaNs. 106 FSETCC, 107 108 /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values, 109 /// result in an integer GPR. Needs masking for scalar result. 110 FGETSIGNx86, 111 112 /// X86 conditional moves. Operand 0 and operand 1 are the two values 113 /// to select from. Operand 2 is the condition code, and operand 3 is the 114 /// flag operand produced by a CMP or TEST instruction. It also writes a 115 /// flag result. 116 CMOV, 117 118 /// X86 conditional branches. Operand 0 is the chain operand, operand 1 119 /// is the block to branch if condition is true, operand 2 is the 120 /// condition code, and operand 3 is the flag operand produced by a CMP 121 /// or TEST instruction. 122 BRCOND, 123 124 /// Return with a flag operand. Operand 0 is the chain operand, operand 125 /// 1 is the number of bytes of stack to pop. 126 RET_FLAG, 127 128 /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx. 129 REP_STOS, 130 131 /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx. 132 REP_MOVS, 133 134 /// GlobalBaseReg - On Darwin, this node represents the result of the popl 135 /// at function entry, used for PIC code. 136 GlobalBaseReg, 137 138 /// Wrapper - A wrapper node for TargetConstantPool, 139 /// TargetExternalSymbol, and TargetGlobalAddress. 140 Wrapper, 141 142 /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP 143 /// relative displacements. 144 WrapperRIP, 145 146 /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector 147 /// to an MMX vector. If you think this is too close to the previous 148 /// mnemonic, so do I; blame Intel. 149 MOVDQ2Q, 150 151 /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX 152 /// vector to a GPR. 153 MMX_MOVD2W, 154 155 /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to 156 /// i32, corresponds to X86::PEXTRB. 157 PEXTRB, 158 159 /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to 160 /// i32, corresponds to X86::PEXTRW. 161 PEXTRW, 162 163 /// INSERTPS - Insert any element of a 4 x float vector into any element 164 /// of a destination 4 x floatvector. 165 INSERTPS, 166 167 /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector, 168 /// corresponds to X86::PINSRB. 169 PINSRB, 170 171 /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, 172 /// corresponds to X86::PINSRW. 173 PINSRW, MMX_PINSRW, 174 175 /// PSHUFB - Shuffle 16 8-bit values within a vector. 176 PSHUFB, 177 178 /// ANDNP - Bitwise Logical AND NOT of Packed FP values. 179 ANDNP, 180 181 /// PSIGN - Copy integer sign. 182 PSIGN, 183 184 /// BLENDV - Blend where the selector is a register. 185 BLENDV, 186 187 /// BLENDI - Blend where the selector is an immediate. 188 BLENDI, 189 190 // SUBUS - Integer sub with unsigned saturation. 191 SUBUS, 192 193 /// HADD - Integer horizontal add. 194 HADD, 195 196 /// HSUB - Integer horizontal sub. 197 HSUB, 198 199 /// FHADD - Floating point horizontal add. 200 FHADD, 201 202 /// FHSUB - Floating point horizontal sub. 203 FHSUB, 204 205 /// UMAX, UMIN - Unsigned integer max and min. 206 UMAX, UMIN, 207 208 /// SMAX, SMIN - Signed integer max and min. 209 SMAX, SMIN, 210 211 /// FMAX, FMIN - Floating point max and min. 212 /// 213 FMAX, FMIN, 214 215 /// FMAXC, FMINC - Commutative FMIN and FMAX. 216 FMAXC, FMINC, 217 218 /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal 219 /// approximation. Note that these typically require refinement 220 /// in order to obtain suitable precision. 221 FRSQRT, FRCP, 222 223 // TLSADDR - Thread Local Storage. 224 TLSADDR, 225 226 // TLSBASEADDR - Thread Local Storage. A call to get the start address 227 // of the TLS block for the current module. 228 TLSBASEADDR, 229 230 // TLSCALL - Thread Local Storage. When calling to an OS provided 231 // thunk at the address from an earlier relocation. 232 TLSCALL, 233 234 // EH_RETURN - Exception Handling helpers. 235 EH_RETURN, 236 237 // EH_SJLJ_SETJMP - SjLj exception handling setjmp. 238 EH_SJLJ_SETJMP, 239 240 // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. 241 EH_SJLJ_LONGJMP, 242 243 /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for 244 /// the list of operands. 245 TC_RETURN, 246 247 // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements. 248 VZEXT_MOVL, 249 250 // VZEXT - Vector integer zero-extend. 251 VZEXT, 252 253 // VSEXT - Vector integer signed-extend. 254 VSEXT, 255 256 // VTRUNC - Vector integer truncate. 257 VTRUNC, 258 259 // VTRUNC - Vector integer truncate with mask. 260 VTRUNCM, 261 262 // VFPEXT - Vector FP extend. 263 VFPEXT, 264 265 // VFPROUND - Vector FP round. 266 VFPROUND, 267 268 // VSHL, VSRL - 128-bit vector logical left / right shift 269 VSHLDQ, VSRLDQ, 270 271 // VSHL, VSRL, VSRA - Vector shift elements 272 VSHL, VSRL, VSRA, 273 274 // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate 275 VSHLI, VSRLI, VSRAI, 276 277 // CMPP - Vector packed double/float comparison. 278 CMPP, 279 280 // PCMP* - Vector integer comparisons. 281 PCMPEQ, PCMPGT, 282 // PCMP*M - Vector integer comparisons, the result is in a mask vector. 283 PCMPEQM, PCMPGTM, 284 285 /// CMPM, CMPMU - Vector comparison generating mask bits for fp and 286 /// integer signed and unsigned data types. 287 CMPM, 288 CMPMU, 289 290 // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. 291 ADD, SUB, ADC, SBB, SMUL, 292 INC, DEC, OR, XOR, AND, 293 294 BZHI, // BZHI - Zero high bits 295 BEXTR, // BEXTR - Bit field extract 296 297 UMUL, // LOW, HI, FLAGS = umul LHS, RHS 298 299 // MUL_IMM - X86 specific multiply by immediate. 300 MUL_IMM, 301 302 // PTEST - Vector bitwise comparisons. 303 PTEST, 304 305 // TESTP - Vector packed fp sign bitwise comparisons. 306 TESTP, 307 308 // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector. 309 TESTM, 310 TESTNM, 311 312 // OR/AND test for masks 313 KORTEST, 314 315 // Several flavors of instructions with vector shuffle behaviors. 316 PALIGNR, 317 PSHUFD, 318 PSHUFHW, 319 PSHUFLW, 320 SHUFP, 321 MOVDDUP, 322 MOVSHDUP, 323 MOVSLDUP, 324 MOVLHPS, 325 MOVLHPD, 326 MOVHLPS, 327 MOVLPS, 328 MOVLPD, 329 MOVSD, 330 MOVSS, 331 UNPCKL, 332 UNPCKH, 333 VPERMILP, 334 VPERMV, 335 VPERMV3, 336 VPERMIV3, 337 VPERMI, 338 VPERM2X128, 339 VBROADCAST, 340 // masked broadcast 341 VBROADCASTM, 342 // Insert/Extract vector element 343 VINSERT, 344 VEXTRACT, 345 346 // PMULUDQ - Vector multiply packed unsigned doubleword integers 347 PMULUDQ, 348 349 // FMA nodes 350 FMADD, 351 FNMADD, 352 FMSUB, 353 FNMSUB, 354 FMADDSUB, 355 FMSUBADD, 356 357 // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, 358 // according to %al. An operator is needed so that this can be expanded 359 // with control flow. 360 VASTART_SAVE_XMM_REGS, 361 362 // WIN_ALLOCA - Windows's _chkstk call to do stack probing. 363 WIN_ALLOCA, 364 365 // SEG_ALLOCA - For allocating variable amounts of stack space when using 366 // segmented stacks. Check if the current stacklet has enough space, and 367 // falls back to heap allocation if not. 368 SEG_ALLOCA, 369 370 // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui. 371 WIN_FTOL, 372 373 // Memory barrier 374 MEMBARRIER, 375 MFENCE, 376 SFENCE, 377 LFENCE, 378 379 // FNSTSW16r - Store FP status word into i16 register. 380 FNSTSW16r, 381 382 // SAHF - Store contents of %ah into %eflags. 383 SAHF, 384 385 // RDRAND - Get a random integer and indicate whether it is valid in CF. 386 RDRAND, 387 388 // RDSEED - Get a NIST SP800-90B & C compliant random integer and 389 // indicate whether it is valid in CF. 390 RDSEED, 391 392 // PCMP*STRI 393 PCMPISTRI, 394 PCMPESTRI, 395 396 // XTEST - Test if in transactional execution. 397 XTEST, 398 399 // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, 400 // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - 401 // Atomic 64-bit binary operations. 402 ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, 403 ATOMSUB64_DAG, 404 ATOMOR64_DAG, 405 ATOMXOR64_DAG, 406 ATOMAND64_DAG, 407 ATOMNAND64_DAG, 408 ATOMMAX64_DAG, 409 ATOMMIN64_DAG, 410 ATOMUMAX64_DAG, 411 ATOMUMIN64_DAG, 412 ATOMSWAP64_DAG, 413 414 // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. 415 LCMPXCHG_DAG, 416 LCMPXCHG8_DAG, 417 LCMPXCHG16_DAG, 418 419 // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. 420 VZEXT_LOAD, 421 422 // FNSTCW16m - Store FP control world into i16 memory. 423 FNSTCW16m, 424 425 /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the 426 /// integer destination in memory and a FP reg source. This corresponds 427 /// to the X86::FIST*m instructions and the rounding mode change stuff. It 428 /// has two inputs (token chain and address) and two outputs (int value 429 /// and token chain). 430 FP_TO_INT16_IN_MEM, 431 FP_TO_INT32_IN_MEM, 432 FP_TO_INT64_IN_MEM, 433 434 /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the 435 /// integer source in memory and FP reg result. This corresponds to the 436 /// X86::FILD*m instructions. It has three inputs (token chain, address, 437 /// and source type) and two outputs (FP value and token chain). FILD_FLAG 438 /// also produces a flag). 439 FILD, 440 FILD_FLAG, 441 442 /// FLD - This instruction implements an extending load to FP stack slots. 443 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain 444 /// operand, ptr to load from, and a ValueType node indicating the type 445 /// to load to. 446 FLD, 447 448 /// FST - This instruction implements a truncating store to FP stack 449 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a 450 /// chain operand, value to store, address, and a ValueType to store it 451 /// as. 452 FST, 453 454 /// VAARG_64 - This instruction grabs the address of the next argument 455 /// from a va_list. (reads and modifies the va_list in memory) 456 VAARG_64 457 458 // WARNING: Do not add anything in the end unless you want the node to 459 // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be 460 // thought as target memory ops! 461 }; 462 } 463 464 /// Define some predicates that are used for node matching. 465 namespace X86 { 466 /// isVEXTRACT128Index - Return true if the specified 467 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is 468 /// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions. 469 bool isVEXTRACT128Index(SDNode *N); 470 471 /// isVINSERT128Index - Return true if the specified 472 /// INSERT_SUBVECTOR operand specifies a subvector insert that is 473 /// suitable for input to VINSERTF128, VINSERTI128 instructions. 474 bool isVINSERT128Index(SDNode *N); 475 476 /// isVEXTRACT256Index - Return true if the specified 477 /// EXTRACT_SUBVECTOR operand specifies a vector extract that is 478 /// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions. 479 bool isVEXTRACT256Index(SDNode *N); 480 481 /// isVINSERT256Index - Return true if the specified 482 /// INSERT_SUBVECTOR operand specifies a subvector insert that is 483 /// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions. 484 bool isVINSERT256Index(SDNode *N); 485 486 /// getExtractVEXTRACT128Immediate - Return the appropriate 487 /// immediate to extract the specified EXTRACT_SUBVECTOR index 488 /// with VEXTRACTF128, VEXTRACTI128 instructions. 489 unsigned getExtractVEXTRACT128Immediate(SDNode *N); 490 491 /// getInsertVINSERT128Immediate - Return the appropriate 492 /// immediate to insert at the specified INSERT_SUBVECTOR index 493 /// with VINSERTF128, VINSERT128 instructions. 494 unsigned getInsertVINSERT128Immediate(SDNode *N); 495 496 /// getExtractVEXTRACT256Immediate - Return the appropriate 497 /// immediate to extract the specified EXTRACT_SUBVECTOR index 498 /// with VEXTRACTF64X4, VEXTRACTI64x4 instructions. 499 unsigned getExtractVEXTRACT256Immediate(SDNode *N); 500 501 /// getInsertVINSERT256Immediate - Return the appropriate 502 /// immediate to insert at the specified INSERT_SUBVECTOR index 503 /// with VINSERTF64x4, VINSERTI64x4 instructions. 504 unsigned getInsertVINSERT256Immediate(SDNode *N); 505 506 /// isZeroNode - Returns true if Elt is a constant zero or a floating point 507 /// constant +0.0. 508 bool isZeroNode(SDValue Elt); 509 510 /// isOffsetSuitableForCodeModel - Returns true of the given offset can be 511 /// fit into displacement field of the instruction. 512 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, 513 bool hasSymbolicDisplacement = true); 514 515 516 /// isCalleePop - Determines whether the callee is required to pop its 517 /// own arguments. Callee pop is necessary to support tail calls. 518 bool isCalleePop(CallingConv::ID CallingConv, 519 bool is64Bit, bool IsVarArg, bool TailCallOpt); 520 } 521 522 //===--------------------------------------------------------------------===// 523 // X86TargetLowering - X86 Implementation of the TargetLowering interface 524 class X86TargetLowering final : public TargetLowering { 525 public: 526 explicit X86TargetLowering(X86TargetMachine &TM); 527 528 unsigned getJumpTableEncoding() const override; 529 530 MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } 531 532 const MCExpr * 533 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 534 const MachineBasicBlock *MBB, unsigned uid, 535 MCContext &Ctx) const override; 536 537 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC 538 /// jumptable. 539 SDValue getPICJumpTableRelocBase(SDValue Table, 540 SelectionDAG &DAG) const override; 541 const MCExpr * 542 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 543 unsigned JTI, MCContext &Ctx) const override; 544 545 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 546 /// function arguments in the caller parameter area. For X86, aggregates 547 /// that contains are placed at 16-byte boundaries while the rest are at 548 /// 4-byte boundaries. 549 unsigned getByValTypeAlignment(Type *Ty) const override; 550 551 /// getOptimalMemOpType - Returns the target specific optimal type for load 552 /// and store operations as a result of memset, memcpy, and memmove 553 /// lowering. If DstAlign is zero that means it's safe to destination 554 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 555 /// means there isn't a need to check it against alignment requirement, 556 /// probably because the source does not need to be loaded. If 'IsMemset' is 557 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 558 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 559 /// source is constant so it does not need to be loaded. 560 /// It returns EVT::Other if the type should be determined using generic 561 /// target-independent logic. 562 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, 563 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, 564 MachineFunction &MF) const override; 565 566 /// isSafeMemOpType - Returns true if it's safe to use load / store of the 567 /// specified type to expand memcpy / memset inline. This is mostly true 568 /// for all types except for some special cases. For example, on X86 569 /// targets without SSE2 f64 load / store are done with fldl / fstpl which 570 /// also does type conversion. Note the specified type doesn't have to be 571 /// legal as the hook is used before type legalization. 572 bool isSafeMemOpType(MVT VT) const override; 573 574 /// allowsUnalignedMemoryAccesses - Returns true if the target allows 575 /// unaligned memory accesses. of the specified type. Returns whether it 576 /// is "fast" by reference in the second argument. 577 bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, 578 bool *Fast) const override; 579 580 /// LowerOperation - Provide custom lowering hooks for some operations. 581 /// 582 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 583 584 /// ReplaceNodeResults - Replace the results of node with an illegal result 585 /// type with new values built out of custom code. 586 /// 587 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 588 SelectionDAG &DAG) const override; 589 590 591 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 592 593 /// isTypeDesirableForOp - Return true if the target has native support for 594 /// the specified value type and it is 'desirable' to use the type for the 595 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 596 /// instruction encodings are longer and some i16 instructions are slow. 597 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; 598 599 /// isTypeDesirable - Return true if the target has native support for the 600 /// specified value type and it is 'desirable' to use the type. e.g. On x86 601 /// i16 is legal, but undesirable since i16 instruction encodings are longer 602 /// and some i16 instructions are slow. 603 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; 604 605 MachineBasicBlock * 606 EmitInstrWithCustomInserter(MachineInstr *MI, 607 MachineBasicBlock *MBB) const override; 608 609 610 /// getTargetNodeName - This method returns the name of a target specific 611 /// DAG node. 612 const char *getTargetNodeName(unsigned Opcode) const override; 613 614 /// getSetCCResultType - Return the value type to use for ISD::SETCC. 615 EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; 616 617 /// computeMaskedBitsForTargetNode - Determine which of the bits specified 618 /// in Mask are known to be either zero or one and return them in the 619 /// KnownZero/KnownOne bitsets. 620 void computeMaskedBitsForTargetNode(const SDValue Op, 621 APInt &KnownZero, 622 APInt &KnownOne, 623 const SelectionDAG &DAG, 624 unsigned Depth = 0) const override; 625 626 // ComputeNumSignBitsForTargetNode - Determine the number of bits in the 627 // operation that are sign bits. 628 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 629 unsigned Depth) const override; 630 631 bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, 632 int64_t &Offset) const override; 633 634 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; 635 636 bool ExpandInlineAsm(CallInst *CI) const override; 637 638 ConstraintType 639 getConstraintType(const std::string &Constraint) const override; 640 641 /// Examine constraint string and operand type and determine a weight value. 642 /// The operand object must already have been set up with the operand type. 643 ConstraintWeight 644 getSingleConstraintMatchWeight(AsmOperandInfo &info, 645 const char *constraint) const override; 646 647 const char *LowerXConstraint(EVT ConstraintVT) const override; 648 649 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 650 /// vector. If it is invalid, don't add anything to Ops. If hasMemory is 651 /// true it means one of the asm constraint of the inline asm instruction 652 /// being processed is 'm'. 653 void LowerAsmOperandForConstraint(SDValue Op, 654 std::string &Constraint, 655 std::vector<SDValue> &Ops, 656 SelectionDAG &DAG) const override; 657 658 /// getRegForInlineAsmConstraint - Given a physical register constraint 659 /// (e.g. {edx}), return the register number and the register class for the 660 /// register. This should only be used for C_Register constraints. On 661 /// error, this returns a register number of 0. 662 std::pair<unsigned, const TargetRegisterClass*> 663 getRegForInlineAsmConstraint(const std::string &Constraint, 664 MVT VT) const override; 665 666 /// isLegalAddressingMode - Return true if the addressing mode represented 667 /// by AM is legal for this target, for a load/store of the specified type. 668 bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; 669 670 /// isLegalICmpImmediate - Return true if the specified immediate is legal 671 /// icmp immediate, that is the target has icmp instructions which can 672 /// compare a register against the immediate without having to materialize 673 /// the immediate into a register. 674 bool isLegalICmpImmediate(int64_t Imm) const override; 675 676 /// isLegalAddImmediate - Return true if the specified immediate is legal 677 /// add immediate, that is the target has add instructions which can 678 /// add a register and the immediate without having to materialize 679 /// the immediate into a register. 680 bool isLegalAddImmediate(int64_t Imm) const override; 681 682 683 bool isVectorShiftByScalarCheap(Type *Ty) const override; 684 685 /// isTruncateFree - Return true if it's free to truncate a value of 686 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 687 /// register EAX to i16 by referencing its sub-register AX. 688 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 689 bool isTruncateFree(EVT VT1, EVT VT2) const override; 690 691 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; 692 693 /// isZExtFree - Return true if any actual instruction that defines a 694 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 695 /// register. This does not necessarily include registers defined in 696 /// unknown ways, such as incoming arguments, or copies from unknown 697 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 698 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 699 /// all instructions that define 32-bit values implicit zero-extend the 700 /// result out to 64 bits. 701 bool isZExtFree(Type *Ty1, Type *Ty2) const override; 702 bool isZExtFree(EVT VT1, EVT VT2) const override; 703 bool isZExtFree(SDValue Val, EVT VT2) const override; 704 705 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster 706 /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be 707 /// expanded to FMAs when this method returns true, otherwise fmuladd is 708 /// expanded to fmul + fadd. 709 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; 710 711 /// isNarrowingProfitable - Return true if it's profitable to narrow 712 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow 713 /// from i32 to i8 but not from i32 to i16. 714 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; 715 716 /// isFPImmLegal - Returns true if the target can instruction select the 717 /// specified FP immediate natively. If false, the legalizer will 718 /// materialize the FP immediate as a load from a constant pool. 719 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; 720 721 /// isShuffleMaskLegal - Targets can use this to indicate that they only 722 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 723 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask 724 /// values are assumed to be legal. 725 bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, 726 EVT VT) const override; 727 728 /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is 729 /// used by Targets can use this to indicate if there is a suitable 730 /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant 731 /// pool entry. 732 bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, 733 EVT VT) const override; 734 735 /// ShouldShrinkFPConstant - If true, then instruction selection should 736 /// seek to shrink the FP constant of the specified type to a smaller type 737 /// in order to save space and / or reduce runtime. 738 bool ShouldShrinkFPConstant(EVT VT) const override { 739 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more 740 // expensive than a straight movsd. On the other hand, it's important to 741 // shrink long double fp constant since fldt is very slow. 742 return !X86ScalarSSEf64 || VT == MVT::f80; 743 } 744 745 const X86Subtarget* getSubtarget() const { 746 return Subtarget; 747 } 748 749 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is 750 /// computed in an SSE register, not on the X87 floating point stack. 751 bool isScalarFPTypeInSSEReg(EVT VT) const { 752 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 753 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 754 } 755 756 /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine 757 /// for fptoui. 758 bool isTargetFTOL() const { 759 return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); 760 } 761 762 /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be 763 /// used for fptoui to the given type. 764 bool isIntegerTypeFTOL(EVT VT) const { 765 return isTargetFTOL() && VT == MVT::i64; 766 } 767 768 /// \brief Returns true if it is beneficial to convert a load of a constant 769 /// to just the constant itself. 770 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 771 Type *Ty) const override; 772 773 /// Intel processors have a unified instruction and data cache 774 const char * getClearCacheBuiltinName() const { 775 return 0; // nothing to do, move along. 776 } 777 778 /// createFastISel - This method returns a target specific FastISel object, 779 /// or null if the target does not support "fast" ISel. 780 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 781 const TargetLibraryInfo *libInfo) const override; 782 783 /// getStackCookieLocation - Return true if the target stores stack 784 /// protector cookies at a fixed offset in some non-standard address 785 /// space, and populates the address space and offset as 786 /// appropriate. 787 bool getStackCookieLocation(unsigned &AddressSpace, 788 unsigned &Offset) const override; 789 790 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, 791 SelectionDAG &DAG) const; 792 793 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; 794 795 /// \brief Reset the operation actions based on target options. 796 void resetOperationActions() override; 797 798 protected: 799 std::pair<const TargetRegisterClass*, uint8_t> 800 findRepresentativeClass(MVT VT) const override; 801 802 private: 803 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 804 /// make the right decision when generating code for different targets. 805 const X86Subtarget *Subtarget; 806 const DataLayout *TD; 807 808 /// Used to store the TargetOptions so that we don't waste time resetting 809 /// the operation actions unless we have to. 810 TargetOptions TO; 811 812 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 813 /// floating point ops. 814 /// When SSE is available, use it for f32 operations. 815 /// When SSE2 is available, use it for f64 operations. 816 bool X86ScalarSSEf32; 817 bool X86ScalarSSEf64; 818 819 /// LegalFPImmediates - A list of legal fp immediates. 820 std::vector<APFloat> LegalFPImmediates; 821 822 /// addLegalFPImmediate - Indicate that this x86 target can instruction 823 /// select the specified FP immediate natively. 824 void addLegalFPImmediate(const APFloat& Imm) { 825 LegalFPImmediates.push_back(Imm); 826 } 827 828 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 829 CallingConv::ID CallConv, bool isVarArg, 830 const SmallVectorImpl<ISD::InputArg> &Ins, 831 SDLoc dl, SelectionDAG &DAG, 832 SmallVectorImpl<SDValue> &InVals) const; 833 SDValue LowerMemArgument(SDValue Chain, 834 CallingConv::ID CallConv, 835 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 836 SDLoc dl, SelectionDAG &DAG, 837 const CCValAssign &VA, MachineFrameInfo *MFI, 838 unsigned i) const; 839 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 840 SDLoc dl, SelectionDAG &DAG, 841 const CCValAssign &VA, 842 ISD::ArgFlagsTy Flags) const; 843 844 // Call lowering helpers. 845 846 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 847 /// for tail call optimization. Targets which want to do tail call 848 /// optimization should implement this function. 849 bool IsEligibleForTailCallOptimization(SDValue Callee, 850 CallingConv::ID CalleeCC, 851 bool isVarArg, 852 bool isCalleeStructRet, 853 bool isCallerStructRet, 854 Type *RetTy, 855 const SmallVectorImpl<ISD::OutputArg> &Outs, 856 const SmallVectorImpl<SDValue> &OutVals, 857 const SmallVectorImpl<ISD::InputArg> &Ins, 858 SelectionDAG& DAG) const; 859 bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; 860 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 861 SDValue Chain, bool IsTailCall, bool Is64Bit, 862 int FPDiff, SDLoc dl) const; 863 864 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 865 SelectionDAG &DAG) const; 866 867 std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, 868 bool isSigned, 869 bool isReplace) const; 870 871 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 872 SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; 873 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; 874 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 875 SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; 876 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 877 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 878 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 879 SDValue LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, 880 int64_t Offset, SelectionDAG &DAG) const; 881 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 882 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 883 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; 884 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 885 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 886 SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; 887 SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; 888 SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; 889 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 890 SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; 891 SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; 892 SDValue LowerToBT(SDValue And, ISD::CondCode CC, 893 SDLoc dl, SelectionDAG &DAG) const; 894 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 895 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 896 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; 897 SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; 898 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 899 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 900 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 901 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 902 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 903 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 904 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; 905 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; 906 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; 907 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; 908 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 909 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; 910 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; 911 912 SDValue 913 LowerFormalArguments(SDValue Chain, 914 CallingConv::ID CallConv, bool isVarArg, 915 const SmallVectorImpl<ISD::InputArg> &Ins, 916 SDLoc dl, SelectionDAG &DAG, 917 SmallVectorImpl<SDValue> &InVals) const override; 918 SDValue LowerCall(CallLoweringInfo &CLI, 919 SmallVectorImpl<SDValue> &InVals) const override; 920 921 SDValue LowerReturn(SDValue Chain, 922 CallingConv::ID CallConv, bool isVarArg, 923 const SmallVectorImpl<ISD::OutputArg> &Outs, 924 const SmallVectorImpl<SDValue> &OutVals, 925 SDLoc dl, SelectionDAG &DAG) const override; 926 927 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; 928 929 bool mayBeEmittedAsTailCall(CallInst *CI) const override; 930 931 MVT getTypeForExtArgOrReturn(MVT VT, 932 ISD::NodeType ExtendKind) const override; 933 934 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, 935 bool isVarArg, 936 const SmallVectorImpl<ISD::OutputArg> &Outs, 937 LLVMContext &Context) const override; 938 939 const uint16_t *getScratchRegisters(CallingConv::ID CC) const override; 940 941 /// Utility function to emit atomic-load-arith operations (and, or, xor, 942 /// nand, max, min, umax, umin). It takes the corresponding instruction to 943 /// expand, the associated machine basic block, and the associated X86 944 /// opcodes for reg/reg. 945 MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI, 946 MachineBasicBlock *MBB) const; 947 948 /// Utility function to emit atomic-load-arith operations (and, or, xor, 949 /// nand, add, sub, swap) for 64-bit operands on 32-bit target. 950 MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, 951 MachineBasicBlock *MBB) const; 952 953 // Utility function to emit the low-level va_arg code for X86-64. 954 MachineBasicBlock *EmitVAARG64WithCustomInserter( 955 MachineInstr *MI, 956 MachineBasicBlock *MBB) const; 957 958 /// Utility function to emit the xmm reg save portion of va_start. 959 MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter( 960 MachineInstr *BInstr, 961 MachineBasicBlock *BB) const; 962 963 MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, 964 MachineBasicBlock *BB) const; 965 966 MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, 967 MachineBasicBlock *BB) const; 968 969 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, 970 MachineBasicBlock *BB, 971 bool Is64Bit) const; 972 973 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, 974 MachineBasicBlock *BB) const; 975 976 MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, 977 MachineBasicBlock *BB) const; 978 979 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, 980 MachineBasicBlock *MBB) const; 981 982 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, 983 MachineBasicBlock *MBB) const; 984 985 MachineBasicBlock *emitFMA3Instr(MachineInstr *MI, 986 MachineBasicBlock *MBB) const; 987 988 /// Emit nodes that will be selected as "test Op0,Op0", or something 989 /// equivalent, for use with the given x86 condition code. 990 SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; 991 992 /// Emit nodes that will be selected as "cmp Op0,Op1", or something 993 /// equivalent, for use with the given x86 condition code. 994 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, 995 SelectionDAG &DAG) const; 996 997 /// Convert a comparison if required by the subtarget. 998 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; 999 }; 1000 1001 namespace X86 { 1002 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1003 const TargetLibraryInfo *libInfo); 1004 } 1005} 1006 1007#endif // X86ISELLOWERING_H 1008