// X86ISelLowering.h revision 83ec4b6711980242ef3c55a4fa36b2d7a39c1bfb
13f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The LLVM Compiler Infrastructure 4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This file is distributed under the University of Illinois Open Source 6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// License. See LICENSE.TXT for details. 7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//===----------------------------------------------------------------------===// 9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This file defines the interfaces that X86 uses to lower LLVM code into a 11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// selection DAG. 
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//===----------------------------------------------------------------------===// 14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef X86ISELLOWERING_H 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define X86ISELLOWERING_H 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "X86Subtarget.h" 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "X86RegisterInfo.h" 203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "X86MachineFunctionInfo.h" 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "llvm/Target/TargetLowering.h" 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "llvm/CodeGen/SelectionDAG.h" 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "llvm/CodeGen/CallingConvLower.h" 24ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace llvm { 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott namespace X86ISD { 27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // X86 Specific DAG Nodes 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott enum NodeType { 29ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Start the numbering where the builtin ops leave off. 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FIRST_NUMBER = ISD::BUILTIN_OP_END+X86::INSTRUCTION_LIST_END, 313f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen 32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// BSF - Bit scan forward. 333f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen /// BSR - Bit scan reverse. 
343f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen BSF, 353f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen BSR, 363f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen 373f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen /// SHLD, SHRD - Double shift instructions. These correspond to 383f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen /// X86::SHLDxx and X86::SHRDxx instructions. 393f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen SHLD, 403f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen SHRD, 413f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FAND - Bitwise logical AND of floating point values. This corresponds 433f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen /// to X86::ANDPS or X86::ANDPD. 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FAND, 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FOR - Bitwise logical OR of floating point values. This corresponds 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// to X86::ORPS or X86::ORPD. 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FOR, 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FXOR - Bitwise logical XOR of floating point values. This corresponds 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// to X86::XORPS or X86::XORPD. 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FXOR, 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FSRL - Bitwise logical right shift of floating point values. These 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// corresponds to X86::PSRLDQ. 
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FSRL, 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// integer source in memory and FP reg result. This corresponds to the 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// X86::FILD*m instructions. It has three inputs (token chain, address, 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// and source type) and two outputs (FP value and token chain). FILD_FLAG 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// also produces a flag). 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FILD, 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FILD_FLAG, 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// integer destination in memory and a FP reg source. This corresponds 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// to the X86::FIST*m instructions and the rounding mode change stuff. It 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// has two inputs (token chain and address) and two outputs (int value 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// and token chain). 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FP_TO_INT16_IN_MEM, 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FP_TO_INT32_IN_MEM, 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FP_TO_INT64_IN_MEM, 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FLD - This instruction implements an extending load to FP stack slots. 
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// operand, ptr to load from, and a ValueType node indicating the type 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// to load to. 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FLD, 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FST - This instruction implements a truncating store to FP stack 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// chain operand, value to store, address, and a ValueType to store it 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// as. 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FST, 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// CALL/TAILCALL - These operations represent an abstract X86 call 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// instruction, which includes a bunch of information. In particular the 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// operands of these node are: 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// #0 - The incoming token chain 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// #1 - The callee 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// #2 - The number of arg bytes the caller pushes on the stack. 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// #3 - The number of arg bytes the callee pops off the stack. 
9572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen /// #4 - The value to pass in AL/AX/EAX (optional) 9672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen /// #5 - The value to pass in DL/DX/EDX (optional) 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// The result values of these nodes are: 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// #0 - The outgoing token chain 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// #1 - The first register result value (optional) 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// #2 - The second register result value (optional) 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// The CALL vs TAILCALL distinction boils down to whether the callee is 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// known not to modify the caller's stack frame, as is standard with 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// LLVM. 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CALL, 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott TAILCALL, 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// RDTSC_DAG - This operation implements the lowering for 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// readcyclecounter 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RDTSC_DAG, 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// X86 compare and logical compare instructions. 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CMP, COMI, UCOMI, 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// X86 SetCC. 
Operand 1 is condition code, and operand 2 is the flag 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// operand produced by a CMP instruction. 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott SETCC, 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// X86 conditional moves. Operand 1 and operand 2 are the two values 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// to select from (operand 1 is a R/W operand). Operand 3 is the 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// condition code, and operand 4 is the flag operand produced by a CMP 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// or TEST instruction. It also writes a flag result. 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CMOV, 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// X86 conditional branches. Operand 1 is the chain operand, operand 2 128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// is the block to branch if condition is true, operand 3 is the 129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// condition code, and operand 4 is the flag operand produced by a CMP 130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// or TEST instruction. 131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott BRCOND, 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// Return with a flag operand. Operand 1 is the chain operand, operand 134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// 2 is the number of bytes of stack to pop. 135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RET_FLAG, 136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx. 
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott REP_STOS, 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx. 141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott REP_MOVS, 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// GlobalBaseReg - On Darwin, this node represents the result of the popl 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// at function entry, used for PIC code. 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott GlobalBaseReg, 146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// Wrapper - A wrapper node for TargetConstantPool, 148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// TargetExternalSymbol, and TargetGlobalAddress. 149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Wrapper, 150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP 152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// relative displacements. 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott WrapperRIP, 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// i32, corresponds to X86::PEXTRB. 157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott PEXTRB, 158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to 160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// i32, corresponds to X86::PEXTRW. 
1613f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen PEXTRW, 1623f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen 1633f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen /// INSERTPS - Insert any element of a 4 x float vector into any element 1643f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen /// of a destination 4 x floatvector. 165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott INSERTPS, 166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 167ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector, 168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// corresponds to X86::PINSRB. 169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott PINSRB, 170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, 172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// corresponds to X86::PINSRW. 173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott PINSRW, 17472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 17572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen /// FMAX, FMIN - Floating point max and min. 176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// 177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FMAX, FMIN, 178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal 180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// approximation. Note that these typically require refinement 181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// in order to obtain suitable precision. 
182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott FRSQRT, FRCP, 183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // TLSADDR, THREAThread - Thread Local Storage. 185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott TLSADDR, THREAD_POINTER, 186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // EH_RETURN - Exception Handling helpers. 188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott EH_RETURN, 189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// TC_RETURN - Tail call return. 191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// operand #0 chain 192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// operand #1 callee (register or absolute) 193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott /// operand #2 stack adjustment 194 /// operand #3 optional in flag 195 TC_RETURN, 196 197 // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap. 198 LCMPXCHG_DAG, 199 LCMPXCHG8_DAG, 200 201 // FNSTCW16m - Store FP control world into i16 memory. 202 FNSTCW16m, 203 204 // VZEXT_MOVL - Vector move low and zero extend. 205 VZEXT_MOVL, 206 207 // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. 208 VZEXT_LOAD, 209 210 // VSHL, VSRL - Vector logical left / right shift. 211 VSHL, VSRL 212 }; 213 } 214 215 /// Define some predicates that are used for node matching. 216 namespace X86 { 217 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 218 /// specifies a shuffle of elements that is suitable for input to PSHUFD. 219 bool isPSHUFDMask(SDNode *N); 220 221 /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 222 /// specifies a shuffle of elements that is suitable for input to PSHUFD. 
223 bool isPSHUFHWMask(SDNode *N); 224 225 /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 226 /// specifies a shuffle of elements that is suitable for input to PSHUFD. 227 bool isPSHUFLWMask(SDNode *N); 228 229 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 230 /// specifies a shuffle of elements that is suitable for input to SHUFP*. 231 bool isSHUFPMask(SDNode *N); 232 233 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 234 /// specifies a shuffle of elements that is suitable for input to MOVHLPS. 235 bool isMOVHLPSMask(SDNode *N); 236 237 /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 238 /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 239 /// <2, 3, 2, 3> 240 bool isMOVHLPS_v_undef_Mask(SDNode *N); 241 242 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 243 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 244 bool isMOVLPMask(SDNode *N); 245 246 /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 247 /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 248 /// as well as MOVLHPS. 249 bool isMOVHPMask(SDNode *N); 250 251 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 252 /// specifies a shuffle of elements that is suitable for input to UNPCKL. 253 bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false); 254 255 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 256 /// specifies a shuffle of elements that is suitable for input to UNPCKH. 257 bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false); 258 259 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 260 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, 261 /// <0, 0, 1, 1> 262 bool isUNPCKL_v_undef_Mask(SDNode *N); 263 264 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 265 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 266 /// <2, 2, 3, 3> 267 bool isUNPCKH_v_undef_Mask(SDNode *N); 268 269 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 270 /// specifies a shuffle of elements that is suitable for input to MOVSS, 271 /// MOVSD, and MOVD, i.e. setting the lowest element. 272 bool isMOVLMask(SDNode *N); 273 274 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 275 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 276 bool isMOVSHDUPMask(SDNode *N); 277 278 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 279 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 280 bool isMOVSLDUPMask(SDNode *N); 281 282 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand 283 /// specifies a splat of a single element. 284 bool isSplatMask(SDNode *N); 285 286 /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand 287 /// specifies a splat of zero element. 288 bool isSplatLoMask(SDNode *N); 289 290 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 291 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 292 /// instructions. 293 unsigned getShuffleSHUFImmediate(SDNode *N); 294 295 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 296 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 297 /// instructions. 298 unsigned getShufflePSHUFHWImmediate(SDNode *N); 299 300 /// getShufflePSHUFKWImmediate - Return the appropriate immediate to shuffle 301 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 302 /// instructions. 
303 unsigned getShufflePSHUFLWImmediate(SDNode *N); 304 } 305 306 //===--------------------------------------------------------------------===// 307 // X86TargetLowering - X86 Implementation of the TargetLowering interface 308 class X86TargetLowering : public TargetLowering { 309 int VarArgsFrameIndex; // FrameIndex for start of varargs area. 310 int RegSaveFrameIndex; // X86-64 vararg func register save area. 311 unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. 312 unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. 313 int BytesToPopOnReturn; // Number of arg bytes ret should pop. 314 int BytesCallerReserves; // Number of arg bytes caller makes. 315 316 public: 317 explicit X86TargetLowering(X86TargetMachine &TM); 318 319 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC 320 /// jumptable. 321 SDOperand getPICJumpTableRelocBase(SDOperand Table, 322 SelectionDAG &DAG) const; 323 324 // Return the number of bytes that a function should pop when it returns (in 325 // addition to the space used by the return address). 326 // 327 unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } 328 329 // Return the number of bytes that the caller reserves for arguments passed 330 // to this function. 331 unsigned getBytesCallerReserves() const { return BytesCallerReserves; } 332 333 /// getStackPtrReg - Return the stack pointer register we are using: either 334 /// ESP or RSP. 335 unsigned getStackPtrReg() const { return X86StackPtr; } 336 337 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 338 /// function arguments in the caller parameter area. For X86, aggregates 339 /// that contains are placed at 16-byte boundaries while the rest are at 340 /// 4-byte boundaries. 
341 virtual unsigned getByValTypeAlignment(const Type *Ty) const; 342 343 /// getOptimalMemOpType - Returns the target specific optimal type for load 344 /// and store operations as a result of memset, memcpy, and memmove 345 /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for 346 /// determining it. 347 virtual 348 MVT getOptimalMemOpType(uint64_t Size, unsigned Align, 349 bool isSrcConst, bool isSrcStr) const; 350 351 /// LowerOperation - Provide custom lowering hooks for some operations. 352 /// 353 virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG); 354 355 /// ExpandOperation - Custom lower the specified operation, splitting the 356 /// value into two pieces. 357 /// 358 virtual SDNode *ExpandOperationResult(SDNode *N, SelectionDAG &DAG); 359 360 361 virtual SDOperand PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 362 363 virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, 364 MachineBasicBlock *MBB); 365 366 367 /// getTargetNodeName - This method returns the name of a target specific 368 /// DAG node. 369 virtual const char *getTargetNodeName(unsigned Opcode) const; 370 371 /// getSetCCResultType - Return the ISD::SETCC ValueType 372 virtual MVT getSetCCResultType(const SDOperand &) const; 373 374 /// computeMaskedBitsForTargetNode - Determine which of the bits specified 375 /// in Mask are known to be either zero or one and return them in the 376 /// KnownZero/KnownOne bitsets. 
377 virtual void computeMaskedBitsForTargetNode(const SDOperand Op, 378 const APInt &Mask, 379 APInt &KnownZero, 380 APInt &KnownOne, 381 const SelectionDAG &DAG, 382 unsigned Depth = 0) const; 383 384 virtual bool 385 isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const; 386 387 SDOperand getReturnAddressFrameIndex(SelectionDAG &DAG); 388 389 ConstraintType getConstraintType(const std::string &Constraint) const; 390 391 std::vector<unsigned> 392 getRegClassForInlineAsmConstraint(const std::string &Constraint, 393 MVT VT) const; 394 395 virtual const char *LowerXConstraint(MVT ConstraintVT) const; 396 397 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 398 /// vector. If it is invalid, don't add anything to Ops. 399 virtual void LowerAsmOperandForConstraint(SDOperand Op, 400 char ConstraintLetter, 401 std::vector<SDOperand> &Ops, 402 SelectionDAG &DAG) const; 403 404 /// getRegForInlineAsmConstraint - Given a physical register constraint 405 /// (e.g. {edx}), return the register number and the register class for the 406 /// register. This should only be used for C_Register constraints. On 407 /// error, this returns a register number of 0. 408 std::pair<unsigned, const TargetRegisterClass*> 409 getRegForInlineAsmConstraint(const std::string &Constraint, 410 MVT VT) const; 411 412 /// isLegalAddressingMode - Return true if the addressing mode represented 413 /// by AM is legal for this target, for a load/store of the specified type. 414 virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; 415 416 /// isTruncateFree - Return true if it's free to truncate a value of 417 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 418 /// register EAX to i16 by referencing its sub-register AX. 
419 virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; 420 virtual bool isTruncateFree(MVT VT1, MVT VT2) const; 421 422 /// isShuffleMaskLegal - Targets can use this to indicate that they only 423 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 424 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask 425 /// values are assumed to be legal. 426 virtual bool isShuffleMaskLegal(SDOperand Mask, MVT VT) const; 427 428 /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is 429 /// used by Targets can use this to indicate if there is a suitable 430 /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant 431 /// pool entry. 432 virtual bool isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps, 433 MVT EVT, SelectionDAG &DAG) const; 434 435 /// ShouldShrinkFPConstant - If true, then instruction selection should 436 /// seek to shrink the FP constant of the specified type to a smaller type 437 /// in order to save space and / or reduce runtime. 438 virtual bool ShouldShrinkFPConstant(MVT VT) const { 439 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more 440 // expensive than a straight movsd. On the other hand, it's important to 441 // shrink long double fp constant since fldt is very slow. 442 return !X86ScalarSSEf64 || VT == MVT::f80; 443 } 444 445 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 446 /// for tail call optimization. Target which want to do tail call 447 /// optimization should implement this function. 448 virtual bool IsEligibleForTailCallOptimization(SDOperand Call, 449 SDOperand Ret, 450 SelectionDAG &DAG) const; 451 452 virtual const X86Subtarget* getSubtarget() { 453 return Subtarget; 454 } 455 456 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is 457 /// computed in an SSE register, not on the X87 floating point stack. 
458 bool isScalarFPTypeInSSEReg(MVT VT) const { 459 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 460 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 461 } 462 463 private: 464 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 465 /// make the right decision when generating code for different targets. 466 const X86Subtarget *Subtarget; 467 const X86RegisterInfo *RegInfo; 468 469 /// X86StackPtr - X86 physical register used as stack ptr. 470 unsigned X86StackPtr; 471 472 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 473 /// floating point ops. 474 /// When SSE is available, use it for f32 operations. 475 /// When SSE2 is available, use it for f64 operations. 476 bool X86ScalarSSEf32; 477 bool X86ScalarSSEf64; 478 479 SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode*TheCall, 480 unsigned CallingConv, SelectionDAG &DAG); 481 482 SDOperand LowerMemArgument(SDOperand Op, SelectionDAG &DAG, 483 const CCValAssign &VA, MachineFrameInfo *MFI, 484 unsigned CC, SDOperand Root, unsigned i); 485 486 SDOperand LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG, 487 const SDOperand &StackPtr, 488 const CCValAssign &VA, SDOperand Chain, 489 SDOperand Arg); 490 491 // Call lowering helpers. 
492 bool IsCalleePop(SDOperand Op); 493 bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall); 494 bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall); 495 SDOperand EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDOperand &OutRetAddr, 496 SDOperand Chain, bool IsTailCall, bool Is64Bit, 497 int FPDiff); 498 499 CCAssignFn *CCAssignFnForNode(SDOperand Op) const; 500 NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDOperand Op); 501 unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); 502 503 std::pair<SDOperand,SDOperand> FP_TO_SINTHelper(SDOperand Op, 504 SelectionDAG &DAG); 505 506 SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG); 507 SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG); 508 SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG); 509 SDOperand LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op, SelectionDAG &DAG); 510 SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG); 511 SDOperand LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op, SelectionDAG &DAG); 512 SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG); 513 SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG); 514 SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG); 515 SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG); 516 SDOperand LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG); 517 SDOperand LowerShift(SDOperand Op, SelectionDAG &DAG); 518 SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG); 519 SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG); 520 SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG); 521 SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG); 522 SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG); 523 SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG); 524 SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG); 525 SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG); 526 SDOperand LowerMEMSET(SDOperand Op, SelectionDAG 
&DAG); 527 SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG); 528 SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG); 529 SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG); 530 SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG); 531 SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG); 532 SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG); 533 SDOperand LowerVAARG(SDOperand Op, SelectionDAG &DAG); 534 SDOperand LowerVACOPY(SDOperand Op, SelectionDAG &DAG); 535 SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG); 536 SDOperand LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG); 537 SDOperand LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG); 538 SDOperand LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, SelectionDAG &DAG); 539 SDOperand LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG); 540 SDOperand LowerTRAMPOLINE(SDOperand Op, SelectionDAG &DAG); 541 SDOperand LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG); 542 SDOperand LowerCTLZ(SDOperand Op, SelectionDAG &DAG); 543 SDOperand LowerCTTZ(SDOperand Op, SelectionDAG &DAG); 544 SDOperand LowerLCS(SDOperand Op, SelectionDAG &DAG); 545 SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); 546 SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); 547 SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG); 548 SDNode *ExpandATOMIC_LSS(SDNode *N, SelectionDAG &DAG); 549 550 SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG, 551 SDOperand Chain, 552 SDOperand Dst, SDOperand Src, 553 SDOperand Size, unsigned Align, 554 const Value *DstSV, uint64_t DstSVOff); 555 SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG, 556 SDOperand Chain, 557 SDOperand Dst, SDOperand Src, 558 SDOperand Size, unsigned Align, 559 bool AlwaysInline, 560 const Value *DstSV, uint64_t DstSVOff, 561 const Value *SrcSV, uint64_t SrcSVOff); 562 563 /// Utility function to emit atomic bitwise operations (and, or, xor). 
564 // It takes the bitwise instruction to expand, the associated machine basic 565 // block, and the associated X86 opcodes for reg/reg and reg/imm. 566 MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( 567 MachineInstr *BInstr, 568 MachineBasicBlock *BB, 569 unsigned regOpc, 570 unsigned immOpc); 571 572 /// Utility function to emit atomic min and max. It takes the min/max 573 // instruction to expand, the associated basic block, and the associated 574 // cmov opcode for moving the min or max value. 575 MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr, 576 MachineBasicBlock *BB, 577 unsigned cmovOpc); 578 }; 579} 580 581#endif // X86ISELLOWERING_H 582