// X86ISelLowering.cpp revision a69571c7991813c93cba64e88eced6899ce93d81
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird: it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
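  // (Custom here routes these through LowerOperation(), which turns them
  // into X86ISD::CMOV / X86ISD::SETCC nodes using the condition codes
  // produced by translateX86CC() later in this file.)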
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG            , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE        , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE     , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops.
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
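    // Under SSE only +0.0 is cheap to materialize (a register xor); the x87
    // path below can also form +1.0, -0.0 and -1.0 with FLD0/FLD1 plus FCHS.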
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic.
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,  MVT::v4f32, Legal);
    setOperationAction(ISD::OR,   MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,  MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,  MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,  MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,  MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v2f64, Legal);
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2f64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> Args = TargetLowering::LowerArguments(F, DAG);

  FormalArgs.clear();
  FormalArgLocs.clear();

  // This sets BytesToPopOnReturn, BytesCallerReserves, etc. which have to be
  // set before the rest of the function can be lowered.
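  // Note the two-phase scheme: the Preprocess*Arguments routines only record
  // where each formal argument lives (in FormalArgLocs); the corresponding
  // Lower*Arguments routines below materialize the actual loads and register
  // copies.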
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    PreprocessFastCCArguments(Args, F, DAG);
  else
    PreprocessCCCArguments(Args, F, DAG);
  return Args;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                  C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i1:
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 3)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

/// getFormalArgObjects - Returns itself if Op is a FORMAL_ARGUMENTS, otherwise
/// returns the FORMAL_ARGUMENTS node(s) that made up parts of the node.
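/// For example, an i8 formal argument reaches here as
/// (truncate (AssertSext/AssertZext (formal_arguments))), and an i64 formal
/// argument as a BUILD_PAIR of its two i32 halves.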
static std::vector<SDOperand> getFormalArgObjects(SDOperand Op) {
  unsigned Opc = Op.getOpcode();
  std::vector<SDOperand> Objs;
  if (Opc == ISD::TRUNCATE) {
    Op = Op.getOperand(0);
    assert(Op.getOpcode() == ISD::AssertSext ||
           Op.getOpcode() == ISD::AssertZext);
    Objs.push_back(Op.getOperand(0));
  } else if (Opc == ISD::FP_ROUND || Opc == ISD::VBIT_CONVERT) {
    Objs.push_back(Op.getOperand(0));
  } else if (Opc == ISD::BUILD_PAIR) {
    Objs.push_back(Op.getOperand(0));
    Objs.push_back(Op.getOperand(1));
  } else {
    Objs.push_back(Op);
  }
  return Objs;
}

void X86TargetLowering::PreprocessCCCArguments(std::vector<SDOperand> Args,
                                               Function &F, SelectionDAG &DAG) {
  unsigned NumArgs = Args.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 };
  for (unsigned i = 0; i < NumArgs; ++i) {
    SDOperand Op = Args[i];
    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
         I != E; ++I) {
      SDOperand Obj = *I;
      MVT::ValueType ObjectVT = Obj.getValueType();
      unsigned ArgIncrement = 4;
      unsigned ObjSize = 0;
      unsigned ObjXMMRegs = 0;
      HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
      if (ObjSize >= 8)
        ArgIncrement = ObjSize;

      if (ObjXMMRegs) {
        // Passed in an XMM register.
        unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                 X86::VR128RegisterClass);
        std::pair<FALocInfo, FALocInfo> Loc =
          std::make_pair(FALocInfo(FALocInfo::LiveInRegLoc, Reg, ObjectVT),
                         FALocInfo());
        FormalArgLocs.push_back(Loc);
        NumXMMRegs += ObjXMMRegs;
      } else {
        // Create the frame index object for this incoming parameter...
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        std::pair<FALocInfo, FALocInfo> Loc =
          std::make_pair(FALocInfo(FALocInfo::StackFrameLoc, FI), FALocInfo());
        FormalArgLocs.push_back(Loc);
        ArgOffset += ArgIncrement;   // Move on to the next argument...
      }
    }
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
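  // In the C calling convention the callee pops nothing; the caller both
  // reserves and releases the argument area.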
  BytesCallerReserves = ArgOffset;
}

void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  for (unsigned i = 0; i < NumArgs; ++i) {
    std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i];
    SDOperand ArgValue;
    if (Loc.first.Kind == FALocInfo::StackFrameLoc) {
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      unsigned FI = FormalArgLocs[i].first.Loc;
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
      ArgValue = DAG.getLoad(Op.Val->getValueType(i),
                             DAG.getEntryNode(), FIN, DAG.getSrcValue(NULL));
    } else {
      // Must be a CopyFromReg.
      ArgValue = DAG.getCopyFromReg(DAG.getEntryNode(), Loc.first.Loc,
                                    Loc.first.Typ);
    }
    FormalArgs.push_back(ArgValue);
  }
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 };

  std::vector<SDOperand> RegValuesToPass;
  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      case MVT::Vector:
        if (NumXMMRegs < 3)
          ++NumXMMRegs;
        else
          NumBytes += 16;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Arguments go on the stack in reverse order, as specified by the ABI.
    unsigned ArgOffset = 0;
    NumXMMRegs = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits. If the input type is signed, use a
        // sign extend; otherwise use a zero extend.
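        // (E.g. a signed i16 argument is sign-extended to i32 before being
        // stored into its 4-byte slot.)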
        if (Args[i].second->isSigned())
          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32: {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      }
      case MVT::i64:
      case MVT::f64: {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
      case MVT::Vector:
        if (NumXMMRegs < 3) {
          RegValuesToPass.push_back(Args[i].first);
          NumXMMRegs++;
        } else {
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
          ArgOffset += 16;
        }
      }
    }
    if (!Stores.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal. Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg = XMMArgRegs[i];
    SDOperand RegToPass = RegValuesToPass[i];
    assert(RegToPass.getValueType() == MVT::Vector);
    unsigned NumElems =
      cast<ConstantSDNode>(*(RegToPass.Val->op_end()-2))->getValue();
    MVT::ValueType EVT = cast<VTSDNode>(*(RegToPass.Val->op_end()-1))->getVT();
    MVT::ValueType PVT = getVectorType(EVT, NumElems);
    SDOperand CCRegNode = DAG.getRegister(CCReg, PVT);
    RegToPass = DAG.getNode(ISD::VBIT_CONVERT, PVT, RegToPass);
    Chain = DAG.getCopyToReg(Chain, CCRegNode, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
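  // CALLSEQ_END operands: the chain, the number of bytes to release from the
  // stack, the number of bytes the callee popped (0 for ccc), and the flag.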
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    case MVT::Vector: {
      const PackedType *PTy = cast<PackedType>(RetTy);
      MVT::ValueType EVT;
      MVT::ValueType LVT;
      unsigned NumRegs = getPackedTypeBreakdown(PTy, EVT, LVT);
      assert(NumRegs == 1 && "Unsupported type!");
      RetVal = DAG.getCopyFromReg(Chain, X86::XMM0, EVT, InFlag);
      Chain = RetVal.getValue(1);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                 Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers. 0 is none, 1 is "use EAX", 2 is "use EAX and
// EDX". Anything more is illegal.
//
// FIXME: The linscan register allocator currently has problems with
// coalescing. At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced. This causes problems if
// too many physregs are coalesced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;


/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
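/// For example, with FASTCC_NUM_INT_ARGS_INREGS == 2 an i64 argument is
/// split across EAX and EDX; with only one register left, its low half goes
/// in a register and its high half in a 4-byte stack slot.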
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i1:
  case MVT::i8:
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
      ObjSize = 1;
    break;
  case MVT::i16:
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
      ObjSize = 2;
    break;
  case MVT::i32:
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
      ObjSize = 4;
    break;
  case MVT::i64:
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 4;
    } else
      ObjSize = 8;
    break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 3)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

void
X86TargetLowering::PreprocessFastCCArguments(std::vector<SDOperand> Args,
                                             Function &F, SelectionDAG &DAG) {
  unsigned NumArgs = Args.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot.

  // Keep track of the number of integer regs passed so far. This can be either
  // 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 };

  for (unsigned i = 0; i < NumArgs; ++i) {
    SDOperand Op = Args[i];
    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
         I != E; ++I) {
      SDOperand Obj = *I;
      MVT::ValueType ObjectVT = Obj.getValueType();
      unsigned ArgIncrement = 4;
      unsigned ObjSize = 0;
      unsigned ObjIntRegs = 0;
      unsigned ObjXMMRegs = 0;

      HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                              ObjSize, ObjIntRegs, ObjXMMRegs);
      if (ObjSize >= 8)
        ArgIncrement = ObjSize;

      unsigned Reg;
      std::pair<FALocInfo,FALocInfo> Loc = std::make_pair(FALocInfo(),
                                                          FALocInfo());
      if (ObjIntRegs) {
        switch (ObjectVT) {
        default: assert(0 && "Unhandled argument type!");
        case MVT::i1:
        case MVT::i8:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                          X86::R8RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i8;
          break;
        case MVT::i16:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                          X86::R16RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i16;
          break;
        case MVT::i32:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                          X86::R32RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i32;
          break;
        case MVT::i64:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                          X86::R32RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i32;
          if (ObjIntRegs == 2) {
            Reg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
            Loc.second.Kind = FALocInfo::LiveInRegLoc;
            Loc.second.Loc = Reg;
            Loc.second.Typ = MVT::i32;
          }
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v2i64:
        case MVT::v4f32:
        case MVT::v2f64:
          Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = ObjectVT;
          break;
        }
        NumIntRegs += ObjIntRegs;
        NumXMMRegs += ObjXMMRegs;
      }
      if (ObjSize) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        if (ObjectVT == MVT::i64 && ObjIntRegs) {
          Loc.second.Kind = FALocInfo::StackFrameLoc;
          Loc.second.Loc = FI;
        } else {
          Loc.first.Kind = FALocInfo::StackFrameLoc;
          Loc.first.Loc = FI;
        }
        ArgOffset += ArgIncrement;   // Move on to the next argument.
      }

      FormalArgLocs.push_back(Loc);
    }
  }

  // Make sure the callee pop amount is 8n+4 bytes, so that the start of the
  // arguments is 8-byte aligned once the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::Vector: {
    const PackedType *PTy = cast<PackedType>(F.getReturnType());
    MVT::ValueType EVT;
    MVT::ValueType LVT;
    unsigned NumRegs = getPackedTypeBreakdown(PTy, EVT, LVT);
    assert(NumRegs == 1 && "Unsupported type!");
    MF.addLiveOut(X86::XMM0);
    break;
  }
  }
}

void
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType VT = Op.Val->getValueType(i);
    std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i];
    SDOperand ArgValue;
    if (Loc.first.Kind == FALocInfo::StackFrameLoc) {
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(Loc.first.Loc, MVT::i32);
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else {
      // Must be a CopyFromReg.
      ArgValue = DAG.getCopyFromReg(DAG.getEntryNode(), Loc.first.Loc,
                                    Loc.first.Typ);
    }

    if (Loc.second.Kind != FALocInfo::None) {
      SDOperand ArgValue2;
      if (Loc.second.Kind == FALocInfo::StackFrameLoc) {
        // Create the SelectionDAG nodes corresponding to a load from this
        // parameter.
        SDOperand FIN = DAG.getFrameIndex(Loc.second.Loc, MVT::i32);
        ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), DAG.getEntryNode(),
                                FIN, DAG.getSrcValue(NULL));
      } else {
        // Must be a CopyFromReg.
        ArgValue2 = DAG.getCopyFromReg(DAG.getEntryNode(),
                                       Loc.second.Loc, Loc.second.Typ);
      }
      ArgValue = DAG.getNode(ISD::BUILD_PAIR, VT, ArgValue, ArgValue2);
    }
    FormalArgs.push_back(ArgValue);
  }
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far. This can be either
  // 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
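  // This first pass over the arguments only computes NumBytes, the stack
  // space required; the second pass below emits the actual stores and
  // register copies.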
  unsigned NumIntRegs = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the callee pop amount is 8n+4 bytes, so that the start of the
  // arguments is 8-byte aligned once the return address has been pushed.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegValuesToPass.push_back(Args[i].first);
        ++NumIntRegs;
        break;
      }
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
      // Can pass (at least) part of it in regs?
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(1, MVT::i32));
        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(0, MVT::i32));
        RegValuesToPass.push_back(Lo);
        ++NumIntRegs;

        // Pass both parts in regs?
        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
          RegValuesToPass.push_back(Hi);
          ++NumIntRegs;
        } else {
          // Pass the high part in memory.
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
          ArgOffset += 4;
        }
        break;
      }
      // Fall through
    case MVT::f64:
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
  }
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);

  // Make sure the callee pop amount is 8n+4 bytes, so that the start of the
  // arguments is 8-byte aligned once the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);

  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal. Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg;
    SDOperand RegToPass = RegValuesToPass[i];
    switch (RegToPass.getValueType()) {
    default: assert(0 && "Bad thing to pass in regs");
    case MVT::i8:
      CCReg = (i == 0) ? X86::AL : X86::DL;
      break;
    case MVT::i16:
      CCReg = (i == 0) ? X86::AX : X86::DX;
      break;
    case MVT::i32:
      CCReg = (i == 0) ? X86::EAX : X86::EDX;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
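  // For fastcc both CALLSEQ_END byte counts are ArgOffset: the callee pops
  // all of its stack arguments, which is what permits proper tail calls.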
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
}

std::pair<SDOperand, SDOperand> X86TargetLowering::
LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                        SelectionDAG &DAG) {
  SDOperand Result;
  if (Depth)  // Depths > 0 not supported yet!
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address.
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
                           DAG.getSrcValue(NULL));
    else
      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
                           DAG.getConstant(4, MVT::i32));
  }
  return std::make_pair(Result, Chain);
}

/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
/// which corresponds to the condition code.
static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
  switch (X86CC) {
  default: assert(0 && "Unknown X86 conditional code!");
  case X86ISD::COND_A:  return X86::JA;
  case X86ISD::COND_AE: return X86::JAE;
  case X86ISD::COND_B:  return X86::JB;
  case X86ISD::COND_BE: return X86::JBE;
  case X86ISD::COND_E:  return X86::JE;
  case X86ISD::COND_G:  return X86::JG;
  case X86ISD::COND_GE: return X86::JGE;
  case X86ISD::COND_L:  return X86::JL;
  case X86ISD::COND_LE: return X86::JLE;
  case X86ISD::COND_NE: return X86::JNE;
  case X86ISD::COND_NO: return X86::JNO;
  case X86ISD::COND_NP: return X86::JNP;
  case X86ISD::COND_NS: return X86::JNS;
  case X86ISD::COND_O:  return X86::JO;
  case X86ISD::COND_P:  return X86::JP;
  case X86ISD::COND_S:  return X86::JS;
  }
}

/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. Flip is set to true if the
/// order of the comparison operands should be flipped.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO:  X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:   X86CC = X86ISD::COND_NP; break;
    }
  }

  return X86CC != X86ISD::COND_INVALID;
}

static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern. The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    // thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    // copy0MBB:
    //  %FalseValue = ...
    //  # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges.
    BB->addSuccessor(sinkMBB);

    // sinkMBB:
    //  %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
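    // The x87 FIST family rounds according to the rounding-control field of
    // the control word, so 0xC7F (RC bits set to "truncate", exception bits
    // masked) is loaded around the store and the original word is restored
    // afterwards.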
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}


//===----------------------------------------------------------------------===//
//  X86 Custom Lowering Hooks
//===----------------------------------------------------------------------===//

/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
/// load. For Darwin, external and weak symbols are indirect, loading the value
/// at address GV rather than the value of GV itself. This means that the
/// GlobalAddress must be in the base or index register of the address, not the
/// GV offset field.
static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}
/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(std::vector<SDOperand> &N) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(N[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isSHUFPMask(Ops);
}
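// Editor's note: for SHUFPS/SHUFPD the low half of the result is drawn from
// the first operand and the high half from the second, so for a 4-element
// shuffle a mask such as <1, 0, 7, 6> is accepted (indices 0-3 name V1's
// elements, 4-7 name V2's) while <4, 0, 7, 6> is not.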
/// isCommutedSHUFP - Returns true if the shuffle mask is exactly the reverse
/// of what x86 shuffles want. x86 shuffles require the lower half elements
/// to come from vector 1 (which would equal the dest.) and the upper half to
/// come from vector 2.
static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
  unsigned NumElems = Ops.size();
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumElems))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return isCommutedSHUFP(Ops);
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect elt0 == 6, elt1 == 7, elt2 == 2, elt3 == 3.
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}
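// Editor's note: concretely, for a 4-element vector MOVLPS replaces the low
// half from memory, so its mask is <4, 5, 2, 3>, while MOVHPS/MOVLHPS
// replace the high half, giving <0, 1, 4, 5>.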
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N[i];
    SDOperand BitI1 = N[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      // V2 is a splat, so the odd elements need only point (modulo undef) at
      // V2's first element, i.e. the normalized form.
      if (!isUndefOrEqual(BitI1, NumElems))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElems))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isUNPCKLMask(Ops, V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N[i];
    SDOperand BitI1 = N[i+1];
    if (!isUndefOrEqual(BitI, j + NumElems/2))
      return false;
    if (V2IsSplat) {
      // As above: with a splat V2, expect the normalized first element.
      if (!isUndefOrEqual(BitI1, NumElems))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isUNPCKHMask(Ops, V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>.
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(std::vector<SDOperand> &N) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  if (!isUndefOrEqual(N[0], NumElems))
    return false;

  for (unsigned i = 1; i < NumElems; ++i) {
    SDOperand Arg = N[i];
    if (!isUndefOrEqual(Arg, i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isMOVLMask(Ops);
}
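// Editor's note: for v4f32 the canonical MOVSS mask is <4, 1, 2, 3>: the low
// element comes from V2 and the rest pass through from V1, which is exactly
// what isMOVLMask checks (element 0 == NumElems, element i == i otherwise).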
/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. X86 movs requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(std::vector<SDOperand> &Ops,
                           bool V2IsSplat = false) {
  unsigned NumElems = Ops.size();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumElems; ++i) {
    SDOperand Arg = Ops[i];
    if (V2IsSplat) {
      if (!isUndefOrEqual(Arg, NumElems))
        return false;
    } else {
      if (!isUndefOrEqual(Arg, i+NumElems))
        return false;
    }
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return isCommutedMOVL(Ops, V2IsSplat);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit, and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
/// instructions.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 4;  // An undef lane encodes as field 0 after the bias below.
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}
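// Editor's note: a worked example of the immediate encodings above. For a
// 4-element SHUFP/PSHUFD mask the two-bit fields are packed with element 3's
// source index in the top bits, so the reversal mask <3, 2, 1, 0> encodes as
// 0b00011011 = 0x1B. The PSHUFLW/PSHUFHW encodings work the same way on their
// four 16-bit lanes, with PSHUFHW indices biased down by 4.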
/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// values in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  std::vector<SDOperand> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
}
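// Editor's note: for example, commuting vector_shuffle(V1, V2, <0, 5, 2, 7>)
// yields vector_shuffle(V2, V1, <4, 1, 6, 3>): each mask index is rebased so
// it names the same element after the operands trade places.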
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return (N->getOpcode() == ISD::LOAD);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  std::vector<SDOperand> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);

  std::vector<SDOperand> MaskVec;
  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
}

/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
  std::vector<SDOperand> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
}
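// Editor's note: for 4 elements getUnpacklMask produces <0, 4, 1, 5> and
// getUnpackhMask (below) produces <2, 6, 3, 7>, matching the interleaving
// performed by unpcklps/unpckhps.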
/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
  unsigned Half = NumElems/2;
  std::vector<SDOperand> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");
  unsigned NumElems = getVectorNumElements(VT);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  bool isFP = MVT::isFloatingPoint(EVT);
  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
  std::vector<SDOperand> ZeroVec(NumElems, Zero);
  return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
}

/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = Mask.getNumOperands();
  Mask = getUnpacklMask(NumElems, DAG);
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  Mask = getZeroVector(MaskVT, DAG);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and zero or undef vector.
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                             unsigned NumElems, unsigned Idx,
                                             bool isZero, SelectionDAG &DAG) {
  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
  SDOperand Zero = DAG.getConstant(0, EVT);
  std::vector<SDOperand> MaskVec(NumElems, Zero);
  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG) {
  if (NumNonZero > 8)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDOperand ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.Val)
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getConstant(i/2, MVT::i32));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG) {
  if (NumNonZero > 4)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, DAG);
        else
          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
                      DAG.getConstant(i, MVT::i32));
    }
  }

  return V;
}
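// Editor's note: LowerBuildVectorv16i8 above works in 16-bit lanes because
// there is no byte variant of pinsrw. Each even/odd byte pair is assembled as
// (hi << 8) | lo with zero-extends, then inserted into lane i/2; the final
// bit_convert reinterprets the v8i16 result as v16i8.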
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor.
  if (ISD::isBuildVectorAllZeros(Op.Val))
    return Op;

  // All ones are handled with pcmpeqd.
  if (ISD::isBuildVectorAllOnes(Op.Val))
    return Op;

  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  unsigned EVTBits = MVT::getSizeInBits(EVT);

  unsigned NumElems = Op.getNumOperands();
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  std::set<SDOperand> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDOperand Elt = Op.getOperand(i);
    if (Elt.getOpcode() != ISD::UNDEF) {
      Values.insert(Elt);
      if (isZeroNode(Elt))
        NumZero++;
      else {
        NonZeros |= (1 << i);
        NumNonZero++;
      }
    }
  }

  if (NumNonZero == 0)
    // Must be a mix of zero and undef. Return a zero vector.
    return getZeroVector(VT, DAG);

  // Splat is obviously ok. Let legalizer expand it to a shuffle.
  if (Values.size() == 1)
    return SDOperand();

  // Special case for single non-zero element.
  if (NumNonZero == 1) {
    unsigned Idx = CountTrailingZeros_32(NonZeros);
    SDOperand Item = Op.getOperand(Idx);
    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
    if (Idx == 0)
      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
                                         NumZero > 0, DAG);

    if (EVTBits == 32) {
      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
                                         DAG);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
                         DAG.getNode(ISD::UNDEF, VT), Mask);
    }
  }

  // Let legalizer expand 2-wide build_vectors.
  if (EVTBits == 64)
    return SDOperand();

  // If element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8) {
    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG);
    if (V.Val) return V;
  }

  if (EVTBits == 16) {
    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG);
    if (V.Val) return V;
  }

  // If element VT is == 32 bits, turn it into a number of shuffles.
  std::vector<SDOperand> V(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, DAG);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    }

    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
      default: break;
      case 0:
        V[i] = V[i*2];  // Must be a zero vector.
        break;
      case 1:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
                           getMOVLMask(NumElems, DAG));
        break;
      case 2:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                           getMOVLMask(NumElems, DAG));
        break;
      case 3:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                           getUnpacklMask(NumElems, DAG));
        break;
      }
    }

    // Take advantage of the fact R32 to VR128 scalar_to_vector (i.e. movd)
    // clears the upper bits.
    // FIXME: we can do the same for v4f32 case when we know both parts of
    // the lower half come from scalar_to_vector (loadf32). We should do
    // that in post legalizer dag combiner with target specific hooks.
    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
      return V[0];
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
    std::vector<SDOperand> MaskVec;
    bool Reverse = (NonZeros & 0x3) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i, EVT));
    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
  }

  if (Values.size() > 2) {
    // Expand into a number of unpckl*.
    // e.g. for v4f32
    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    NumElems >>= 1;
    while (NumElems != 0) {
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                           UnpckMask);
      NumElems >>= 1;
    }
    return V[0];
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = PermMask.getNumOperands();
  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;

  if (isSplatMask(PermMask.Val)) {
    if (NumElems <= 4) return Op;
    // Promote it to a v4i32 splat.
    return PromoteSplat(Op, DAG);
  }

  if (X86::isMOVLMask(PermMask.Val))
    return (V1IsUndef) ? V2 : Op;

  if (X86::isMOVSHDUPMask(PermMask.Val) ||
      X86::isMOVSLDUPMask(PermMask.Val) ||
      X86::isMOVHLPSMask(PermMask.Val) ||
      X86::isMOVHPMask(PermMask.Val) ||
      X86::isMOVLPMask(PermMask.Val))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.Val) ||
      ShouldXformToMOVLP(V1.Val, PermMask.Val))
    return CommuteVectorShuffle(Op, DAG);

  bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF;
  bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF;
  if (V1IsSplat && !V2IsSplat) {
    Op = CommuteVectorShuffle(Op, DAG);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
    PermMask = Op.getOperand(2);
    V2IsSplat = true;
  }

  if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, DAG);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
    PermMask = Op.getOperand(2);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDOperand NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.Val != PermMask.Val)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKLMask(PermMask.Val) ||
      X86::isUNPCKHMask(PermMask.Val))
    return Op;
  if (V2IsSplat) {
    // Normalize mask so all entries that point to V2 point to its first
    // element then try to match unpck{h|l} again. If match, return a
    // new vector_shuffle with the corrected mask.
    SDOperand NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.Val != PermMask.Val) {
      if (X86::isUNPCKLMask(NewMask.Val, true)) {
        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(NewMask.Val, true)) {
        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed
  if (V2.getOpcode() != ISD::UNDEF)
    if (isCommutedSHUFP(PermMask.Val)) {
      Op = CommuteVectorShuffle(Op, DAG);
      V1 = Op.getOperand(0);
      V2 = Op.getOperand(1);
      PermMask = Op.getOperand(2);
    }

  // If VT is integer, try PSHUF* first, then SHUFP*.
  if (MVT::isInteger(VT)) {
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }

    if (X86::isSHUFPMask(PermMask.Val))
      return Op;

    // Handle v8i16 shuffle high / low shuffle node pair.
    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
      MaskVec.clear();
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
    }
  } else {
    // Floating point cases in the other order.
    if (X86::isSHUFPMask(PermMask.Val))
      return Op;
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }
  }

  if (NumElems == 4) {
    MVT::ValueType MaskVT = PermMask.getValueType();
    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
    std::vector<std::pair<int, int> > Locs;
    Locs.resize(NumElems);  // Resized, not just reserved: the loops below index it.
    std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    unsigned NumHi = 0;
    unsigned NumLo = 0;
    // If no more than two elements come from either vector, this can be
    // implemented with two shuffles. The first shuffle gathers the elements.
    // The second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
    for (unsigned i = 0; i != NumElems; ++i) {
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else {
        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
        if (Val < NumElems) {
          Locs[i] = std::make_pair(0, NumLo);
          Mask1[NumLo] = Elt;
          NumLo++;
        } else {
          Locs[i] = std::make_pair(1, NumHi);
          if (2+NumHi < NumElems)
            Mask1[2+NumHi] = Elt;
          NumHi++;
        }
      }
    }
    if (NumLo <= 2 && NumHi <= 2) {
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask1));
      for (unsigned i = 0; i != NumElems; ++i) {
        if (Locs[i].first == -1)
          continue;
        else {
          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
        }
      }

      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask2));
    }

    // Break it into (shuffle shuffle_hi, shuffle_lo).
    Locs.clear();
    Locs.resize(NumElems);  // See above: indexed directly below.
    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> *MaskPtr = &LoMask;
    unsigned MaskIdx = 0;
    unsigned LoIdx = 0;
    unsigned HiIdx = NumElems/2;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (i == NumElems/2) {
        MaskPtr = &HiMask;
        MaskIdx = 1;
        LoIdx = 0;
        HiIdx = NumElems/2;
      }
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
        Locs[i] = std::make_pair(MaskIdx, LoIdx);
        (*MaskPtr)[LoIdx] = Elt;
        LoIdx++;
      } else {
        Locs[i] = std::make_pair(MaskIdx, HiIdx);
        (*MaskPtr)[HiIdx] = Elt;
        HiIdx++;
      }
    }

    SDOperand LoShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask));
    SDOperand HiShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask));
    std::vector<SDOperand> MaskOps;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (Locs[i].first == -1) {
        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
      } else {
        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
      }
    }
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps));
  }

  return SDOperand();
}
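// Editor's note: tracing the four-element path above on mask <2, 7, 0, 5>:
// the first shuffle gathers V1's elements into the low half and V2's into the
// high half with Mask1 = <2, 0, 7, 5>; the second shuffle then reorders that
// single intermediate with Mask2 = <0, 2, 5, 7> to produce the requested
// order.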
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  MVT::ValueType VT = Op.getValueType();
  // TODO: handle v16i8.
  if (MVT::getSizeInBits(VT) == 16) {
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);  // i32 follows i16 in the MVT enum.
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                   DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // SHUFPS the element to the lowest double word, then movss.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4,
                                        MVT::getVectorBaseType(MaskVT));
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, Vec, Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, MVT::i32));
  } else if (MVT::getSizeInBits(VT) == 64) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);  // Two elements, to match the two-element shuffle.
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, MVT::i32));
  }

  return SDOperand();
}
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  // Transform it so it matches pinsrw, which expects a 16-bit value in a R32
  // as its second argument.
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);
  if (MVT::getSizeInBits(BaseVT) == 16) {
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  } else if (MVT::getSizeInBits(BaseVT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
    if (Idx == 0) {
      // Use a movss.
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec));
    } else {
      // Use two pinsrw instructions to insert a 32 bit value.
      Idx <<= 1;
      if (MVT::isFloatingPoint(N1.getValueType())) {
        if (N1.getOpcode() == ISD::LOAD) {
          // Just load directly from f32mem to R32.
          N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
                           N1.getOperand(2));
        } else {
          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                           DAG.getConstant(0, MVT::i32));
        }
      }
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, MVT::i32));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, MVT::i32));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing modes. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result =
    DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                DAG.getTargetConstantPool(CP->get(), getPointerTy(),
                                          CP->getAlignment()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result =
    DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                DAG.getTargetGlobalAddress(GV, getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);

    // For Darwin, external and weak symbols are indirect, so we want to load
    // the value at address GV, not the value of GV itself.
    // This means that the GlobalAddress must be in the base or index register
    // of the address, not the GV offset field.
    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
        DarwinGVRequiresExtraLoad(GV))
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
                           Result, DAG.getSrcValue(NULL));
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result =
    DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                DAG.getTargetExternalSymbol(Sym, getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}
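// Editor's note on LowerShift below: a 64-bit shift is built from shld/shrd
// plus a single 32-bit shift, and TEST checks bit 5 of the shift amount. For
// SHL_PARTS, when the amount is >= 32 the CMOVs route lo << (amt & 31) into
// the high word and zero into the low word, relying on the hardware already
// masking the shift amount to 5 bits.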
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
         "Not an i64 shift!");
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDOperand ShOpLo = Op.getOperand(0);
  SDOperand ShOpHi = Op.getOperand(1);
  SDOperand ShAmt  = Op.getOperand(2);
  SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
                                       DAG.getConstant(31, MVT::i8))
                         : DAG.getConstant(0, MVT::i32);

  SDOperand Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
  }

  SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
                                 ShAmt, DAG.getConstant(32, MVT::i8));

  SDOperand Hi, Lo;
  SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::i32);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    InFlag = Hi.getValue(1);

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
  } else {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    InFlag = Lo.getValue(1);

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
  }

  Tys.clear();
  Tys.push_back(MVT::i32);
  Tys.push_back(MVT::i32);
  Ops.clear();
  Ops.push_back(Lo);
  Ops.push_back(Hi);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
}

SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
         Op.getOperand(0).getValueType() >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  SDOperand Result;
  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
                                DAG.getEntryNode(), Op.getOperand(0),
                                StackSlot, DAG.getSrcValue(NULL));

  // Build the FILD
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::f64);
  Tys.push_back(MVT::Other);
  if (X86ScalarSSE) Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(StackSlot);
  Ops.push_back(DAG.getValueType(SrcVT));
  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
                       Tys, Ops);

  if (X86ScalarSSE) {
    Chain = Result.getValue(1);
    SDOperand InFlag = Result.getValue(2);

    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When stackifier is fixed, they can be uncoupled.
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Result);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getValueType()));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
                         DAG.getSrcValue(NULL));
  }

  return Result;
}

SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
         "Unknown FP_TO_SINT to lower!");
  // We lower FP->sint64 into FISTP64 followed by a load, all via a
  // temporary stack slot.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  unsigned Opc;
  switch (Op.getValueType()) {
  default: assert(0 && "Invalid FP_TO_SINT to lower!");
  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
  }

  SDOperand Chain = DAG.getEntryNode();
  SDOperand Value = Op.getOperand(0);
  if (X86ScalarSSE) {
    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
                        DAG.getSrcValue(0));
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
    Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Value);
  Ops.push_back(StackSlot);
  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);

  // Load the result.
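  // Chaining the load to the FIST node keeps it ordered after the
  // in-memory store that FP_TO_INT*_IN_MEM performs.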
  return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
                     DAG.getSrcValue(NULL));
}

SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  SDOperand Mask
    = DAG.getNode(X86ISD::LOAD_PACK,
                  VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK,
                               VT, DAG.getEntryNode(), CPIdx,
                               DAG.getSrcValue(NULL));
  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  bool Flip;
  unsigned X86CC;
  if (translateX86CC(CC, isFP, X86CC, Flip)) {
    if (Flip)
      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                         Op.getOperand(1), Op.getOperand(0));
    else
      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                         Op.getOperand(0), Op.getOperand(1));
    return DAG.getNode(X86ISD::SETCC, MVT::i8,
                       DAG.getConstant(X86CC, MVT::i8), Cond);
  } else {
    assert(isFP && "Illegal integer SetCC!");

    Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                       Op.getOperand(0), Op.getOperand(1));
    std::vector<MVT::ValueType> Tys;
    std::vector<SDOperand> Ops;
    switch (SetCCOpcode) {
    default: assert(false && "Illegal floating point SetCC!");
    case ISD::SETOEQ: {  // !PF & ZF
      Tys.push_back(MVT::i8);
      Tys.push_back(MVT::Flag);
      Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
      Ops.push_back(Cond);
      SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                   DAG.getConstant(X86ISD::COND_E, MVT::i8),
                                   Tmp1.getValue(1));
      return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
    }
    case ISD::SETUNE: {  // PF | !ZF
      Tys.push_back(MVT::i8);
      Tys.push_back(MVT::Flag);
      Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
      Ops.push_back(Cond);
      SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                   DAG.getConstant(X86ISD::COND_NE, MVT::i8),
                                   Tmp1.getValue(1));
      return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
    }
    }
  }
}

SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
  bool addTest = false;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Cond, CC;
  if (Op0.getOpcode() == ISD::SETCC)
    Op0 = LowerOperation(Op0, DAG);

  if (Op0.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). If the X86ISD::SETCC does
    // not have another use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
    unsigned CmpOpc = Op0.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Op0.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
          Tys.push_back(Op0.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
          Ops.push_back(Op0.getOperand(i));
        Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      }

      CC   = Op0.getOperand(0);
      Cond = Op0.getOperand(1);
      // Make a copy as the flag result cannot be used by more than one.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
      addTest =
        isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
  }

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(Op.getValueType());
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // the condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV, Tys, Ops);
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = false;
  SDOperand Cond = Op.getOperand(1);
  SDOperand Dest = Op.getOperand(2);
  SDOperand CC;
  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerOperation(Cond, DAG);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). If the X86ISD::SETCC does
    // not have another use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
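    // (This mirrors the flag-duplication logic in LowerSELECT above.)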
    unsigned CmpOpc = Cond.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Cond.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
          Tys.push_back(Cond.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
          Ops.push_back(Cond.getOperand(i));
        Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      }

      CC   = Cond.getOperand(0);
      Cond = Cond.getOperand(1);
      // Make a copy as the flag result cannot be used by more than one.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Op.getOperand(0), Dest, CC, Cond);
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetJumpTable(JT->getIndex(),
                                                        getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;

  switch (Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:    // ret void.
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
  case 2: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

    if (MVT::isVector(ArgVT)) {
      // Integer or FP vector result -> XMM0.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::XMM0);
      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                              SDOperand());
    } else if (MVT::isInteger(ArgVT)) {
      // Integer result -> EAX
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::EAX);

      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
                              SDOperand());
    } else if (!X86ScalarSSE) {
      // FP return with fp-stack value.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Op.getOperand(0));
      Ops.push_back(Op.getOperand(1));
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
    } else {
      // FP return with ScalarSSE (return on fp-stack).
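      // The value lives in an SSE register, but the ABI returns FP values
      // in ST(0), so it must go through memory and an FLD onto the fp-stack.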
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      SDOperand MemLoc;
      SDOperand Chain = Op.getOperand(0);
      SDOperand Value = Op.getOperand(1);

      if (Value.getOpcode() == ISD::LOAD &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(ArgVT)/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
                            Value, MemLoc, DAG.getSrcValue(0));
      }
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(MemLoc);
      Ops.push_back(DAG.getValueType(ArgVT));
      Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Copy.getValue(1));
      Ops.push_back(Copy);
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
    }
    break;
  }
  case 3:
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(X86::EAX);
      DAG.getMachineFunction().addLiveOut(X86::EDX);
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, X86::EAX, Op.getOperand(1),
                            Copy.getValue(1));
    break;
  }
  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                     Copy.getValue(1));
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  if (FormalArgs.size() == 0) {
    unsigned CC = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
    if (CC == CallingConv::Fast && EnableFastCC)
      LowerFastCCArguments(Op, DAG);
    else
      LowerCCCArguments(Op, DAG);
  }
  return FormalArgs[Op.ResNo];
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If the destination is not DWORD aligned, or a known size is below the
  // rep-string threshold, just call the memset library function; it knows
  // how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    // Extend the ubyte argument to be an int value for the call.
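    // (The C prototype is memset(void *dst, int value, size_t size); the
    // fill byte is passed as an int.)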
    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Args.push_back(std::make_pair(Val, IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    unsigned Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
      BytesLeft = I->getValue() % 2;
      Val    = (Val << 8) | Val;
      ValReg = X86::AX;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      if (I) {
        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
        BytesLeft = I->getValue() % 4;
      } else {
        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                            DAG.getConstant(2, MVT::i8));
        TwoRepStos = true;
      }
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      ValReg = X86::EAX;
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      ValReg = X86::AL;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = Op.getOperand(3);
    Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 3 bytes.
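    // e.g. a constant 7-byte DWORD-aligned memset does one rep;stos dword
    // (bytes 0-3), then an i16 store for bytes 4-5 and an i8 store for
    // byte 6.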
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}

SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If the destination is not DWORD aligned, or a known size is below the
  // rep-string threshold, just call the memcpy library function; it knows
  // how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  unsigned BytesLeft = 0;
  bool TwoRepMovs = false;
  switch (Align & 3) {
  case 2:   // WORD aligned
    AVT = MVT::i16;
    Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
    BytesLeft = I->getValue() % 2;
    break;
  case 0:   // DWORD aligned
    AVT = MVT::i32;
    if (I) {
      Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
      BytesLeft = I->getValue() % 4;
    } else {
      Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                          DAG.getConstant(2, MVT::i8));
      TwoRepMovs = true;
    }
    break;
  default:  // Byte aligned
    AVT = MVT::i8;
    Count = Op.getOperand(3);
    break;
  }

  SDOperand InFlag(0, 0);
  Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);

  if (TwoRepMovs) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain = DAG.getCopyToReg(Chain,
                             X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
  } else if (BytesLeft) {
    // Issue loads and stores for the last 1 - 3 bytes.
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Op.getOperand(2);
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}

SDOperand
X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Op.getOperand(0));
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
  Ops.clear();
  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
                                   MVT::i32, Ops[0].getValue(2)));
  Ops.push_back(Ops[1].getValue(1));
  Tys[0] = Tys[1] = MVT::i32;
  Tys.push_back(MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
}

SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  // FIXME: Replace MVT::i32 with PointerTy
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
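  // Each of these lowers to a COMI or UCOMI compare followed by a SETCC
  // on the resulting flags; the inner switch below picks the pairing.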
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }
    bool Flip;
    unsigned X86CC;
    translateX86CC(CC, true, X86CC, Flip);
    SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
                                 Op.getOperand(Flip?1:2));
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                  DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
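/// The legalizer invokes this hook for every node whose action was
/// registered as Custom in the constructor; each case simply dispatches
/// to the matching Lower* helper defined above.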
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::TEST:               return "X86ISD::TEST";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return
"X86ISD::PINSRW"; 3970 } 3971} 3972 3973void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3974 uint64_t Mask, 3975 uint64_t &KnownZero, 3976 uint64_t &KnownOne, 3977 unsigned Depth) const { 3978 unsigned Opc = Op.getOpcode(); 3979 assert((Opc >= ISD::BUILTIN_OP_END || 3980 Opc == ISD::INTRINSIC_WO_CHAIN || 3981 Opc == ISD::INTRINSIC_W_CHAIN || 3982 Opc == ISD::INTRINSIC_VOID) && 3983 "Should use MaskedValueIsZero if you don't know whether Op" 3984 " is a target node!"); 3985 3986 KnownZero = KnownOne = 0; // Don't know anything. 3987 switch (Opc) { 3988 default: break; 3989 case X86ISD::SETCC: 3990 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3991 break; 3992 } 3993} 3994 3995std::vector<unsigned> X86TargetLowering:: 3996getRegClassForInlineAsmConstraint(const std::string &Constraint, 3997 MVT::ValueType VT) const { 3998 if (Constraint.size() == 1) { 3999 // FIXME: not handling fp-stack yet! 4000 // FIXME: not handling MMX registers yet ('y' constraint). 4001 switch (Constraint[0]) { // GCC X86 Constraint Letters 4002 default: break; // Unknown constriant letter 4003 case 'r': // GENERAL_REGS 4004 case 'R': // LEGACY_REGS 4005 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 4006 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 4007 case 'l': // INDEX_REGS 4008 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 4009 X86::ESI, X86::EDI, X86::EBP, 0); 4010 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 4011 case 'Q': // Q_REGS 4012 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 4013 case 'x': // SSE_REGS if SSE1 allowed 4014 if (Subtarget->hasSSE1()) 4015 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 4016 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 4017 0); 4018 return std::vector<unsigned>(); 4019 case 'Y': // SSE_REGS if SSE2 allowed 4020 if (Subtarget->hasSSE2()) 4021 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 4022 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 4023 0); 4024 return std::vector<unsigned>(); 4025 } 4026 } 4027 4028 return std::vector<unsigned>(); 4029} 4030 4031/// isLegalAddressImmediate - Return true if the integer value or 4032/// GlobalValue can be used as the offset of the target addressing mode. 4033bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 4034 // X86 allows a sign-extended 32-bit immediate field. 4035 return (V > -(1LL << 32) && V < (1LL << 32)-1); 4036} 4037 4038bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 4039 if (Subtarget->isTargetDarwin()) { 4040 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 4041 if (RModel == Reloc::Static) 4042 return true; 4043 else if (RModel == Reloc::DynamicNoPIC) 4044 return !DarwinGVRequiresExtraLoad(GV); 4045 else 4046 return false; 4047 } else 4048 return true; 4049} 4050 4051/// isShuffleMaskLegal - Targets can use this to indicate that they only 4052/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4053/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4054/// are assumed to be legal. 4055bool 4056X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4057 // Only do shuffles on 128-bit vector types for now. 
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
  }
  return false;
}