X86ISelLowering.cpp revision 2fdd95eee79b66c9ae3cbe43c0db48eba48e116d
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ---*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
// Hidden command line switch.  LowerArguments and LowerCallTo below only take
// the fastcc path when this flag is set in addition to the function/call using
// CallingConv::Fast.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

/// X86TargetLowering constructor - Records the subtarget and then describes
/// the target to the legalizer: shift-amount / setcc result types, legal
/// address scales, register classes, and one action (Legal / Promote / Expand /
/// Custom) per (operation, value type) pair.  Which entries are installed
/// depends on the subtarget queries hasMMX(), hasSSE1(), hasSSE2(), hasSSE3()
/// and isTargetDarwin().
///
/// Several (operation, type) pairs are assigned more than once below (for
/// example the blanket "Expand" loop over all vector types, followed by
/// per-type Legal/Custom settings), so the order of the statements matters.
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  // Scalar FP lives in SSE registers only when SSE2 is available; otherwise
  // the RFP register class is used (see the FP register class setup below).
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  // NOTE(review): the condition enables the underscore variants on every
  // target *except* Darwin, while the comment below names Darwin as the target
  // that should use them.  Confirm which of the two is intended.
  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).  Only the three shift
  // *_PARTS operations are registered here.
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    // (FREM on f64 was already set to Expand unconditionally above; the entry
    // is repeated here.)
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // FSIN/FCOS on f64 are only expanded when unsafe FP math is off.
    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    // The loop stops before v2i64; v2f64 and v2i64 are handled explicitly
    // right after it.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // v2f64 / v2i64 loads are legal; custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

/// LowerArguments - Runs the generic TargetLowering::LowerArguments, clears the
/// FormalArgs / FormalArgLocs bookkeeping left over from a previous function,
/// and then runs the calling-convention specific preprocessing pass over the
/// resulting argument list.  The fastcc pass is used only when F uses
/// CallingConv::Fast and EnableFastCC is set; every other case goes through
/// the C calling convention pass.  Returns the argument list produced by the
/// generic implementation.
std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> Args = TargetLowering::LowerArguments(F, DAG);

  FormalArgs.clear();
  FormalArgLocs.clear();

  // This sets BytesToPopOnReturn, BytesCallerReserves, etc. which have to be set
  // before the rest of the function can be lowered.
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    PreprocessFastCCArguments(Args, F, DAG);
  else
    PreprocessCCCArguments(Args, F, DAG);
  return Args;
}

/// LowerCallTo - Entry point for lowering a call.  Asserts that only C calling
/// convention calls are vararg, rewrites a GlobalAddress / ExternalSymbol
/// callee into its Target* counterpart, and forwards to LowerFastCCCallTo
/// (CallingConv::Fast with EnableFastCC set) or LowerCCCCallTo (everything
/// else).  The result is whatever pair the selected helper returns.
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // isVarArg is not forwarded on the fastcc path.
  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.  Asserts that PReg is a member of RC and returns the new
/// virtual register.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is through stack, returns the size of the stack
/// frame; if it is through XMM register, returns the number of XMM registers
/// that are needed.
420static void 421HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs, 422 unsigned &ObjSize, unsigned &ObjXMMRegs) { 423 switch (ObjectVT) { 424 default: assert(0 && "Unhandled argument type!"); 425 case MVT::i1: 426 case MVT::i8: ObjSize = 1; break; 427 case MVT::i16: ObjSize = 2; break; 428 case MVT::i32: ObjSize = 4; break; 429 case MVT::i64: ObjSize = 8; break; 430 case MVT::f32: ObjSize = 4; break; 431 case MVT::f64: ObjSize = 8; break; 432 case MVT::v16i8: 433 case MVT::v8i16: 434 case MVT::v4i32: 435 case MVT::v2i64: 436 case MVT::v4f32: 437 case MVT::v2f64: 438 if (NumXMMRegs < 3) 439 ObjXMMRegs = 1; 440 else 441 ObjSize = 16; 442 break; 443 } 444} 445 446/// getFormalArgObjects - Returns itself if Op is a FORMAL_ARGUMENTS, otherwise 447/// returns the FORMAL_ARGUMENTS node(s) that made up parts of the node. 448static std::vector<SDOperand> getFormalArgObjects(SDOperand Op) { 449 unsigned Opc = Op.getOpcode(); 450 std::vector<SDOperand> Objs; 451 if (Opc == ISD::TRUNCATE) { 452 Op = Op.getOperand(0); 453 assert(Op.getOpcode() == ISD::AssertSext || 454 Op.getOpcode() == ISD::AssertZext); 455 Objs.push_back(Op.getOperand(0)); 456 } else if (Opc == ISD::FP_ROUND) { 457 Objs.push_back(Op.getOperand(0)); 458 } else if (Opc == ISD::BUILD_PAIR) { 459 Objs.push_back(Op.getOperand(0)); 460 Objs.push_back(Op.getOperand(1)); 461 } else { 462 Objs.push_back(Op); 463 } 464 return Objs; 465} 466 467void X86TargetLowering::PreprocessCCCArguments(std::vector<SDOperand>Args, 468 Function &F, SelectionDAG &DAG) { 469 unsigned NumArgs = Args.size(); 470 MachineFunction &MF = DAG.getMachineFunction(); 471 MachineFrameInfo *MFI = MF.getFrameInfo(); 472 473 // Add DAG nodes to load the arguments... On entry to a function on the X86, 474 // the stack frame looks like this: 475 // 476 // [ESP] -- return address 477 // [ESP + 4] -- first argument (leftmost lexically) 478 // [ESP + 8] -- second argument, if first argument is four bytes in size 479 // ... 
480 // 481 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 482 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 483 unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 }; 484 for (unsigned i = 0; i < NumArgs; ++i) { 485 SDOperand Op = Args[i]; 486 std::vector<SDOperand> Objs = getFormalArgObjects(Op); 487 for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end(); 488 I != E; ++I) { 489 SDOperand Obj = *I; 490 MVT::ValueType ObjectVT = Obj.getValueType(); 491 unsigned ArgIncrement = 4; 492 unsigned ObjSize = 0; 493 unsigned ObjXMMRegs = 0; 494 HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs); 495 if (ObjSize >= 8) 496 ArgIncrement = ObjSize; 497 498 if (ObjXMMRegs) { 499 // Passed in a XMM register. 500 unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 501 X86::VR128RegisterClass); 502 std::pair<FALocInfo, FALocInfo> Loc = 503 std::make_pair(FALocInfo(FALocInfo::LiveInRegLoc, Reg, ObjectVT), 504 FALocInfo()); 505 FormalArgLocs.push_back(Loc); 506 NumXMMRegs += ObjXMMRegs; 507 } else { 508 // Create the frame index object for this incoming parameter... 509 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 510 std::pair<FALocInfo, FALocInfo> Loc = 511 std::make_pair(FALocInfo(FALocInfo::StackFrameLoc, FI), FALocInfo()); 512 FormalArgLocs.push_back(Loc); 513 ArgOffset += ArgIncrement; // Move on to the next argument... 514 } 515 } 516 } 517 518 // If the function takes variable number of arguments, make a frame index for 519 // the start of the first vararg value... for expansion of llvm.va_start. 520 if (F.isVarArg()) 521 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 522 ReturnAddrIndex = 0; // No return address slot generated yet. 523 BytesToPopOnReturn = 0; // Callee pops nothing. 
524 BytesCallerReserves = ArgOffset; 525} 526 527void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) { 528 unsigned NumArgs = Op.Val->getNumValues(); 529 MachineFunction &MF = DAG.getMachineFunction(); 530 MachineFrameInfo *MFI = MF.getFrameInfo(); 531 532 for (unsigned i = 0; i < NumArgs; ++i) { 533 std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i]; 534 SDOperand ArgValue; 535 if (Loc.first.Kind == FALocInfo::StackFrameLoc) { 536 // Create the SelectionDAG nodes corresponding to a load from this parameter 537 unsigned FI = FormalArgLocs[i].first.Loc; 538 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 539 ArgValue = DAG.getLoad(Op.Val->getValueType(i), 540 DAG.getEntryNode(), FIN, DAG.getSrcValue(NULL)); 541 } else { 542 // Must be a CopyFromReg 543 ArgValue= DAG.getCopyFromReg(DAG.getEntryNode(), Loc.first.Loc, 544 Loc.first.Typ); 545 } 546 FormalArgs.push_back(ArgValue); 547 } 548} 549 550std::pair<SDOperand, SDOperand> 551X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, 552 bool isVarArg, bool isTailCall, 553 SDOperand Callee, ArgListTy &Args, 554 SelectionDAG &DAG) { 555 // Count how many bytes are to be pushed on the stack. 556 unsigned NumBytes = 0; 557 558 if (Args.empty()) { 559 // Save zero bytes. 560 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy())); 561 } else { 562 for (unsigned i = 0, e = Args.size(); i != e; ++i) 563 switch (getValueType(Args[i].second)) { 564 default: assert(0 && "Unknown value type!"); 565 case MVT::i1: 566 case MVT::i8: 567 case MVT::i16: 568 case MVT::i32: 569 case MVT::f32: 570 NumBytes += 4; 571 break; 572 case MVT::i64: 573 case MVT::f64: 574 NumBytes += 8; 575 break; 576 } 577 578 Chain = DAG.getCALLSEQ_START(Chain, 579 DAG.getConstant(NumBytes, getPointerTy())); 580 581 // Arguments go on the stack in reverse order, as specified by the ABI. 
582 unsigned ArgOffset = 0; 583 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 584 std::vector<SDOperand> Stores; 585 586 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 587 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 588 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 589 590 switch (getValueType(Args[i].second)) { 591 default: assert(0 && "Unexpected ValueType for argument!"); 592 case MVT::i1: 593 case MVT::i8: 594 case MVT::i16: 595 // Promote the integer to 32 bits. If the input type is signed use a 596 // sign extend, otherwise use a zero extend. 597 if (Args[i].second->isSigned()) 598 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 599 else 600 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 601 602 // FALL THROUGH 603 case MVT::i32: 604 case MVT::f32: 605 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 606 Args[i].first, PtrOff, 607 DAG.getSrcValue(NULL))); 608 ArgOffset += 4; 609 break; 610 case MVT::i64: 611 case MVT::f64: 612 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 613 Args[i].first, PtrOff, 614 DAG.getSrcValue(NULL))); 615 ArgOffset += 8; 616 break; 617 } 618 } 619 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 620 } 621 622 std::vector<MVT::ValueType> RetVals; 623 MVT::ValueType RetTyVT = getValueType(RetTy); 624 RetVals.push_back(MVT::Other); 625 626 // The result values produced have to be legal. Promote the result. 
627 switch (RetTyVT) { 628 case MVT::isVoid: break; 629 default: 630 RetVals.push_back(RetTyVT); 631 break; 632 case MVT::i1: 633 case MVT::i8: 634 case MVT::i16: 635 RetVals.push_back(MVT::i32); 636 break; 637 case MVT::f32: 638 if (X86ScalarSSE) 639 RetVals.push_back(MVT::f32); 640 else 641 RetVals.push_back(MVT::f64); 642 break; 643 case MVT::i64: 644 RetVals.push_back(MVT::i32); 645 RetVals.push_back(MVT::i32); 646 break; 647 } 648 649 std::vector<MVT::ValueType> NodeTys; 650 NodeTys.push_back(MVT::Other); // Returns a chain 651 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 652 std::vector<SDOperand> Ops; 653 Ops.push_back(Chain); 654 Ops.push_back(Callee); 655 656 // FIXME: Do not generate X86ISD::TAILCALL for now. 657 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 658 SDOperand InFlag = Chain.getValue(1); 659 660 NodeTys.clear(); 661 NodeTys.push_back(MVT::Other); // Returns a chain 662 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 
663 Ops.clear(); 664 Ops.push_back(Chain); 665 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 666 Ops.push_back(DAG.getConstant(0, getPointerTy())); 667 Ops.push_back(InFlag); 668 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 669 InFlag = Chain.getValue(1); 670 671 SDOperand RetVal; 672 if (RetTyVT != MVT::isVoid) { 673 switch (RetTyVT) { 674 default: assert(0 && "Unknown value type to return!"); 675 case MVT::i1: 676 case MVT::i8: 677 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 678 Chain = RetVal.getValue(1); 679 if (RetTyVT == MVT::i1) 680 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 681 break; 682 case MVT::i16: 683 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 684 Chain = RetVal.getValue(1); 685 break; 686 case MVT::i32: 687 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 688 Chain = RetVal.getValue(1); 689 break; 690 case MVT::i64: { 691 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 692 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 693 Lo.getValue(2)); 694 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 695 Chain = Hi.getValue(1); 696 break; 697 } 698 case MVT::f32: 699 case MVT::f64: { 700 std::vector<MVT::ValueType> Tys; 701 Tys.push_back(MVT::f64); 702 Tys.push_back(MVT::Other); 703 Tys.push_back(MVT::Flag); 704 std::vector<SDOperand> Ops; 705 Ops.push_back(Chain); 706 Ops.push_back(InFlag); 707 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 708 Chain = RetVal.getValue(1); 709 InFlag = RetVal.getValue(2); 710 if (X86ScalarSSE) { 711 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 712 // shouldn't be necessary except that RFP cannot be live across 713 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
714 MachineFunction &MF = DAG.getMachineFunction(); 715 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 716 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 717 Tys.clear(); 718 Tys.push_back(MVT::Other); 719 Ops.clear(); 720 Ops.push_back(Chain); 721 Ops.push_back(RetVal); 722 Ops.push_back(StackSlot); 723 Ops.push_back(DAG.getValueType(RetTyVT)); 724 Ops.push_back(InFlag); 725 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 726 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 727 DAG.getSrcValue(NULL)); 728 Chain = RetVal.getValue(1); 729 } 730 731 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 732 // FIXME: we would really like to remember that this FP_ROUND 733 // operation is okay to eliminate if we allow excess FP precision. 734 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 735 break; 736 } 737 } 738 } 739 740 return std::make_pair(RetVal, Chain); 741} 742 743//===----------------------------------------------------------------------===// 744// Fast Calling Convention implementation 745//===----------------------------------------------------------------------===// 746// 747// The X86 'fast' calling convention passes up to two integer arguments in 748// registers (an appropriate portion of EAX/EDX), passes arguments in C order, 749// and requires that the callee pop its arguments off the stack (allowing proper 750// tail calls), and has the same return value conventions as C calling convs. 751// 752// This calling convention always arranges for the callee pop value to be 8n+4 753// bytes, which is needed for tail recursion elimination and stack alignment 754// reasons. 755// 756// Note that this can be enhanced in the future to pass fp vals in registers 757// (when we have a global fp allocator) and do other tricks. 758// 759 760// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments 761// to pass in registers. 0 is none, 1 is is "use EAX", 2 is "use EAX and 762// EDX". Anything more is illegal. 
763// 764// FIXME: The linscan register allocator currently has problem with 765// coalescing. At the time of this writing, whenever it decides to coalesce 766// a physreg with a virtreg, this increases the size of the physreg's live 767// range, and the live range cannot ever be reduced. This causes problems if 768// too many physregs are coaleced with virtregs, which can cause the register 769// allocator to wedge itself. 770// 771// This code triggers this problem more often if we pass args in registers, 772// so disable it until this is fixed. 773// 774// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings 775// about code being dead. 776// 777static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0; 778 779 780/// HowToPassFastCCArgument - Returns how an formal argument of the specified 781/// type should be passed. If it is through stack, returns the size of the stack 782/// frame; if it is through integer or XMM register, returns the number of 783/// integer or XMM registers are needed. 
784static void 785HowToPassFastCCArgument(MVT::ValueType ObjectVT, 786 unsigned NumIntRegs, unsigned NumXMMRegs, 787 unsigned &ObjSize, unsigned &ObjIntRegs, 788 unsigned &ObjXMMRegs) { 789 ObjSize = 0; 790 NumIntRegs = 0; 791 792 switch (ObjectVT) { 793 default: assert(0 && "Unhandled argument type!"); 794 case MVT::i1: 795 case MVT::i8: 796 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 797 ObjIntRegs = 1; 798 else 799 ObjSize = 1; 800 break; 801 case MVT::i16: 802 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 803 ObjIntRegs = 1; 804 else 805 ObjSize = 2; 806 break; 807 case MVT::i32: 808 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 809 ObjIntRegs = 1; 810 else 811 ObjSize = 4; 812 break; 813 case MVT::i64: 814 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 815 ObjIntRegs = 2; 816 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 817 ObjIntRegs = 1; 818 ObjSize = 4; 819 } else 820 ObjSize = 8; 821 case MVT::f32: 822 ObjSize = 4; 823 break; 824 case MVT::f64: 825 ObjSize = 8; 826 break; 827 case MVT::v16i8: 828 case MVT::v8i16: 829 case MVT::v4i32: 830 case MVT::v2i64: 831 case MVT::v4f32: 832 case MVT::v2f64: 833 if (NumXMMRegs < 3) 834 ObjXMMRegs = 1; 835 else 836 ObjSize = 16; 837 break; 838 } 839} 840 841void 842X86TargetLowering::PreprocessFastCCArguments(std::vector<SDOperand>Args, 843 Function &F, SelectionDAG &DAG) { 844 unsigned NumArgs = Args.size(); 845 MachineFunction &MF = DAG.getMachineFunction(); 846 MachineFrameInfo *MFI = MF.getFrameInfo(); 847 848 // Add DAG nodes to load the arguments... On entry to a function the stack 849 // frame looks like this: 850 // 851 // [ESP] -- return address 852 // [ESP + 4] -- first nonreg argument (leftmost lexically) 853 // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size 854 // ... 855 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 856 857 // Keep track of the number of integer regs passed so far. 
This can be either 858 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 859 // used). 860 unsigned NumIntRegs = 0; 861 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 862 unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 }; 863 864 for (unsigned i = 0; i < NumArgs; ++i) { 865 SDOperand Op = Args[i]; 866 std::vector<SDOperand> Objs = getFormalArgObjects(Op); 867 for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end(); 868 I != E; ++I) { 869 SDOperand Obj = *I; 870 MVT::ValueType ObjectVT = Obj.getValueType(); 871 unsigned ArgIncrement = 4; 872 unsigned ObjSize = 0; 873 unsigned ObjIntRegs = 0; 874 unsigned ObjXMMRegs = 0; 875 876 HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs, 877 ObjSize, ObjIntRegs, ObjXMMRegs); 878 if (ObjSize >= 8) 879 ArgIncrement = ObjSize; 880 881 unsigned Reg; 882 std::pair<FALocInfo,FALocInfo> Loc = std::make_pair(FALocInfo(), 883 FALocInfo()); 884 if (ObjIntRegs) { 885 switch (ObjectVT) { 886 default: assert(0 && "Unhandled argument type!"); 887 case MVT::i1: 888 case MVT::i8: 889 Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, 890 X86::R8RegisterClass); 891 Loc.first.Kind = FALocInfo::LiveInRegLoc; 892 Loc.first.Loc = Reg; 893 Loc.first.Typ = MVT::i8; 894 break; 895 case MVT::i16: 896 Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX, 897 X86::R16RegisterClass); 898 Loc.first.Kind = FALocInfo::LiveInRegLoc; 899 Loc.first.Loc = Reg; 900 Loc.first.Typ = MVT::i16; 901 break; 902 case MVT::i32: 903 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 904 X86::R32RegisterClass); 905 Loc.first.Kind = FALocInfo::LiveInRegLoc; 906 Loc.first.Loc = Reg; 907 Loc.first.Typ = MVT::i32; 908 break; 909 case MVT::i64: 910 Reg = AddLiveIn(MF, NumIntRegs ? 
X86::EDX : X86::EAX, 911 X86::R32RegisterClass); 912 Loc.first.Kind = FALocInfo::LiveInRegLoc; 913 Loc.first.Loc = Reg; 914 Loc.first.Typ = MVT::i32; 915 if (ObjIntRegs == 2) { 916 Reg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 917 Loc.second.Kind = FALocInfo::LiveInRegLoc; 918 Loc.second.Loc = Reg; 919 Loc.second.Typ = MVT::i32; 920 } 921 break; 922 case MVT::v16i8: 923 case MVT::v8i16: 924 case MVT::v4i32: 925 case MVT::v2i64: 926 case MVT::v4f32: 927 case MVT::v2f64: 928 Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass); 929 Loc.first.Kind = FALocInfo::LiveInRegLoc; 930 Loc.first.Loc = Reg; 931 Loc.first.Typ = ObjectVT; 932 break; 933 } 934 NumIntRegs += ObjIntRegs; 935 NumXMMRegs += ObjXMMRegs; 936 } 937 if (ObjSize) { 938 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 939 if (ObjectVT == MVT::i64 && ObjIntRegs) { 940 Loc.second.Kind = FALocInfo::StackFrameLoc; 941 Loc.second.Loc = FI; 942 } else { 943 Loc.first.Kind = FALocInfo::StackFrameLoc; 944 Loc.first.Loc = FI; 945 } 946 ArgOffset += ArgIncrement; // Move on to the next argument. 947 } 948 949 FormalArgLocs.push_back(Loc); 950 } 951 } 952 953 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 954 // arguments and the arguments after the retaddr has been pushed are aligned. 955 if ((ArgOffset & 7) == 0) 956 ArgOffset += 4; 957 958 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 959 ReturnAddrIndex = 0; // No return address slot generated yet. 960 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 961 BytesCallerReserves = 0; 962 963 // Finally, inform the code generator which regs we return values in. 
964 switch (getValueType(F.getReturnType())) { 965 default: assert(0 && "Unknown type!"); 966 case MVT::isVoid: break; 967 case MVT::i1: 968 case MVT::i8: 969 case MVT::i16: 970 case MVT::i32: 971 MF.addLiveOut(X86::EAX); 972 break; 973 case MVT::i64: 974 MF.addLiveOut(X86::EAX); 975 MF.addLiveOut(X86::EDX); 976 break; 977 case MVT::f32: 978 case MVT::f64: 979 MF.addLiveOut(X86::ST0); 980 break; 981 } 982} 983void 984X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 985 unsigned NumArgs = Op.Val->getNumValues(); 986 MachineFunction &MF = DAG.getMachineFunction(); 987 MachineFrameInfo *MFI = MF.getFrameInfo(); 988 989 for (unsigned i = 0; i < NumArgs; ++i) { 990 MVT::ValueType VT = Op.Val->getValueType(i); 991 std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i]; 992 SDOperand ArgValue; 993 if (Loc.first.Kind == FALocInfo::StackFrameLoc) { 994 // Create the SelectionDAG nodes corresponding to a load from this parameter 995 SDOperand FIN = DAG.getFrameIndex(Loc.first.Loc, MVT::i32); 996 ArgValue = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(), FIN, 997 DAG.getSrcValue(NULL)); 998 } else { 999 // Must be a CopyFromReg 1000 ArgValue= DAG.getCopyFromReg(DAG.getEntryNode(), Loc.first.Loc, 1001 Loc.first.Typ); 1002 } 1003 1004 if (Loc.second.Kind != FALocInfo::None) { 1005 SDOperand ArgValue2; 1006 if (Loc.second.Kind == FALocInfo::StackFrameLoc) { 1007 // Create the SelectionDAG nodes corresponding to a load from this parameter 1008 SDOperand FIN = DAG.getFrameIndex(Loc.second.Loc, MVT::i32); 1009 ArgValue2 = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(), FIN, 1010 DAG.getSrcValue(NULL)); 1011 } else { 1012 // Must be a CopyFromReg 1013 ArgValue2 = DAG.getCopyFromReg(DAG.getEntryNode(), 1014 Loc.second.Loc, Loc.second.Typ); 1015 } 1016 ArgValue = DAG.getNode(ISD::BUILD_PAIR, VT, ArgValue, ArgValue2); 1017 } 1018 FormalArgs.push_back(ArgValue); 1019 } 1020} 1021 1022std::pair<SDOperand, SDOperand> 
1023X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, 1024 bool isTailCall, SDOperand Callee, 1025 ArgListTy &Args, SelectionDAG &DAG) { 1026 // Count how many bytes are to be pushed on the stack. 1027 unsigned NumBytes = 0; 1028 1029 // Keep track of the number of integer regs passed so far. This can be either 1030 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 1031 // used). 1032 unsigned NumIntRegs = 0; 1033 1034 for (unsigned i = 0, e = Args.size(); i != e; ++i) 1035 switch (getValueType(Args[i].second)) { 1036 default: assert(0 && "Unknown value type!"); 1037 case MVT::i1: 1038 case MVT::i8: 1039 case MVT::i16: 1040 case MVT::i32: 1041 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1042 ++NumIntRegs; 1043 break; 1044 } 1045 // fall through 1046 case MVT::f32: 1047 NumBytes += 4; 1048 break; 1049 case MVT::i64: 1050 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 1051 NumIntRegs += 2; 1052 break; 1053 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 1054 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 1055 NumBytes += 4; 1056 break; 1057 } 1058 1059 // fall through 1060 case MVT::f64: 1061 NumBytes += 8; 1062 break; 1063 } 1064 1065 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1066 // arguments and the arguments after the retaddr has been pushed are aligned. 1067 if ((NumBytes & 7) == 0) 1068 NumBytes += 4; 1069 1070 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1071 1072 // Arguments go on the stack in reverse order, as specified by the ABI. 
1073 unsigned ArgOffset = 0; 1074 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 1075 NumIntRegs = 0; 1076 std::vector<SDOperand> Stores; 1077 std::vector<SDOperand> RegValuesToPass; 1078 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 1079 switch (getValueType(Args[i].second)) { 1080 default: assert(0 && "Unexpected ValueType for argument!"); 1081 case MVT::i1: 1082 Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first); 1083 // Fall through. 1084 case MVT::i8: 1085 case MVT::i16: 1086 case MVT::i32: 1087 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1088 RegValuesToPass.push_back(Args[i].first); 1089 ++NumIntRegs; 1090 break; 1091 } 1092 // Fall through 1093 case MVT::f32: { 1094 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1095 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 1096 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1097 Args[i].first, PtrOff, 1098 DAG.getSrcValue(NULL))); 1099 ArgOffset += 4; 1100 break; 1101 } 1102 case MVT::i64: 1103 // Can pass (at least) part of it in regs? 1104 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1105 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1106 Args[i].first, DAG.getConstant(1, MVT::i32)); 1107 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1108 Args[i].first, DAG.getConstant(0, MVT::i32)); 1109 RegValuesToPass.push_back(Lo); 1110 ++NumIntRegs; 1111 1112 // Pass both parts in regs? 1113 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1114 RegValuesToPass.push_back(Hi); 1115 ++NumIntRegs; 1116 } else { 1117 // Pass the high part in memory. 
1118 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1119 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 1120 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1121 Hi, PtrOff, DAG.getSrcValue(NULL))); 1122 ArgOffset += 4; 1123 } 1124 break; 1125 } 1126 // Fall through 1127 case MVT::f64: 1128 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1129 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 1130 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1131 Args[i].first, PtrOff, 1132 DAG.getSrcValue(NULL))); 1133 ArgOffset += 8; 1134 break; 1135 } 1136 } 1137 if (!Stores.empty()) 1138 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 1139 1140 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1141 // arguments and the arguments after the retaddr has been pushed are aligned. 1142 if ((ArgOffset & 7) == 0) 1143 ArgOffset += 4; 1144 1145 std::vector<MVT::ValueType> RetVals; 1146 MVT::ValueType RetTyVT = getValueType(RetTy); 1147 1148 RetVals.push_back(MVT::Other); 1149 1150 // The result values produced have to be legal. Promote the result. 1151 switch (RetTyVT) { 1152 case MVT::isVoid: break; 1153 default: 1154 RetVals.push_back(RetTyVT); 1155 break; 1156 case MVT::i1: 1157 case MVT::i8: 1158 case MVT::i16: 1159 RetVals.push_back(MVT::i32); 1160 break; 1161 case MVT::f32: 1162 if (X86ScalarSSE) 1163 RetVals.push_back(MVT::f32); 1164 else 1165 RetVals.push_back(MVT::f64); 1166 break; 1167 case MVT::i64: 1168 RetVals.push_back(MVT::i32); 1169 RetVals.push_back(MVT::i32); 1170 break; 1171 } 1172 1173 // Build a sequence of copy-to-reg nodes chained together with token chain 1174 // and flag operands which copy the outgoing args into registers. 
1175 SDOperand InFlag; 1176 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 1177 unsigned CCReg; 1178 SDOperand RegToPass = RegValuesToPass[i]; 1179 switch (RegToPass.getValueType()) { 1180 default: assert(0 && "Bad thing to pass in regs"); 1181 case MVT::i8: 1182 CCReg = (i == 0) ? X86::AL : X86::DL; 1183 break; 1184 case MVT::i16: 1185 CCReg = (i == 0) ? X86::AX : X86::DX; 1186 break; 1187 case MVT::i32: 1188 CCReg = (i == 0) ? X86::EAX : X86::EDX; 1189 break; 1190 } 1191 1192 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 1193 InFlag = Chain.getValue(1); 1194 } 1195 1196 std::vector<MVT::ValueType> NodeTys; 1197 NodeTys.push_back(MVT::Other); // Returns a chain 1198 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1199 std::vector<SDOperand> Ops; 1200 Ops.push_back(Chain); 1201 Ops.push_back(Callee); 1202 if (InFlag.Val) 1203 Ops.push_back(InFlag); 1204 1205 // FIXME: Do not generate X86ISD::TAILCALL for now. 1206 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 1207 InFlag = Chain.getValue(1); 1208 1209 NodeTys.clear(); 1210 NodeTys.push_back(MVT::Other); // Returns a chain 1211 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 
1212 Ops.clear(); 1213 Ops.push_back(Chain); 1214 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1215 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1216 Ops.push_back(InFlag); 1217 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 1218 InFlag = Chain.getValue(1); 1219 1220 SDOperand RetVal; 1221 if (RetTyVT != MVT::isVoid) { 1222 switch (RetTyVT) { 1223 default: assert(0 && "Unknown value type to return!"); 1224 case MVT::i1: 1225 case MVT::i8: 1226 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 1227 Chain = RetVal.getValue(1); 1228 if (RetTyVT == MVT::i1) 1229 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 1230 break; 1231 case MVT::i16: 1232 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 1233 Chain = RetVal.getValue(1); 1234 break; 1235 case MVT::i32: 1236 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1237 Chain = RetVal.getValue(1); 1238 break; 1239 case MVT::i64: { 1240 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1241 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 1242 Lo.getValue(2)); 1243 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 1244 Chain = Hi.getValue(1); 1245 break; 1246 } 1247 case MVT::f32: 1248 case MVT::f64: { 1249 std::vector<MVT::ValueType> Tys; 1250 Tys.push_back(MVT::f64); 1251 Tys.push_back(MVT::Other); 1252 Tys.push_back(MVT::Flag); 1253 std::vector<SDOperand> Ops; 1254 Ops.push_back(Chain); 1255 Ops.push_back(InFlag); 1256 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 1257 Chain = RetVal.getValue(1); 1258 InFlag = RetVal.getValue(2); 1259 if (X86ScalarSSE) { 1260 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 1261 // shouldn't be necessary except that RFP cannot be live across 1262 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
1263 MachineFunction &MF = DAG.getMachineFunction(); 1264 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1265 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1266 Tys.clear(); 1267 Tys.push_back(MVT::Other); 1268 Ops.clear(); 1269 Ops.push_back(Chain); 1270 Ops.push_back(RetVal); 1271 Ops.push_back(StackSlot); 1272 Ops.push_back(DAG.getValueType(RetTyVT)); 1273 Ops.push_back(InFlag); 1274 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 1275 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 1276 DAG.getSrcValue(NULL)); 1277 Chain = RetVal.getValue(1); 1278 } 1279 1280 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 1281 // FIXME: we would really like to remember that this FP_ROUND 1282 // operation is okay to eliminate if we allow excess FP precision. 1283 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1284 break; 1285 } 1286 } 1287 } 1288 1289 return std::make_pair(RetVal, Chain); 1290} 1291 1292SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1293 if (ReturnAddrIndex == 0) { 1294 // Set up a frame object for the return address. 1295 MachineFunction &MF = DAG.getMachineFunction(); 1296 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1297 } 1298 1299 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 1300} 1301 1302 1303 1304std::pair<SDOperand, SDOperand> X86TargetLowering:: 1305LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1306 SelectionDAG &DAG) { 1307 SDOperand Result; 1308 if (Depth) // Depths > 0 not supported yet! 
1309 Result = DAG.getConstant(0, getPointerTy()); 1310 else { 1311 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1312 if (!isFrameAddress) 1313 // Just load the return address 1314 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 1315 DAG.getSrcValue(NULL)); 1316 else 1317 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 1318 DAG.getConstant(4, MVT::i32)); 1319 } 1320 return std::make_pair(Result, Chain); 1321} 1322 1323/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1324/// which corresponds to the condition code. 1325static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1326 switch (X86CC) { 1327 default: assert(0 && "Unknown X86 conditional code!"); 1328 case X86ISD::COND_A: return X86::JA; 1329 case X86ISD::COND_AE: return X86::JAE; 1330 case X86ISD::COND_B: return X86::JB; 1331 case X86ISD::COND_BE: return X86::JBE; 1332 case X86ISD::COND_E: return X86::JE; 1333 case X86ISD::COND_G: return X86::JG; 1334 case X86ISD::COND_GE: return X86::JGE; 1335 case X86ISD::COND_L: return X86::JL; 1336 case X86ISD::COND_LE: return X86::JLE; 1337 case X86ISD::COND_NE: return X86::JNE; 1338 case X86ISD::COND_NO: return X86::JNO; 1339 case X86ISD::COND_NP: return X86::JNP; 1340 case X86ISD::COND_NS: return X86::JNS; 1341 case X86ISD::COND_O: return X86::JO; 1342 case X86ISD::COND_P: return X86::JP; 1343 case X86ISD::COND_S: return X86::JS; 1344 } 1345} 1346 1347/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1348/// specific condition code. It returns a false if it cannot do a direct 1349/// translation. X86CC is the translated CondCode. Flip is set to true if the 1350/// the order of comparison operands should be flipped. 
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  // Start from "no translation, operands in their original order"; the
  // switches below only overwrite these for condition codes they recognise.
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    // Integer compares: signed codes map to G/GE/L/LE, unsigned codes to
    // A/AE/B/BE.
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    // ZF  PF  CF   op
    //  0 | 0 | 0 | X > Y
    //  0 | 0 | 1 | X < Y
    //  1 | 0 | 0 | X == Y
    //  1 | 1 | 1 | unordered
    //
    // Cases that set Flip ask the caller to swap the compare operands and
    // then share the condition code of the case they fall through into.
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
    }
  }

  // Anything left at COND_INVALID has no direct translation.
  return X86CC != X86ISD::COND_INVALID;
}

/// translateX86CC - Convenience overload: CC must be a CondCodeSDNode; its
/// condition code is forwarded to the ISD::CondCode overload above.
static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. Current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

/// InsertAtEndOfBasicBlock - Expand a pseudo instruction (the CMOV_* selects
/// and the FP_TO_INT*_IN_MEM conversions) into real machine code appended to
/// BB.  The pseudo instruction MI is deleted.  Returns the block in which
/// code emission should continue: the new sink block for the CMOV pseudos, or
/// BB itself for the FP_TO_INT pseudos.
MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    // It marks the position right after BB; both new blocks go in before it.
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    // Operand 3 of the pseudo holds the X86 condition code.
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
          e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while(!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    // 2-byte, 2-byte-aligned stack slot used to spill the control word.
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    // Rebuild the destination address from operands 0..3 of the pseudo;
    // operand 4 is the register holding the value to store.
    // NOTE(review): Op is a reference bound to operand 0, so each
    // "Op = MI->getOperand(k)" below appears to assign into operand 0 rather
    // than rebind Op.  MI is deleted at the end of this case -- confirm that
    // overwriting operand 0 is intended/harmless.
    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    // NOTE(review): IndexReg is only filled in when operand 2 is an
    // immediate; a register index operand is not copied -- confirm.
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}


//===----------------------------------------------------------------------===//
//                           X86 Custom Lowering Hooks
//===----------------------------------------------------------------------===//

/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
/// load. For Darwin, external and weak symbols are indirect, loading the value
/// at address GV rather than the value of GV itself. This means that the
/// GlobalAddress must be in the base or index register of the address, not the
/// GV offset field.
static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value falls within the half-open range
/// [Low, Hi), i.e. Low <= value < Hi.
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
1589bool X86::isPSHUFDMask(SDNode *N) { 1590 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1591 1592 if (N->getNumOperands() != 4) 1593 return false; 1594 1595 // Check if the value doesn't reference the second vector. 1596 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1597 SDOperand Arg = N->getOperand(i); 1598 if (Arg.getOpcode() == ISD::UNDEF) continue; 1599 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1600 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1601 return false; 1602 } 1603 1604 return true; 1605} 1606 1607/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1608/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1609bool X86::isPSHUFHWMask(SDNode *N) { 1610 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1611 1612 if (N->getNumOperands() != 8) 1613 return false; 1614 1615 // Lower quadword copied in order. 1616 for (unsigned i = 0; i != 4; ++i) { 1617 SDOperand Arg = N->getOperand(i); 1618 if (Arg.getOpcode() == ISD::UNDEF) continue; 1619 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1620 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1621 return false; 1622 } 1623 1624 // Upper quadword shuffled. 1625 for (unsigned i = 4; i != 8; ++i) { 1626 SDOperand Arg = N->getOperand(i); 1627 if (Arg.getOpcode() == ISD::UNDEF) continue; 1628 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1629 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1630 if (Val < 4 || Val > 7) 1631 return false; 1632 } 1633 1634 return true; 1635} 1636 1637/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1638/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1639bool X86::isPSHUFLWMask(SDNode *N) { 1640 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1641 1642 if (N->getNumOperands() != 8) 1643 return false; 1644 1645 // Upper quadword copied in order. 
1646 for (unsigned i = 4; i != 8; ++i) 1647 if (!isUndefOrEqual(N->getOperand(i), i)) 1648 return false; 1649 1650 // Lower quadword shuffled. 1651 for (unsigned i = 0; i != 4; ++i) 1652 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1653 return false; 1654 1655 return true; 1656} 1657 1658/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1659/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1660static bool isSHUFPMask(std::vector<SDOperand> &N) { 1661 unsigned NumElems = N.size(); 1662 if (NumElems != 2 && NumElems != 4) return false; 1663 1664 unsigned Half = NumElems / 2; 1665 for (unsigned i = 0; i < Half; ++i) 1666 if (!isUndefOrInRange(N[i], 0, NumElems)) 1667 return false; 1668 for (unsigned i = Half; i < NumElems; ++i) 1669 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 1670 return false; 1671 1672 return true; 1673} 1674 1675bool X86::isSHUFPMask(SDNode *N) { 1676 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1677 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1678 return ::isSHUFPMask(Ops); 1679} 1680 1681/// isCommutedSHUFP - Returns true if the shuffle mask is except 1682/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1683/// half elements to come from vector 1 (which would equal the dest.) and 1684/// the upper half to come from vector 2. 
1685static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 1686 unsigned NumElems = Ops.size(); 1687 if (NumElems != 2 && NumElems != 4) return false; 1688 1689 unsigned Half = NumElems / 2; 1690 for (unsigned i = 0; i < Half; ++i) 1691 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 1692 return false; 1693 for (unsigned i = Half; i < NumElems; ++i) 1694 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 1695 return false; 1696 return true; 1697} 1698 1699static bool isCommutedSHUFP(SDNode *N) { 1700 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1701 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1702 return isCommutedSHUFP(Ops); 1703} 1704 1705/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1706/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1707bool X86::isMOVHLPSMask(SDNode *N) { 1708 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1709 1710 if (N->getNumOperands() != 4) 1711 return false; 1712 1713 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1714 return isUndefOrEqual(N->getOperand(0), 6) && 1715 isUndefOrEqual(N->getOperand(1), 7) && 1716 isUndefOrEqual(N->getOperand(2), 2) && 1717 isUndefOrEqual(N->getOperand(3), 3); 1718} 1719 1720/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1721/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 
1722bool X86::isMOVLPMask(SDNode *N) { 1723 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1724 1725 unsigned NumElems = N->getNumOperands(); 1726 if (NumElems != 2 && NumElems != 4) 1727 return false; 1728 1729 for (unsigned i = 0; i < NumElems/2; ++i) 1730 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1731 return false; 1732 1733 for (unsigned i = NumElems/2; i < NumElems; ++i) 1734 if (!isUndefOrEqual(N->getOperand(i), i)) 1735 return false; 1736 1737 return true; 1738} 1739 1740/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1741/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1742/// and MOVLHPS. 1743bool X86::isMOVHPMask(SDNode *N) { 1744 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1745 1746 unsigned NumElems = N->getNumOperands(); 1747 if (NumElems != 2 && NumElems != 4) 1748 return false; 1749 1750 for (unsigned i = 0; i < NumElems/2; ++i) 1751 if (!isUndefOrEqual(N->getOperand(i), i)) 1752 return false; 1753 1754 for (unsigned i = 0; i < NumElems/2; ++i) { 1755 SDOperand Arg = N->getOperand(i + NumElems/2); 1756 if (!isUndefOrEqual(Arg, i + NumElems)) 1757 return false; 1758 } 1759 1760 return true; 1761} 1762 1763/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1764/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
1765bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1766 unsigned NumElems = N.size(); 1767 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1768 return false; 1769 1770 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1771 SDOperand BitI = N[i]; 1772 SDOperand BitI1 = N[i+1]; 1773 if (!isUndefOrEqual(BitI, j)) 1774 return false; 1775 if (V2IsSplat) { 1776 if (isUndefOrEqual(BitI1, NumElems)) 1777 return false; 1778 } else { 1779 if (!isUndefOrEqual(BitI1, j + NumElems)) 1780 return false; 1781 } 1782 } 1783 1784 return true; 1785} 1786 1787bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1788 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1789 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1790 return ::isUNPCKLMask(Ops, V2IsSplat); 1791} 1792 1793/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1794/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1795bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1796 unsigned NumElems = N.size(); 1797 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1798 return false; 1799 1800 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1801 SDOperand BitI = N[i]; 1802 SDOperand BitI1 = N[i+1]; 1803 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1804 return false; 1805 if (V2IsSplat) { 1806 if (isUndefOrEqual(BitI1, NumElems)) 1807 return false; 1808 } else { 1809 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1810 return false; 1811 } 1812 } 1813 1814 return true; 1815} 1816 1817bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1818 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1819 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1820 return ::isUNPCKHMask(Ops, V2IsSplat); 1821} 1822 1823/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1824/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, 1825/// <0, 0, 1, 1> 1826bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1827 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1828 1829 unsigned NumElems = N->getNumOperands(); 1830 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1831 return false; 1832 1833 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1834 SDOperand BitI = N->getOperand(i); 1835 SDOperand BitI1 = N->getOperand(i+1); 1836 1837 if (!isUndefOrEqual(BitI, j)) 1838 return false; 1839 if (!isUndefOrEqual(BitI1, j)) 1840 return false; 1841 } 1842 1843 return true; 1844} 1845 1846/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1847/// specifies a shuffle of elements that is suitable for input to MOVSS, 1848/// MOVSD, and MOVD, i.e. setting the lowest element. 1849static bool isMOVLMask(std::vector<SDOperand> &N) { 1850 unsigned NumElems = N.size(); 1851 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1852 return false; 1853 1854 if (!isUndefOrEqual(N[0], NumElems)) 1855 return false; 1856 1857 for (unsigned i = 1; i < NumElems; ++i) { 1858 SDOperand Arg = N[i]; 1859 if (!isUndefOrEqual(Arg, i)) 1860 return false; 1861 } 1862 1863 return true; 1864} 1865 1866bool X86::isMOVLMask(SDNode *N) { 1867 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1868 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1869 return ::isMOVLMask(Ops); 1870} 1871 1872/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 1873/// of what x86 movss want. X86 movs requires the lowest element to be lowest 1874/// element of vector 2 and the other elements to come from vector 1 in order. 
1875static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) { 1876 unsigned NumElems = Ops.size(); 1877 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1878 return false; 1879 1880 if (!isUndefOrEqual(Ops[0], 0)) 1881 return false; 1882 1883 for (unsigned i = 1; i < NumElems; ++i) { 1884 SDOperand Arg = Ops[i]; 1885 if (V2IsSplat) { 1886 if (!isUndefOrEqual(Arg, NumElems)) 1887 return false; 1888 } else { 1889 if (!isUndefOrEqual(Arg, i+NumElems)) 1890 return false; 1891 } 1892 } 1893 1894 return true; 1895} 1896 1897static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) { 1898 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1899 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1900 return isCommutedMOVL(Ops, V2IsSplat); 1901} 1902 1903/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1904/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1905bool X86::isMOVSHDUPMask(SDNode *N) { 1906 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1907 1908 if (N->getNumOperands() != 4) 1909 return false; 1910 1911 // Expect 1, 1, 3, 3 1912 for (unsigned i = 0; i < 2; ++i) { 1913 SDOperand Arg = N->getOperand(i); 1914 if (Arg.getOpcode() == ISD::UNDEF) continue; 1915 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1916 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1917 if (Val != 1) return false; 1918 } 1919 1920 bool HasHi = false; 1921 for (unsigned i = 2; i < 4; ++i) { 1922 SDOperand Arg = N->getOperand(i); 1923 if (Arg.getOpcode() == ISD::UNDEF) continue; 1924 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1925 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1926 if (Val != 3) return false; 1927 HasHi = true; 1928 } 1929 1930 // Don't use movshdup if it can be done with a shufps. 
1931 return HasHi; 1932} 1933 1934/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1935/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 1936bool X86::isMOVSLDUPMask(SDNode *N) { 1937 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1938 1939 if (N->getNumOperands() != 4) 1940 return false; 1941 1942 // Expect 0, 0, 2, 2 1943 for (unsigned i = 0; i < 2; ++i) { 1944 SDOperand Arg = N->getOperand(i); 1945 if (Arg.getOpcode() == ISD::UNDEF) continue; 1946 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1947 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1948 if (Val != 0) return false; 1949 } 1950 1951 bool HasHi = false; 1952 for (unsigned i = 2; i < 4; ++i) { 1953 SDOperand Arg = N->getOperand(i); 1954 if (Arg.getOpcode() == ISD::UNDEF) continue; 1955 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1956 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1957 if (Val != 2) return false; 1958 HasHi = true; 1959 } 1960 1961 // Don't use movshdup if it can be done with a shufps. 1962 return HasHi; 1963} 1964 1965/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1966/// a splat of a single element. 1967static bool isSplatMask(SDNode *N) { 1968 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1969 1970 // This is a splat operation if each element of the permute is the same, and 1971 // if the value doesn't reference the second vector. 
1972 unsigned NumElems = N->getNumOperands(); 1973 SDOperand ElementBase; 1974 unsigned i = 0; 1975 for (; i != NumElems; ++i) { 1976 SDOperand Elt = N->getOperand(i); 1977 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) { 1978 ElementBase = Elt; 1979 break; 1980 } 1981 } 1982 1983 if (!ElementBase.Val) 1984 return false; 1985 1986 for (; i != NumElems; ++i) { 1987 SDOperand Arg = N->getOperand(i); 1988 if (Arg.getOpcode() == ISD::UNDEF) continue; 1989 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1990 if (Arg != ElementBase) return false; 1991 } 1992 1993 // Make sure it is a splat of the first vector operand. 1994 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 1995} 1996 1997/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1998/// a splat of a single element and it's a 2 or 4 element mask. 1999bool X86::isSplatMask(SDNode *N) { 2000 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2001 2002 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 2003 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2004 return false; 2005 return ::isSplatMask(N); 2006} 2007 2008/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2009/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2010/// instructions. 2011unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2012 unsigned NumOperands = N->getNumOperands(); 2013 unsigned Shift = (NumOperands == 4) ? 
2 : 1; 2014 unsigned Mask = 0; 2015 for (unsigned i = 0; i < NumOperands; ++i) { 2016 unsigned Val = 0; 2017 SDOperand Arg = N->getOperand(NumOperands-i-1); 2018 if (Arg.getOpcode() != ISD::UNDEF) 2019 Val = cast<ConstantSDNode>(Arg)->getValue(); 2020 if (Val >= NumOperands) Val -= NumOperands; 2021 Mask |= Val; 2022 if (i != NumOperands - 1) 2023 Mask <<= Shift; 2024 } 2025 2026 return Mask; 2027} 2028 2029/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2030/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2031/// instructions. 2032unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2033 unsigned Mask = 0; 2034 // 8 nodes, but we only care about the last 4. 2035 for (unsigned i = 7; i >= 4; --i) { 2036 unsigned Val = 0; 2037 SDOperand Arg = N->getOperand(i); 2038 if (Arg.getOpcode() != ISD::UNDEF) 2039 Val = cast<ConstantSDNode>(Arg)->getValue(); 2040 Mask |= (Val - 4); 2041 if (i != 4) 2042 Mask <<= 2; 2043 } 2044 2045 return Mask; 2046} 2047 2048/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2049/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2050/// instructions. 2051unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2052 unsigned Mask = 0; 2053 // 8 nodes, but we only care about the first 4. 2054 for (int i = 3; i >= 0; --i) { 2055 unsigned Val = 0; 2056 SDOperand Arg = N->getOperand(i); 2057 if (Arg.getOpcode() != ISD::UNDEF) 2058 Val = cast<ConstantSDNode>(Arg)->getValue(); 2059 Mask |= Val; 2060 if (i != 0) 2061 Mask <<= 2; 2062 } 2063 2064 return Mask; 2065} 2066 2067/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2068/// specifies a 8 element shuffle that can be broken into a pair of 2069/// PSHUFHW and PSHUFLW. 2070static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2071 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2072 2073 if (N->getNumOperands() != 8) 2074 return false; 2075 2076 // Lower quadword shuffled. 
2077 for (unsigned i = 0; i != 4; ++i) { 2078 SDOperand Arg = N->getOperand(i); 2079 if (Arg.getOpcode() == ISD::UNDEF) continue; 2080 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2081 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2082 if (Val > 4) 2083 return false; 2084 } 2085 2086 // Upper quadword shuffled. 2087 for (unsigned i = 4; i != 8; ++i) { 2088 SDOperand Arg = N->getOperand(i); 2089 if (Arg.getOpcode() == ISD::UNDEF) continue; 2090 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2091 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2092 if (Val < 4 || Val > 7) 2093 return false; 2094 } 2095 2096 return true; 2097} 2098 2099/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 2100/// values in ther permute mask. 2101static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 2102 SDOperand V1 = Op.getOperand(0); 2103 SDOperand V2 = Op.getOperand(1); 2104 SDOperand Mask = Op.getOperand(2); 2105 MVT::ValueType VT = Op.getValueType(); 2106 MVT::ValueType MaskVT = Mask.getValueType(); 2107 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 2108 unsigned NumElems = Mask.getNumOperands(); 2109 std::vector<SDOperand> MaskVec; 2110 2111 for (unsigned i = 0; i != NumElems; ++i) { 2112 SDOperand Arg = Mask.getOperand(i); 2113 if (Arg.getOpcode() == ISD::UNDEF) { 2114 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2115 continue; 2116 } 2117 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2118 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2119 if (Val < NumElems) 2120 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2121 else 2122 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2123 } 2124 2125 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2126 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 2127} 2128 2129/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2130/// match movhlps. 
The lower half elements should come from upper half of 2131/// V1 (and in order), and the upper half elements should come from the upper 2132/// half of V2 (and in order). 2133static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2134 unsigned NumElems = Mask->getNumOperands(); 2135 if (NumElems != 4) 2136 return false; 2137 for (unsigned i = 0, e = 2; i != e; ++i) 2138 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2139 return false; 2140 for (unsigned i = 2; i != 4; ++i) 2141 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2142 return false; 2143 return true; 2144} 2145 2146/// isScalarLoadToVector - Returns true if the node is a scalar load that 2147/// is promoted to a vector. 2148static inline bool isScalarLoadToVector(SDNode *N) { 2149 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2150 N = N->getOperand(0).Val; 2151 return (N->getOpcode() == ISD::LOAD); 2152 } 2153 return false; 2154} 2155 2156/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2157/// match movlp{s|d}. The lower half elements should come from lower half of 2158/// V1 (and in order), and the upper half elements should come from the upper 2159/// half of V2 (and in order). And since V1 will become the source of the 2160/// MOVLP, it must be either a vector load or a scalar load to vector. 2161static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) { 2162 if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1)) 2163 return false; 2164 2165 unsigned NumElems = Mask->getNumOperands(); 2166 if (NumElems != 2 && NumElems != 4) 2167 return false; 2168 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2169 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2170 return false; 2171 for (unsigned i = NumElems/2; i != NumElems; ++i) 2172 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2173 return false; 2174 return true; 2175} 2176 2177/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2178/// all the same. 
2179static bool isSplatVector(SDNode *N) { 2180 if (N->getOpcode() != ISD::BUILD_VECTOR) 2181 return false; 2182 2183 SDOperand SplatValue = N->getOperand(0); 2184 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2185 if (N->getOperand(i) != SplatValue) 2186 return false; 2187 return true; 2188} 2189 2190/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2191/// that point to V2 points to its first element. 2192static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2193 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2194 2195 bool Changed = false; 2196 std::vector<SDOperand> MaskVec; 2197 unsigned NumElems = Mask.getNumOperands(); 2198 for (unsigned i = 0; i != NumElems; ++i) { 2199 SDOperand Arg = Mask.getOperand(i); 2200 if (Arg.getOpcode() != ISD::UNDEF) { 2201 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2202 if (Val > NumElems) { 2203 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2204 Changed = true; 2205 } 2206 } 2207 MaskVec.push_back(Arg); 2208 } 2209 2210 if (Changed) 2211 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); 2212 return Mask; 2213} 2214 2215/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2216/// operation of specified width. 2217static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2218 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2219 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2220 2221 std::vector<SDOperand> MaskVec; 2222 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2223 for (unsigned i = 1; i != NumElems; ++i) 2224 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2225 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2226} 2227 2228/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2229/// of specified width. 
2230static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2231 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2232 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2233 std::vector<SDOperand> MaskVec; 2234 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2235 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2236 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2237 } 2238 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2239} 2240 2241/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2242/// of specified width. 2243static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2244 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2245 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2246 unsigned Half = NumElems/2; 2247 std::vector<SDOperand> MaskVec; 2248 for (unsigned i = 0; i != Half; ++i) { 2249 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2250 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2251 } 2252 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2253} 2254 2255/// getZeroVector - Returns a vector of specified type with all zero elements. 2256/// 2257static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2258 assert(MVT::isVector(VT) && "Expected a vector type"); 2259 unsigned NumElems = getVectorNumElements(VT); 2260 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2261 bool isFP = MVT::isFloatingPoint(EVT); 2262 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2263 std::vector<SDOperand> ZeroVec(NumElems, Zero); 2264 return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec); 2265} 2266 2267/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
2268/// 2269static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2270 SDOperand V1 = Op.getOperand(0); 2271 SDOperand Mask = Op.getOperand(2); 2272 MVT::ValueType VT = Op.getValueType(); 2273 unsigned NumElems = Mask.getNumOperands(); 2274 Mask = getUnpacklMask(NumElems, DAG); 2275 while (NumElems != 4) { 2276 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2277 NumElems >>= 1; 2278 } 2279 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2280 2281 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2282 Mask = getZeroVector(MaskVT, DAG); 2283 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2284 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2285 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2286} 2287 2288/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2289/// constant +0.0. 2290static inline bool isZeroNode(SDOperand Elt) { 2291 return ((isa<ConstantSDNode>(Elt) && 2292 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2293 (isa<ConstantFPSDNode>(Elt) && 2294 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2295} 2296 2297/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2298/// vector and zero or undef vector. 2299static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2300 unsigned NumElems, unsigned Idx, 2301 bool isZero, SelectionDAG &DAG) { 2302 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2303 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2304 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2305 SDOperand Zero = DAG.getConstant(0, EVT); 2306 std::vector<SDOperand> MaskVec(NumElems, Zero); 2307 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2308 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2309 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2310} 2311 2312/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
2313/// 2314static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2315 unsigned NumNonZero, unsigned NumZero, 2316 SelectionDAG &DAG) { 2317 if (NumNonZero > 8) 2318 return SDOperand(); 2319 2320 SDOperand V(0, 0); 2321 bool First = true; 2322 for (unsigned i = 0; i < 16; ++i) { 2323 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2324 if (ThisIsNonZero && First) { 2325 if (NumZero) 2326 V = getZeroVector(MVT::v8i16, DAG); 2327 else 2328 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2329 First = false; 2330 } 2331 2332 if ((i & 1) != 0) { 2333 SDOperand ThisElt(0, 0), LastElt(0, 0); 2334 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2335 if (LastIsNonZero) { 2336 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2337 } 2338 if (ThisIsNonZero) { 2339 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2340 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2341 ThisElt, DAG.getConstant(8, MVT::i8)); 2342 if (LastIsNonZero) 2343 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2344 } else 2345 ThisElt = LastElt; 2346 2347 if (ThisElt.Val) 2348 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2349 DAG.getConstant(i/2, MVT::i32)); 2350 } 2351 } 2352 2353 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2354} 2355 2356/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16. 
2357/// 2358static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2359 unsigned NumNonZero, unsigned NumZero, 2360 SelectionDAG &DAG) { 2361 if (NumNonZero > 4) 2362 return SDOperand(); 2363 2364 SDOperand V(0, 0); 2365 bool First = true; 2366 for (unsigned i = 0; i < 8; ++i) { 2367 bool isNonZero = (NonZeros & (1 << i)) != 0; 2368 if (isNonZero) { 2369 if (First) { 2370 if (NumZero) 2371 V = getZeroVector(MVT::v8i16, DAG); 2372 else 2373 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2374 First = false; 2375 } 2376 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2377 DAG.getConstant(i, MVT::i32)); 2378 } 2379 } 2380 2381 return V; 2382} 2383 2384SDOperand 2385X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2386 // All zero's are handled with pxor. 2387 if (ISD::isBuildVectorAllZeros(Op.Val)) 2388 return Op; 2389 2390 // All one's are handled with pcmpeqd. 2391 if (ISD::isBuildVectorAllOnes(Op.Val)) 2392 return Op; 2393 2394 MVT::ValueType VT = Op.getValueType(); 2395 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2396 unsigned EVTBits = MVT::getSizeInBits(EVT); 2397 2398 unsigned NumElems = Op.getNumOperands(); 2399 unsigned NumZero = 0; 2400 unsigned NumNonZero = 0; 2401 unsigned NonZeros = 0; 2402 std::set<SDOperand> Values; 2403 for (unsigned i = 0; i < NumElems; ++i) { 2404 SDOperand Elt = Op.getOperand(i); 2405 if (Elt.getOpcode() != ISD::UNDEF) { 2406 Values.insert(Elt); 2407 if (isZeroNode(Elt)) 2408 NumZero++; 2409 else { 2410 NonZeros |= (1 << i); 2411 NumNonZero++; 2412 } 2413 } 2414 } 2415 2416 if (NumNonZero == 0) 2417 // Must be a mix of zero and undef. Return a zero vector. 2418 return getZeroVector(VT, DAG); 2419 2420 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2421 if (Values.size() == 1) 2422 return SDOperand(); 2423 2424 // Special case for single non-zero element. 
2425 if (NumNonZero == 1) { 2426 unsigned Idx = CountTrailingZeros_32(NonZeros); 2427 SDOperand Item = Op.getOperand(Idx); 2428 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2429 if (Idx == 0) 2430 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2431 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2432 NumZero > 0, DAG); 2433 2434 if (EVTBits == 32) { 2435 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2436 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2437 DAG); 2438 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2439 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 2440 std::vector<SDOperand> MaskVec; 2441 for (unsigned i = 0; i < NumElems; i++) 2442 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2443 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2444 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2445 DAG.getNode(ISD::UNDEF, VT), Mask); 2446 } 2447 } 2448 2449 // Let legalizer expand 2-widde build_vector's. 2450 if (EVTBits == 64) 2451 return SDOperand(); 2452 2453 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2454 if (EVTBits == 8) { 2455 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG); 2456 if (V.Val) return V; 2457 } 2458 2459 if (EVTBits == 16) { 2460 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG); 2461 if (V.Val) return V; 2462 } 2463 2464 // If element VT is == 32 bits, turn it into a number of shuffles. 
2465 std::vector<SDOperand> V(NumElems); 2466 if (NumElems == 4 && NumZero > 0) { 2467 for (unsigned i = 0; i < 4; ++i) { 2468 bool isZero = !(NonZeros & (1 << i)); 2469 if (isZero) 2470 V[i] = getZeroVector(VT, DAG); 2471 else 2472 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2473 } 2474 2475 for (unsigned i = 0; i < 2; ++i) { 2476 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2477 default: break; 2478 case 0: 2479 V[i] = V[i*2]; // Must be a zero vector. 2480 break; 2481 case 1: 2482 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2483 getMOVLMask(NumElems, DAG)); 2484 break; 2485 case 2: 2486 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2487 getMOVLMask(NumElems, DAG)); 2488 break; 2489 case 3: 2490 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2491 getUnpacklMask(NumElems, DAG)); 2492 break; 2493 } 2494 } 2495 2496 // Take advantage of the fact R32 to VR128 scalar_to_vector (i.e. movd) 2497 // clears the upper bits. 2498 // FIXME: we can do the same for v4f32 case when we know both parts of 2499 // the lower half come from scalar_to_vector (loadf32). We should do 2500 // that in post legalizer dag combiner with target specific hooks. 
2501 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2502 return V[0]; 2503 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2504 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2505 std::vector<SDOperand> MaskVec; 2506 bool Reverse = (NonZeros & 0x3) == 2; 2507 for (unsigned i = 0; i < 2; ++i) 2508 if (Reverse) 2509 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2510 else 2511 MaskVec.push_back(DAG.getConstant(i, EVT)); 2512 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2513 for (unsigned i = 0; i < 2; ++i) 2514 if (Reverse) 2515 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2516 else 2517 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2518 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2519 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2520 } 2521 2522 if (Values.size() > 2) { 2523 // Expand into a number of unpckl*. 2524 // e.g. for v4f32 2525 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2526 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2527 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2528 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2529 for (unsigned i = 0; i < NumElems; ++i) 2530 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2531 NumElems >>= 1; 2532 while (NumElems != 0) { 2533 for (unsigned i = 0; i < NumElems; ++i) 2534 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2535 UnpckMask); 2536 NumElems >>= 1; 2537 } 2538 return V[0]; 2539 } 2540 2541 return SDOperand(); 2542} 2543 2544SDOperand 2545X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2546 SDOperand V1 = Op.getOperand(0); 2547 SDOperand V2 = Op.getOperand(1); 2548 SDOperand PermMask = Op.getOperand(2); 2549 MVT::ValueType VT = Op.getValueType(); 2550 unsigned NumElems = PermMask.getNumOperands(); 2551 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2552 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2553 2554 if (isSplatMask(PermMask.Val)) { 2555 
if (NumElems <= 4) return Op; 2556 // Promote it to a v4i32 splat. 2557 return PromoteSplat(Op, DAG); 2558 } 2559 2560 if (X86::isMOVLMask(PermMask.Val)) 2561 return (V1IsUndef) ? V2 : Op; 2562 2563 if (X86::isMOVSHDUPMask(PermMask.Val) || 2564 X86::isMOVSLDUPMask(PermMask.Val) || 2565 X86::isMOVHLPSMask(PermMask.Val) || 2566 X86::isMOVHPMask(PermMask.Val) || 2567 X86::isMOVLPMask(PermMask.Val)) 2568 return Op; 2569 2570 if (ShouldXformToMOVHLPS(PermMask.Val) || 2571 ShouldXformToMOVLP(V1.Val, PermMask.Val)) 2572 return CommuteVectorShuffle(Op, DAG); 2573 2574 bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF; 2575 bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF; 2576 if (V1IsSplat && !V2IsSplat) { 2577 Op = CommuteVectorShuffle(Op, DAG); 2578 V1 = Op.getOperand(0); 2579 V2 = Op.getOperand(1); 2580 PermMask = Op.getOperand(2); 2581 V2IsSplat = true; 2582 } 2583 2584 if (isCommutedMOVL(PermMask.Val, V2IsSplat)) { 2585 if (V2IsUndef) return V1; 2586 Op = CommuteVectorShuffle(Op, DAG); 2587 V1 = Op.getOperand(0); 2588 V2 = Op.getOperand(1); 2589 PermMask = Op.getOperand(2); 2590 if (V2IsSplat) { 2591 // V2 is a splat, so the mask may be malformed. That is, it may point 2592 // to any V2 element. The instruction selectior won't like this. Get 2593 // a corrected mask and commute to form a proper MOVS{S|D}. 2594 SDOperand NewMask = getMOVLMask(NumElems, DAG); 2595 if (NewMask.Val != PermMask.Val) 2596 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2597 } 2598 return Op; 2599 } 2600 2601 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2602 X86::isUNPCKLMask(PermMask.Val) || 2603 X86::isUNPCKHMask(PermMask.Val)) 2604 return Op; 2605 2606 if (V2IsSplat) { 2607 // Normalize mask so all entries that point to V2 points to its first 2608 // element then try to match unpck{h|l} again. If match, return a 2609 // new vector_shuffle with the corrected mask. 
2610 SDOperand NewMask = NormalizeMask(PermMask, DAG); 2611 if (NewMask.Val != PermMask.Val) { 2612 if (X86::isUNPCKLMask(PermMask.Val, true)) { 2613 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 2614 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2615 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 2616 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 2617 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2618 } 2619 } 2620 } 2621 2622 // Normalize the node to match x86 shuffle ops if needed 2623 if (V2.getOpcode() != ISD::UNDEF) 2624 if (isCommutedSHUFP(PermMask.Val)) { 2625 Op = CommuteVectorShuffle(Op, DAG); 2626 V1 = Op.getOperand(0); 2627 V2 = Op.getOperand(1); 2628 PermMask = Op.getOperand(2); 2629 } 2630 2631 // If VT is integer, try PSHUF* first, then SHUFP*. 2632 if (MVT::isInteger(VT)) { 2633 if (X86::isPSHUFDMask(PermMask.Val) || 2634 X86::isPSHUFHWMask(PermMask.Val) || 2635 X86::isPSHUFLWMask(PermMask.Val)) { 2636 if (V2.getOpcode() != ISD::UNDEF) 2637 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2638 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2639 return Op; 2640 } 2641 2642 if (X86::isSHUFPMask(PermMask.Val)) 2643 return Op; 2644 2645 // Handle v8i16 shuffle high / low shuffle node pair. 
2646 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2647 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2648 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2649 std::vector<SDOperand> MaskVec; 2650 for (unsigned i = 0; i != 4; ++i) 2651 MaskVec.push_back(PermMask.getOperand(i)); 2652 for (unsigned i = 4; i != 8; ++i) 2653 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2654 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2655 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2656 MaskVec.clear(); 2657 for (unsigned i = 0; i != 4; ++i) 2658 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2659 for (unsigned i = 4; i != 8; ++i) 2660 MaskVec.push_back(PermMask.getOperand(i)); 2661 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2662 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2663 } 2664 } else { 2665 // Floating point cases in the other order. 2666 if (X86::isSHUFPMask(PermMask.Val)) 2667 return Op; 2668 if (X86::isPSHUFDMask(PermMask.Val) || 2669 X86::isPSHUFHWMask(PermMask.Val) || 2670 X86::isPSHUFLWMask(PermMask.Val)) { 2671 if (V2.getOpcode() != ISD::UNDEF) 2672 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2673 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2674 return Op; 2675 } 2676 } 2677 2678 if (NumElems == 4) { 2679 // Break it into (shuffle shuffle_hi, shuffle_lo). 
2680 MVT::ValueType MaskVT = PermMask.getValueType(); 2681 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 2682 std::map<unsigned, std::pair<int, int> > Locs; 2683 std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2684 std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2685 std::vector<SDOperand> *MaskPtr = &LoMask; 2686 unsigned MaskIdx = 0; 2687 unsigned LoIdx = 0; 2688 unsigned HiIdx = NumElems/2; 2689 for (unsigned i = 0; i != NumElems; ++i) { 2690 if (i == NumElems/2) { 2691 MaskPtr = &HiMask; 2692 MaskIdx = 1; 2693 LoIdx = 0; 2694 HiIdx = NumElems/2; 2695 } 2696 SDOperand Elt = PermMask.getOperand(i); 2697 if (Elt.getOpcode() == ISD::UNDEF) { 2698 Locs[i] = std::make_pair(-1, -1); 2699 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 2700 Locs[i] = std::make_pair(MaskIdx, LoIdx); 2701 (*MaskPtr)[LoIdx] = Elt; 2702 LoIdx++; 2703 } else { 2704 Locs[i] = std::make_pair(MaskIdx, HiIdx); 2705 (*MaskPtr)[HiIdx] = Elt; 2706 HiIdx++; 2707 } 2708 } 2709 2710 SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2711 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask)); 2712 SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2713 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask)); 2714 std::vector<SDOperand> MaskOps; 2715 for (unsigned i = 0; i != NumElems; ++i) { 2716 if (Locs[i].first == -1) { 2717 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 2718 } else { 2719 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 2720 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 2721 } 2722 } 2723 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 2724 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps)); 2725 } 2726 2727 return SDOperand(); 2728} 2729 2730SDOperand 2731X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 2732 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2733 return SDOperand(); 2734 2735 MVT::ValueType VT = 
Op.getValueType(); 2736 // TODO: handle v16i8. 2737 if (MVT::getSizeInBits(VT) == 16) { 2738 // Transform it so it match pextrw which produces a 32-bit result. 2739 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2740 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2741 Op.getOperand(0), Op.getOperand(1)); 2742 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2743 DAG.getValueType(VT)); 2744 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2745 } else if (MVT::getSizeInBits(VT) == 32) { 2746 SDOperand Vec = Op.getOperand(0); 2747 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2748 if (Idx == 0) 2749 return Op; 2750 2751 // SHUFPS the element to the lowest double word, then movss. 2752 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2753 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 2754 MVT::getVectorBaseType(MaskVT)); 2755 std::vector<SDOperand> IdxVec; 2756 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 2757 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2758 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2759 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2760 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2761 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2762 Vec, Vec, Mask); 2763 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2764 DAG.getConstant(0, MVT::i32)); 2765 } else if (MVT::getSizeInBits(VT) == 64) { 2766 SDOperand Vec = Op.getOperand(0); 2767 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2768 if (Idx == 0) 2769 return Op; 2770 2771 // UNPCKHPD the element to the lowest double word, then movsd. 2772 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 2773 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
2774 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2775 std::vector<SDOperand> IdxVec; 2776 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 2777 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2778 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2779 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2780 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2781 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2782 DAG.getConstant(0, MVT::i32)); 2783 } 2784 2785 return SDOperand(); 2786} 2787 2788SDOperand 2789X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 2790 // Transform it so it match pinsrw which expects a 16-bit value in a R32 2791 // as its second argument. 2792 MVT::ValueType VT = Op.getValueType(); 2793 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 2794 SDOperand N0 = Op.getOperand(0); 2795 SDOperand N1 = Op.getOperand(1); 2796 SDOperand N2 = Op.getOperand(2); 2797 if (MVT::getSizeInBits(BaseVT) == 16) { 2798 if (N1.getValueType() != MVT::i32) 2799 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 2800 if (N2.getValueType() != MVT::i32) 2801 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 2802 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 2803 } else if (MVT::getSizeInBits(BaseVT) == 32) { 2804 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 2805 if (Idx == 0) { 2806 // Use a movss. 
2807 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 2808 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2809 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2810 std::vector<SDOperand> MaskVec; 2811 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 2812 for (unsigned i = 1; i <= 3; ++i) 2813 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2814 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 2815 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec)); 2816 } else { 2817 // Use two pinsrw instructions to insert a 32 bit value. 2818 Idx <<= 1; 2819 if (MVT::isFloatingPoint(N1.getValueType())) { 2820 if (N1.getOpcode() == ISD::LOAD) { 2821 // Just load directly from f32mem to R32. 2822 N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), 2823 N1.getOperand(2)); 2824 } else { 2825 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 2826 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 2827 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 2828 DAG.getConstant(0, MVT::i32)); 2829 } 2830 } 2831 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 2832 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 2833 DAG.getConstant(Idx, MVT::i32)); 2834 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 2835 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 2836 DAG.getConstant(Idx+1, MVT::i32)); 2837 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 2838 } 2839 } 2840 2841 return SDOperand(); 2842} 2843 2844SDOperand 2845X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2846 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 2847 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 2848} 2849 2850// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2851// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 2852// one of the above mentioned nodes. It has to be wrapped because otherwise 2853// Select(N) returns N. 
So the raw TargetGlobalAddress nodes, etc. can only 2854// be used to form addressing mode. These wrapped nodes will be selected 2855// into MOV32ri. 2856SDOperand 2857X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 2858 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2859 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2860 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2861 CP->getAlignment())); 2862 if (Subtarget->isTargetDarwin()) { 2863 // With PIC, the address is actually $g + Offset. 2864 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2865 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2866 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2867 } 2868 2869 return Result; 2870} 2871 2872SDOperand 2873X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 2874 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2875 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2876 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2877 if (Subtarget->isTargetDarwin()) { 2878 // With PIC, the address is actually $g + Offset. 2879 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2880 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2881 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2882 2883 // For Darwin, external and weak symbols are indirect, so we want to load 2884 // the value at address GV, not the value of GV itself. This means that 2885 // the GlobalAddress must be in the base or index register of the address, 2886 // not the GV offset field. 
2887 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2888 DarwinGVRequiresExtraLoad(GV)) 2889 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2890 Result, DAG.getSrcValue(NULL)); 2891 } 2892 2893 return Result; 2894} 2895 2896SDOperand 2897X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 2898 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2899 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2900 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2901 if (Subtarget->isTargetDarwin()) { 2902 // With PIC, the address is actually $g + Offset. 2903 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2904 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2905 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2906 } 2907 2908 return Result; 2909} 2910 2911SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 2912 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2913 "Not an i64 shift!"); 2914 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 2915 SDOperand ShOpLo = Op.getOperand(0); 2916 SDOperand ShOpHi = Op.getOperand(1); 2917 SDOperand ShAmt = Op.getOperand(2); 2918 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 2919 DAG.getConstant(31, MVT::i8)) 2920 : DAG.getConstant(0, MVT::i32); 2921 2922 SDOperand Tmp2, Tmp3; 2923 if (Op.getOpcode() == ISD::SHL_PARTS) { 2924 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 2925 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 2926 } else { 2927 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 2928 Tmp3 = DAG.getNode(isSRA ? 
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 2929 } 2930 2931 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 2932 ShAmt, DAG.getConstant(32, MVT::i8)); 2933 2934 SDOperand Hi, Lo; 2935 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2936 2937 std::vector<MVT::ValueType> Tys; 2938 Tys.push_back(MVT::i32); 2939 Tys.push_back(MVT::Flag); 2940 std::vector<SDOperand> Ops; 2941 if (Op.getOpcode() == ISD::SHL_PARTS) { 2942 Ops.push_back(Tmp2); 2943 Ops.push_back(Tmp3); 2944 Ops.push_back(CC); 2945 Ops.push_back(InFlag); 2946 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2947 InFlag = Hi.getValue(1); 2948 2949 Ops.clear(); 2950 Ops.push_back(Tmp3); 2951 Ops.push_back(Tmp1); 2952 Ops.push_back(CC); 2953 Ops.push_back(InFlag); 2954 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2955 } else { 2956 Ops.push_back(Tmp2); 2957 Ops.push_back(Tmp3); 2958 Ops.push_back(CC); 2959 Ops.push_back(InFlag); 2960 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2961 InFlag = Lo.getValue(1); 2962 2963 Ops.clear(); 2964 Ops.push_back(Tmp3); 2965 Ops.push_back(Tmp1); 2966 Ops.push_back(CC); 2967 Ops.push_back(InFlag); 2968 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2969 } 2970 2971 Tys.clear(); 2972 Tys.push_back(MVT::i32); 2973 Tys.push_back(MVT::i32); 2974 Ops.clear(); 2975 Ops.push_back(Lo); 2976 Ops.push_back(Hi); 2977 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2978} 2979 2980SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 2981 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 2982 Op.getOperand(0).getValueType() >= MVT::i16 && 2983 "Unknown SINT_TO_FP to lower!"); 2984 2985 SDOperand Result; 2986 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 2987 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 2988 MachineFunction &MF = DAG.getMachineFunction(); 2989 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2990 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2991 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 
2992 DAG.getEntryNode(), Op.getOperand(0), 2993 StackSlot, DAG.getSrcValue(NULL)); 2994 2995 // Build the FILD 2996 std::vector<MVT::ValueType> Tys; 2997 Tys.push_back(MVT::f64); 2998 Tys.push_back(MVT::Other); 2999 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 3000 std::vector<SDOperand> Ops; 3001 Ops.push_back(Chain); 3002 Ops.push_back(StackSlot); 3003 Ops.push_back(DAG.getValueType(SrcVT)); 3004 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3005 Tys, Ops); 3006 3007 if (X86ScalarSSE) { 3008 Chain = Result.getValue(1); 3009 SDOperand InFlag = Result.getValue(2); 3010 3011 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3012 // shouldn't be necessary except that RFP cannot be live across 3013 // multiple blocks. When stackifier is fixed, they can be uncoupled. 3014 MachineFunction &MF = DAG.getMachineFunction(); 3015 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3016 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3017 std::vector<MVT::ValueType> Tys; 3018 Tys.push_back(MVT::Other); 3019 std::vector<SDOperand> Ops; 3020 Ops.push_back(Chain); 3021 Ops.push_back(Result); 3022 Ops.push_back(StackSlot); 3023 Ops.push_back(DAG.getValueType(Op.getValueType())); 3024 Ops.push_back(InFlag); 3025 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 3026 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 3027 DAG.getSrcValue(NULL)); 3028 } 3029 3030 return Result; 3031} 3032 3033SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3034 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3035 "Unknown FP_TO_SINT to lower!"); 3036 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3037 // stack slot. 
3038 MachineFunction &MF = DAG.getMachineFunction(); 3039 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3040 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3041 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3042 3043 unsigned Opc; 3044 switch (Op.getValueType()) { 3045 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3046 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3047 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3048 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3049 } 3050 3051 SDOperand Chain = DAG.getEntryNode(); 3052 SDOperand Value = Op.getOperand(0); 3053 if (X86ScalarSSE) { 3054 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3055 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 3056 DAG.getSrcValue(0)); 3057 std::vector<MVT::ValueType> Tys; 3058 Tys.push_back(MVT::f64); 3059 Tys.push_back(MVT::Other); 3060 std::vector<SDOperand> Ops; 3061 Ops.push_back(Chain); 3062 Ops.push_back(StackSlot); 3063 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 3064 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 3065 Chain = Value.getValue(1); 3066 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3067 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3068 } 3069 3070 // Build the FP_TO_INT*_IN_MEM 3071 std::vector<SDOperand> Ops; 3072 Ops.push_back(Chain); 3073 Ops.push_back(Value); 3074 Ops.push_back(StackSlot); 3075 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 3076 3077 // Load the result. 
3078 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 3079 DAG.getSrcValue(NULL)); 3080} 3081 3082SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3083 MVT::ValueType VT = Op.getValueType(); 3084 const Type *OpNTy = MVT::getTypeForValueType(VT); 3085 std::vector<Constant*> CV; 3086 if (VT == MVT::f64) { 3087 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 3088 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3089 } else { 3090 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 3091 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3092 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3093 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3094 } 3095 Constant *CS = ConstantStruct::get(CV); 3096 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3097 SDOperand Mask 3098 = DAG.getNode(X86ISD::LOAD_PACK, 3099 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 3100 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3101} 3102 3103SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3104 MVT::ValueType VT = Op.getValueType(); 3105 const Type *OpNTy = MVT::getTypeForValueType(VT); 3106 std::vector<Constant*> CV; 3107 if (VT == MVT::f64) { 3108 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 3109 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3110 } else { 3111 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 3112 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3113 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3114 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3115 } 3116 Constant *CS = ConstantStruct::get(CV); 3117 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3118 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, 3119 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 3120 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3121} 3122 3123SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 3124 assert(Op.getValueType() == 
MVT::i8 && "SetCC type must be 8-bit integer"); 3125 SDOperand Cond; 3126 SDOperand CC = Op.getOperand(2); 3127 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3128 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3129 bool Flip; 3130 unsigned X86CC; 3131 if (translateX86CC(CC, isFP, X86CC, Flip)) { 3132 if (Flip) 3133 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3134 Op.getOperand(1), Op.getOperand(0)); 3135 else 3136 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3137 Op.getOperand(0), Op.getOperand(1)); 3138 return DAG.getNode(X86ISD::SETCC, MVT::i8, 3139 DAG.getConstant(X86CC, MVT::i8), Cond); 3140 } else { 3141 assert(isFP && "Illegal integer SetCC!"); 3142 3143 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3144 Op.getOperand(0), Op.getOperand(1)); 3145 std::vector<MVT::ValueType> Tys; 3146 std::vector<SDOperand> Ops; 3147 switch (SetCCOpcode) { 3148 default: assert(false && "Illegal floating point SetCC!"); 3149 case ISD::SETOEQ: { // !PF & ZF 3150 Tys.push_back(MVT::i8); 3151 Tys.push_back(MVT::Flag); 3152 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 3153 Ops.push_back(Cond); 3154 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 3155 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 3156 DAG.getConstant(X86ISD::COND_E, MVT::i8), 3157 Tmp1.getValue(1)); 3158 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3159 } 3160 case ISD::SETUNE: { // PF | !ZF 3161 Tys.push_back(MVT::i8); 3162 Tys.push_back(MVT::Flag); 3163 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 3164 Ops.push_back(Cond); 3165 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 3166 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 3167 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 3168 Tmp1.getValue(1)); 3169 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3170 } 3171 } 3172 } 3173} 3174 3175SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3176 MVT::ValueType VT = Op.getValueType(); 3177 bool isFPStack = 
MVT::isFloatingPoint(VT) && !X86ScalarSSE; 3178 bool addTest = false; 3179 SDOperand Op0 = Op.getOperand(0); 3180 SDOperand Cond, CC; 3181 if (Op0.getOpcode() == ISD::SETCC) 3182 Op0 = LowerOperation(Op0, DAG); 3183 3184 if (Op0.getOpcode() == X86ISD::SETCC) { 3185 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3186 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 3187 // have another use it will be eliminated. 3188 // If the X86ISD::SETCC has more than one use, then it's probably better 3189 // to use a test instead of duplicating the X86ISD::CMP (for register 3190 // pressure reason). 3191 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 3192 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 3193 CmpOpc == X86ISD::UCOMI) { 3194 if (!Op0.hasOneUse()) { 3195 std::vector<MVT::ValueType> Tys; 3196 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 3197 Tys.push_back(Op0.Val->getValueType(i)); 3198 std::vector<SDOperand> Ops; 3199 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 3200 Ops.push_back(Op0.getOperand(i)); 3201 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 3202 } 3203 3204 CC = Op0.getOperand(0); 3205 Cond = Op0.getOperand(1); 3206 // Make a copy as flag result cannot be used by more than one. 3207 Cond = DAG.getNode(CmpOpc, MVT::Flag, 3208 Cond.getOperand(0), Cond.getOperand(1)); 3209 addTest = 3210 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3211 } else 3212 addTest = true; 3213 } else 3214 addTest = true; 3215 3216 if (addTest) { 3217 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3218 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 3219 } 3220 3221 std::vector<MVT::ValueType> Tys; 3222 Tys.push_back(Op.getValueType()); 3223 Tys.push_back(MVT::Flag); 3224 std::vector<SDOperand> Ops; 3225 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3226 // condition is true. 
3227 Ops.push_back(Op.getOperand(2)); 3228 Ops.push_back(Op.getOperand(1)); 3229 Ops.push_back(CC); 3230 Ops.push_back(Cond); 3231 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 3232} 3233 3234SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3235 bool addTest = false; 3236 SDOperand Cond = Op.getOperand(1); 3237 SDOperand Dest = Op.getOperand(2); 3238 SDOperand CC; 3239 if (Cond.getOpcode() == ISD::SETCC) 3240 Cond = LowerOperation(Cond, DAG); 3241 3242 if (Cond.getOpcode() == X86ISD::SETCC) { 3243 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3244 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 3245 // have another use it will be eliminated. 3246 // If the X86ISD::SETCC has more than one use, then it's probably better 3247 // to use a test instead of duplicating the X86ISD::CMP (for register 3248 // pressure reason). 3249 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 3250 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 3251 CmpOpc == X86ISD::UCOMI) { 3252 if (!Cond.hasOneUse()) { 3253 std::vector<MVT::ValueType> Tys; 3254 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 3255 Tys.push_back(Cond.Val->getValueType(i)); 3256 std::vector<SDOperand> Ops; 3257 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 3258 Ops.push_back(Cond.getOperand(i)); 3259 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 3260 } 3261 3262 CC = Cond.getOperand(0); 3263 Cond = Cond.getOperand(1); 3264 // Make a copy as flag result cannot be used by more than one. 
3265 Cond = DAG.getNode(CmpOpc, MVT::Flag, 3266 Cond.getOperand(0), Cond.getOperand(1)); 3267 } else 3268 addTest = true; 3269 } else 3270 addTest = true; 3271 3272 if (addTest) { 3273 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3274 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 3275 } 3276 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3277 Op.getOperand(0), Op.getOperand(2), CC, Cond); 3278} 3279 3280SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 3281 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 3282 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3283 DAG.getTargetJumpTable(JT->getIndex(), 3284 getPointerTy())); 3285 if (Subtarget->isTargetDarwin()) { 3286 // With PIC, the address is actually $g + Offset. 3287 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 3288 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3289 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 3290 } 3291 3292 return Result; 3293} 3294 3295SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { 3296 SDOperand Copy; 3297 3298 switch(Op.getNumOperands()) { 3299 default: 3300 assert(0 && "Do not know how to return this many arguments!"); 3301 abort(); 3302 case 1: // ret void. 3303 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 3304 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 3305 case 2: { 3306 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 3307 3308 if (MVT::isVector(ArgVT)) { 3309 // Integer or FP vector result -> XMM0. 
3310 if (DAG.getMachineFunction().liveout_empty()) 3311 DAG.getMachineFunction().addLiveOut(X86::XMM0); 3312 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 3313 SDOperand()); 3314 } else if (MVT::isInteger(ArgVT)) { 3315 // Integer result -> EAX 3316 if (DAG.getMachineFunction().liveout_empty()) 3317 DAG.getMachineFunction().addLiveOut(X86::EAX); 3318 3319 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 3320 SDOperand()); 3321 } else if (!X86ScalarSSE) { 3322 // FP return with fp-stack value. 3323 if (DAG.getMachineFunction().liveout_empty()) 3324 DAG.getMachineFunction().addLiveOut(X86::ST0); 3325 3326 std::vector<MVT::ValueType> Tys; 3327 Tys.push_back(MVT::Other); 3328 Tys.push_back(MVT::Flag); 3329 std::vector<SDOperand> Ops; 3330 Ops.push_back(Op.getOperand(0)); 3331 Ops.push_back(Op.getOperand(1)); 3332 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 3333 } else { 3334 // FP return with ScalarSSE (return on fp-stack). 3335 if (DAG.getMachineFunction().liveout_empty()) 3336 DAG.getMachineFunction().addLiveOut(X86::ST0); 3337 3338 SDOperand MemLoc; 3339 SDOperand Chain = Op.getOperand(0); 3340 SDOperand Value = Op.getOperand(1); 3341 3342 if (Value.getOpcode() == ISD::LOAD && 3343 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 3344 Chain = Value.getOperand(0); 3345 MemLoc = Value.getOperand(1); 3346 } else { 3347 // Spill the value to memory and reload it into top of stack. 
3348 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 3349 MachineFunction &MF = DAG.getMachineFunction(); 3350 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3351 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 3352 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 3353 Value, MemLoc, DAG.getSrcValue(0)); 3354 } 3355 std::vector<MVT::ValueType> Tys; 3356 Tys.push_back(MVT::f64); 3357 Tys.push_back(MVT::Other); 3358 std::vector<SDOperand> Ops; 3359 Ops.push_back(Chain); 3360 Ops.push_back(MemLoc); 3361 Ops.push_back(DAG.getValueType(ArgVT)); 3362 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 3363 Tys.clear(); 3364 Tys.push_back(MVT::Other); 3365 Tys.push_back(MVT::Flag); 3366 Ops.clear(); 3367 Ops.push_back(Copy.getValue(1)); 3368 Ops.push_back(Copy); 3369 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 3370 } 3371 break; 3372 } 3373 case 3: 3374 if (DAG.getMachineFunction().liveout_empty()) { 3375 DAG.getMachineFunction().addLiveOut(X86::EAX); 3376 DAG.getMachineFunction().addLiveOut(X86::EDX); 3377 } 3378 3379 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 3380 SDOperand()); 3381 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 3382 break; 3383 } 3384 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 3385 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 3386 Copy.getValue(1)); 3387} 3388 3389SDOperand 3390X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3391 if (FormalArgs.size() == 0) { 3392 unsigned CC = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 3393 if (CC == CallingConv::Fast && EnableFastCC) 3394 LowerFastCCArguments(Op, DAG); 3395 else 3396 LowerCCCArguments(Op, DAG); 3397 } 3398 return FormalArgs[Op.ResNo]; 3399} 3400 3401SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3402 SDOperand InFlag(0, 0); 3403 SDOperand Chain = Op.getOperand(0); 3404 unsigned Align = 3405 
(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3406 if (Align == 0) Align = 1; 3407 3408 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3409 // If not DWORD aligned, call memset if size is less than the threshold. 3410 // It knows how to align to the right boundary first. 3411 if ((Align & 3) != 0 || 3412 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3413 MVT::ValueType IntPtr = getPointerTy(); 3414 const Type *IntPtrTy = getTargetData().getIntPtrType(); 3415 std::vector<std::pair<SDOperand, const Type*> > Args; 3416 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 3417 // Extend the ubyte argument to be an int value for the call. 3418 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3419 Args.push_back(std::make_pair(Val, IntPtrTy)); 3420 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 3421 std::pair<SDOperand,SDOperand> CallResult = 3422 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 3423 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3424 return CallResult.second; 3425 } 3426 3427 MVT::ValueType AVT; 3428 SDOperand Count; 3429 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3430 unsigned BytesLeft = 0; 3431 bool TwoRepStos = false; 3432 if (ValC) { 3433 unsigned ValReg; 3434 unsigned Val = ValC->getValue() & 255; 3435 3436 // If the value is a constant, then we can potentially use larger sets. 
3437 switch (Align & 3) { 3438 case 2: // WORD aligned 3439 AVT = MVT::i16; 3440 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 3441 BytesLeft = I->getValue() % 2; 3442 Val = (Val << 8) | Val; 3443 ValReg = X86::AX; 3444 break; 3445 case 0: // DWORD aligned 3446 AVT = MVT::i32; 3447 if (I) { 3448 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 3449 BytesLeft = I->getValue() % 4; 3450 } else { 3451 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 3452 DAG.getConstant(2, MVT::i8)); 3453 TwoRepStos = true; 3454 } 3455 Val = (Val << 8) | Val; 3456 Val = (Val << 16) | Val; 3457 ValReg = X86::EAX; 3458 break; 3459 default: // Byte aligned 3460 AVT = MVT::i8; 3461 Count = Op.getOperand(3); 3462 ValReg = X86::AL; 3463 break; 3464 } 3465 3466 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3467 InFlag); 3468 InFlag = Chain.getValue(1); 3469 } else { 3470 AVT = MVT::i8; 3471 Count = Op.getOperand(3); 3472 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3473 InFlag = Chain.getValue(1); 3474 } 3475 3476 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 3477 InFlag = Chain.getValue(1); 3478 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 3479 InFlag = Chain.getValue(1); 3480 3481 std::vector<MVT::ValueType> Tys; 3482 Tys.push_back(MVT::Other); 3483 Tys.push_back(MVT::Flag); 3484 std::vector<SDOperand> Ops; 3485 Ops.push_back(Chain); 3486 Ops.push_back(DAG.getValueType(AVT)); 3487 Ops.push_back(InFlag); 3488 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 3489 3490 if (TwoRepStos) { 3491 InFlag = Chain.getValue(1); 3492 Count = Op.getOperand(3); 3493 MVT::ValueType CVT = Count.getValueType(); 3494 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3495 DAG.getConstant(3, CVT)); 3496 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 3497 InFlag = Chain.getValue(1); 3498 Tys.clear(); 3499 Tys.push_back(MVT::Other); 3500 Tys.push_back(MVT::Flag); 3501 Ops.clear(); 3502 
Ops.push_back(Chain); 3503 Ops.push_back(DAG.getValueType(MVT::i8)); 3504 Ops.push_back(InFlag); 3505 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 3506 } else if (BytesLeft) { 3507 // Issue stores for the last 1 - 3 bytes. 3508 SDOperand Value; 3509 unsigned Val = ValC->getValue() & 255; 3510 unsigned Offset = I->getValue() - BytesLeft; 3511 SDOperand DstAddr = Op.getOperand(1); 3512 MVT::ValueType AddrVT = DstAddr.getValueType(); 3513 if (BytesLeft >= 2) { 3514 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 3515 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 3516 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3517 DAG.getConstant(Offset, AddrVT)), 3518 DAG.getSrcValue(NULL)); 3519 BytesLeft -= 2; 3520 Offset += 2; 3521 } 3522 3523 if (BytesLeft == 1) { 3524 Value = DAG.getConstant(Val, MVT::i8); 3525 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 3526 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3527 DAG.getConstant(Offset, AddrVT)), 3528 DAG.getSrcValue(NULL)); 3529 } 3530 } 3531 3532 return Chain; 3533} 3534 3535SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 3536 SDOperand Chain = Op.getOperand(0); 3537 unsigned Align = 3538 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3539 if (Align == 0) Align = 1; 3540 3541 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3542 // If not DWORD aligned, call memcpy if size is less than the threshold. 3543 // It knows how to align to the right boundary first. 
3544 if ((Align & 3) != 0 || 3545 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3546 MVT::ValueType IntPtr = getPointerTy(); 3547 const Type *IntPtrTy = getTargetData().getIntPtrType(); 3548 std::vector<std::pair<SDOperand, const Type*> > Args; 3549 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 3550 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 3551 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 3552 std::pair<SDOperand,SDOperand> CallResult = 3553 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 3554 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 3555 return CallResult.second; 3556 } 3557 3558 MVT::ValueType AVT; 3559 SDOperand Count; 3560 unsigned BytesLeft = 0; 3561 bool TwoRepMovs = false; 3562 switch (Align & 3) { 3563 case 2: // WORD aligned 3564 AVT = MVT::i16; 3565 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 3566 BytesLeft = I->getValue() % 2; 3567 break; 3568 case 0: // DWORD aligned 3569 AVT = MVT::i32; 3570 if (I) { 3571 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 3572 BytesLeft = I->getValue() % 4; 3573 } else { 3574 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 3575 DAG.getConstant(2, MVT::i8)); 3576 TwoRepMovs = true; 3577 } 3578 break; 3579 default: // Byte aligned 3580 AVT = MVT::i8; 3581 Count = Op.getOperand(3); 3582 break; 3583 } 3584 3585 SDOperand InFlag(0, 0); 3586 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 3587 InFlag = Chain.getValue(1); 3588 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 3589 InFlag = Chain.getValue(1); 3590 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 3591 InFlag = Chain.getValue(1); 3592 3593 std::vector<MVT::ValueType> Tys; 3594 Tys.push_back(MVT::Other); 3595 Tys.push_back(MVT::Flag); 3596 std::vector<SDOperand> Ops; 3597 Ops.push_back(Chain); 3598 Ops.push_back(DAG.getValueType(AVT)); 3599 Ops.push_back(InFlag); 3600 Chain = 
DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 3601 3602 if (TwoRepMovs) { 3603 InFlag = Chain.getValue(1); 3604 Count = Op.getOperand(3); 3605 MVT::ValueType CVT = Count.getValueType(); 3606 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3607 DAG.getConstant(3, CVT)); 3608 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 3609 InFlag = Chain.getValue(1); 3610 Tys.clear(); 3611 Tys.push_back(MVT::Other); 3612 Tys.push_back(MVT::Flag); 3613 Ops.clear(); 3614 Ops.push_back(Chain); 3615 Ops.push_back(DAG.getValueType(MVT::i8)); 3616 Ops.push_back(InFlag); 3617 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 3618 } else if (BytesLeft) { 3619 // Issue loads and stores for the last 1 - 3 bytes. 3620 unsigned Offset = I->getValue() - BytesLeft; 3621 SDOperand DstAddr = Op.getOperand(1); 3622 MVT::ValueType DstVT = DstAddr.getValueType(); 3623 SDOperand SrcAddr = Op.getOperand(2); 3624 MVT::ValueType SrcVT = SrcAddr.getValueType(); 3625 SDOperand Value; 3626 if (BytesLeft >= 2) { 3627 Value = DAG.getLoad(MVT::i16, Chain, 3628 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3629 DAG.getConstant(Offset, SrcVT)), 3630 DAG.getSrcValue(NULL)); 3631 Chain = Value.getValue(1); 3632 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 3633 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3634 DAG.getConstant(Offset, DstVT)), 3635 DAG.getSrcValue(NULL)); 3636 BytesLeft -= 2; 3637 Offset += 2; 3638 } 3639 3640 if (BytesLeft == 1) { 3641 Value = DAG.getLoad(MVT::i8, Chain, 3642 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3643 DAG.getConstant(Offset, SrcVT)), 3644 DAG.getSrcValue(NULL)); 3645 Chain = Value.getValue(1); 3646 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 3647 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3648 DAG.getConstant(Offset, DstVT)), 3649 DAG.getSrcValue(NULL)); 3650 } 3651 } 3652 3653 return Chain; 3654} 3655 3656SDOperand 3657X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 3658 std::vector<MVT::ValueType> Tys; 3659 Tys.push_back(MVT::Other); 
3660 Tys.push_back(MVT::Flag); 3661 std::vector<SDOperand> Ops; 3662 Ops.push_back(Op.getOperand(0)); 3663 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 3664 Ops.clear(); 3665 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 3666 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 3667 MVT::i32, Ops[0].getValue(2))); 3668 Ops.push_back(Ops[1].getValue(1)); 3669 Tys[0] = Tys[1] = MVT::i32; 3670 Tys.push_back(MVT::Other); 3671 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 3672} 3673 3674SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 3675 // vastart just stores the address of the VarArgsFrameIndex slot into the 3676 // memory location argument. 3677 // FIXME: Replace MVT::i32 with PointerTy 3678 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 3679 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 3680 Op.getOperand(1), Op.getOperand(2)); 3681} 3682 3683SDOperand 3684X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 3685 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 3686 switch (IntNo) { 3687 default: return SDOperand(); // Don't custom lower most intrinsics. 3688 // Comparison intrinsics. 
3689 case Intrinsic::x86_sse_comieq_ss: 3690 case Intrinsic::x86_sse_comilt_ss: 3691 case Intrinsic::x86_sse_comile_ss: 3692 case Intrinsic::x86_sse_comigt_ss: 3693 case Intrinsic::x86_sse_comige_ss: 3694 case Intrinsic::x86_sse_comineq_ss: 3695 case Intrinsic::x86_sse_ucomieq_ss: 3696 case Intrinsic::x86_sse_ucomilt_ss: 3697 case Intrinsic::x86_sse_ucomile_ss: 3698 case Intrinsic::x86_sse_ucomigt_ss: 3699 case Intrinsic::x86_sse_ucomige_ss: 3700 case Intrinsic::x86_sse_ucomineq_ss: 3701 case Intrinsic::x86_sse2_comieq_sd: 3702 case Intrinsic::x86_sse2_comilt_sd: 3703 case Intrinsic::x86_sse2_comile_sd: 3704 case Intrinsic::x86_sse2_comigt_sd: 3705 case Intrinsic::x86_sse2_comige_sd: 3706 case Intrinsic::x86_sse2_comineq_sd: 3707 case Intrinsic::x86_sse2_ucomieq_sd: 3708 case Intrinsic::x86_sse2_ucomilt_sd: 3709 case Intrinsic::x86_sse2_ucomile_sd: 3710 case Intrinsic::x86_sse2_ucomigt_sd: 3711 case Intrinsic::x86_sse2_ucomige_sd: 3712 case Intrinsic::x86_sse2_ucomineq_sd: { 3713 unsigned Opc = 0; 3714 ISD::CondCode CC = ISD::SETCC_INVALID; 3715 switch (IntNo) { 3716 default: break; 3717 case Intrinsic::x86_sse_comieq_ss: 3718 case Intrinsic::x86_sse2_comieq_sd: 3719 Opc = X86ISD::COMI; 3720 CC = ISD::SETEQ; 3721 break; 3722 case Intrinsic::x86_sse_comilt_ss: 3723 case Intrinsic::x86_sse2_comilt_sd: 3724 Opc = X86ISD::COMI; 3725 CC = ISD::SETLT; 3726 break; 3727 case Intrinsic::x86_sse_comile_ss: 3728 case Intrinsic::x86_sse2_comile_sd: 3729 Opc = X86ISD::COMI; 3730 CC = ISD::SETLE; 3731 break; 3732 case Intrinsic::x86_sse_comigt_ss: 3733 case Intrinsic::x86_sse2_comigt_sd: 3734 Opc = X86ISD::COMI; 3735 CC = ISD::SETGT; 3736 break; 3737 case Intrinsic::x86_sse_comige_ss: 3738 case Intrinsic::x86_sse2_comige_sd: 3739 Opc = X86ISD::COMI; 3740 CC = ISD::SETGE; 3741 break; 3742 case Intrinsic::x86_sse_comineq_ss: 3743 case Intrinsic::x86_sse2_comineq_sd: 3744 Opc = X86ISD::COMI; 3745 CC = ISD::SETNE; 3746 break; 3747 case Intrinsic::x86_sse_ucomieq_ss: 3748 case 
Intrinsic::x86_sse2_ucomieq_sd: 3749 Opc = X86ISD::UCOMI; 3750 CC = ISD::SETEQ; 3751 break; 3752 case Intrinsic::x86_sse_ucomilt_ss: 3753 case Intrinsic::x86_sse2_ucomilt_sd: 3754 Opc = X86ISD::UCOMI; 3755 CC = ISD::SETLT; 3756 break; 3757 case Intrinsic::x86_sse_ucomile_ss: 3758 case Intrinsic::x86_sse2_ucomile_sd: 3759 Opc = X86ISD::UCOMI; 3760 CC = ISD::SETLE; 3761 break; 3762 case Intrinsic::x86_sse_ucomigt_ss: 3763 case Intrinsic::x86_sse2_ucomigt_sd: 3764 Opc = X86ISD::UCOMI; 3765 CC = ISD::SETGT; 3766 break; 3767 case Intrinsic::x86_sse_ucomige_ss: 3768 case Intrinsic::x86_sse2_ucomige_sd: 3769 Opc = X86ISD::UCOMI; 3770 CC = ISD::SETGE; 3771 break; 3772 case Intrinsic::x86_sse_ucomineq_ss: 3773 case Intrinsic::x86_sse2_ucomineq_sd: 3774 Opc = X86ISD::UCOMI; 3775 CC = ISD::SETNE; 3776 break; 3777 } 3778 bool Flip; 3779 unsigned X86CC; 3780 translateX86CC(CC, true, X86CC, Flip); 3781 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 3782 Op.getOperand(Flip?1:2)); 3783 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 3784 DAG.getConstant(X86CC, MVT::i8), Cond); 3785 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 3786 } 3787 } 3788} 3789 3790/// LowerOperation - Provide custom lowering hooks for some operations. 
3791/// 3792SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 3793 switch (Op.getOpcode()) { 3794 default: assert(0 && "Should not custom lower this!"); 3795 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 3796 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3797 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 3798 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 3799 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 3800 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3801 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 3802 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 3803 case ISD::SHL_PARTS: 3804 case ISD::SRA_PARTS: 3805 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 3806 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 3807 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 3808 case ISD::FABS: return LowerFABS(Op, DAG); 3809 case ISD::FNEG: return LowerFNEG(Op, DAG); 3810 case ISD::SETCC: return LowerSETCC(Op, DAG); 3811 case ISD::SELECT: return LowerSELECT(Op, DAG); 3812 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 3813 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 3814 case ISD::RET: return LowerRET(Op, DAG); 3815 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 3816 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 3817 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 3818 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 3819 case ISD::VASTART: return LowerVASTART(Op, DAG); 3820 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 3821 } 3822} 3823 3824const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3825 switch (Opcode) { 3826 default: return NULL; 3827 case X86ISD::SHLD: return "X86ISD::SHLD"; 3828 case X86ISD::SHRD: return "X86ISD::SHRD"; 3829 case X86ISD::FAND: return "X86ISD::FAND"; 3830 case 
X86ISD::FXOR: return "X86ISD::FXOR"; 3831 case X86ISD::FILD: return "X86ISD::FILD"; 3832 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3833 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3834 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3835 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3836 case X86ISD::FLD: return "X86ISD::FLD"; 3837 case X86ISD::FST: return "X86ISD::FST"; 3838 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 3839 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3840 case X86ISD::CALL: return "X86ISD::CALL"; 3841 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3842 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3843 case X86ISD::CMP: return "X86ISD::CMP"; 3844 case X86ISD::TEST: return "X86ISD::TEST"; 3845 case X86ISD::COMI: return "X86ISD::COMI"; 3846 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3847 case X86ISD::SETCC: return "X86ISD::SETCC"; 3848 case X86ISD::CMOV: return "X86ISD::CMOV"; 3849 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3850 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3851 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3852 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3853 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3854 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3855 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3856 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3857 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3858 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3859 } 3860} 3861 3862void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3863 uint64_t Mask, 3864 uint64_t &KnownZero, 3865 uint64_t &KnownOne, 3866 unsigned Depth) const { 3867 unsigned Opc = Op.getOpcode(); 3868 assert((Opc >= ISD::BUILTIN_OP_END || 3869 Opc == ISD::INTRINSIC_WO_CHAIN || 3870 Opc == ISD::INTRINSIC_W_CHAIN || 3871 Opc == ISD::INTRINSIC_VOID) && 3872 "Should use MaskedValueIsZero if 
you don't know whether Op" 3873 " is a target node!"); 3874 3875 KnownZero = KnownOne = 0; // Don't know anything. 3876 switch (Opc) { 3877 default: break; 3878 case X86ISD::SETCC: 3879 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3880 break; 3881 } 3882} 3883 3884std::vector<unsigned> X86TargetLowering:: 3885getRegClassForInlineAsmConstraint(const std::string &Constraint, 3886 MVT::ValueType VT) const { 3887 if (Constraint.size() == 1) { 3888 // FIXME: not handling fp-stack yet! 3889 // FIXME: not handling MMX registers yet ('y' constraint). 3890 switch (Constraint[0]) { // GCC X86 Constraint Letters 3891 default: break; // Unknown constriant letter 3892 case 'r': // GENERAL_REGS 3893 case 'R': // LEGACY_REGS 3894 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3895 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3896 case 'l': // INDEX_REGS 3897 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3898 X86::ESI, X86::EDI, X86::EBP, 0); 3899 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3900 case 'Q': // Q_REGS 3901 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3902 case 'x': // SSE_REGS if SSE1 allowed 3903 if (Subtarget->hasSSE1()) 3904 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3905 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3906 0); 3907 return std::vector<unsigned>(); 3908 case 'Y': // SSE_REGS if SSE2 allowed 3909 if (Subtarget->hasSSE2()) 3910 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3911 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3912 0); 3913 return std::vector<unsigned>(); 3914 } 3915 } 3916 3917 return std::vector<unsigned>(); 3918} 3919 3920/// isLegalAddressImmediate - Return true if the integer value or 3921/// GlobalValue can be used as the offset of the target addressing mode. 3922bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3923 // X86 allows a sign-extended 32-bit immediate field. 
3924 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3925} 3926 3927bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3928 if (Subtarget->isTargetDarwin()) { 3929 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3930 if (RModel == Reloc::Static) 3931 return true; 3932 else if (RModel == Reloc::DynamicNoPIC) 3933 return !DarwinGVRequiresExtraLoad(GV); 3934 else 3935 return false; 3936 } else 3937 return true; 3938} 3939 3940/// isShuffleMaskLegal - Targets can use this to indicate that they only 3941/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3942/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3943/// are assumed to be legal. 3944bool 3945X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3946 // Only do shuffles on 128-bit vector types for now. 3947 if (MVT::getSizeInBits(VT) == 64) return false; 3948 return (Mask.Val->getNumOperands() <= 4 || 3949 isSplatMask(Mask.Val) || 3950 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3951 X86::isUNPCKLMask(Mask.Val) || 3952 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3953 X86::isUNPCKHMask(Mask.Val)); 3954} 3955 3956bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 3957 MVT::ValueType EVT, 3958 SelectionDAG &DAG) const { 3959 unsigned NumElts = BVOps.size(); 3960 // Only do shuffles on 128-bit vector types for now. 3961 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 3962 if (NumElts == 2) return true; 3963 if (NumElts == 4) { 3964 return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) || 3965 isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps)); 3966 } 3967 return false; 3968} 3969