X86ISelLowering.cpp revision e219945348207453a2d5e21021ba3211f8f94e25
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
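
  // Illustrative note (not from the original source): promoting an unsigned
  // conversion just widens the operand so that a signed convert gives the
  // same result, e.g. a u16 -> f64 conversion becomes
  //
  //   sint_to_fp (zext i16 %x to i32)     ; u16 fits in positive i32 range
  //
  // For u32 under SSE there is no safely wider legal type whose signed
  // convert SSE can perform here, so that case is expanded by the legalizer
  // rather than promoted.
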
  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
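
  // Illustrative note (not from the original source): promoting a SELECT
  // changes the value type being selected, not the instruction, e.g. the
  // legalizer rewrites a select between i8 values as a select between
  // extended i16 values whose result is truncated back; the custom i16/i32
  // SELECT lowering below then handles the widened node.
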
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG            , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE        , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE     , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v2f64, Legal);
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2f64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
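
    // Illustrative note (not from the original source): the custom i16
    // element insert corresponds to the SSE2 PINSRW instruction, e.g.
    //
    //   pinsrw $3, %eax, %xmm0    ; xmm0[3] = ax
    //
    // which is why v8i16 (and, just below, v4f32 in terms of v8i16) gets
    // Custom treatment while most other element inserts stay expanded.
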
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//                  C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

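// Illustrative usage (not from the original source): the argument lowering
// below uses AddLiveIn to bind an ABI-fixed physical register to a fresh
// virtual register, e.g.
//
//   unsigned VReg = AddLiveIn(MF, X86::XMM0, X86::VR128RegisterClass);
//   SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
//
// so later passes see an ordinary virtual-register def rather than a raw
// physical register.
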
/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in an XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;
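
  // Worked example (illustrative, not from the original source): for a
  // function f(int, double, int), whose arguments all go on the stack under
  // the C convention, the loop above assigns:
  //   int    -> ArgOffset 0,  ArgIncrement 4
  //   double -> ArgOffset 4,  ArgIncrement 8
  //   int    -> ArgOffset 12, ArgIncrement 4
  // so BytesCallerReserves ends up as 16.
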
  // If this is a struct return on Darwin/X86, the callee pops the hidden
  // struct pointer.
  if (MF.getFunction()->getCallingConv() == CallingConv::CSRet &&
      Subtarget->isTargetDarwin())
    BytesToPopOnReturn = 4;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}


SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain      = Op.getOperand(0);
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg        = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall      = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee     = Op.getOperand(4);
  MVT::ValueType RetVT = Op.Val->getValueType(0);
  unsigned NumOps      = (Op.getNumOperands() - 5) / 2;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        ++NumXMMRegs;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
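
  // Worked example (illustrative, not from the original source): for a call
  // g(i32, f64, v4f32, v4f32, v4f32, v4f32, v4f32), the counting loop above
  // yields NumBytes = 4 + 8 = 12 for the scalars, keeps the first four
  // vectors in XMM0-XMM3, then rounds 12 up to 16 and adds 16 for the fifth
  // vector, giving NumBytes = 32.
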
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits. If the input type is signed, use a
      // sign extend; otherwise use a zero extend.
      unsigned ExtOp =
        cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  // If this is a call to a struct-return function on Darwin/X86, the callee
  // pops the hidden struct pointer, so we have to push it back.
  if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin())
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      // Two i32 results: the value comes back in EAX:EDX.
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
                           DAG.getSrcValue(NULL));
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                 Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 1;
    break;
  case MVT::i16:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 2;
    break;
  case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 4;
    break;
  case MVT::i64:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 4;
    } else
#endif
      ObjSize = 8;
    break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

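// Illustrative example (not from the original source): with
// FASTCC_NUM_INT_ARGS_INREGS == 2, a fastcc function f(int a, int b, int c)
// would be assigned:
//   a -> EAX (ObjIntRegs = 1), b -> EDX (ObjIntRegs = 1),
//   c -> 4-byte stack slot (ObjSize = 4).
// An i64 arriving when only one register remains is split: the low half in
// that register and the high half in a 4-byte stack slot (ObjIntRegs = 1,
// ObjSize = 4), recombined below with a BUILD_PAIR.
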
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues()-1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far. This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX
  // are both used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                            ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        }
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }

    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                                          DAG.getSrcValue(NULL));
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                               DAG.getSrcValue(NULL));
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);
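
  // Worked example for the 8n+4 adjustment below (illustrative, not from the
  // original source): ArgOffset is always a multiple of 4 here, so
  //   ArgOffset == 0 -> bumped to 4,   ArgOffset == 8  -> bumped to 12,
  //   ArgOffset == 4 -> unchanged,     ArgOffset == 12 -> unchanged,
  // i.e. the callee-pop amount always ends up congruent to 4 (mod 8).
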
  // Make sure the argument area occupies 8n+4 bytes: together with the
  // 4-byte return address this keeps the start of the arguments, and the
  // arguments once the retaddr has been pushed, properly aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    MF.addLiveOut(X86::XMM0);
    break;
  }

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain      = Op.getOperand(0);
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg        = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall      = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee     = Op.getOperand(4);
  MVT::ValueType RetVT = Op.Val->getValueType(0);
  unsigned NumOps      = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far. This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX
  // are both used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPRArgRegs[][2] = {
    { X86::AL,  X86::DL },
    { X86::AX,  X86::DX },
    { X86::EAX, X86::EDX }
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
#endif
      // Fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        NumXMMRegs++;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  // Make sure the argument area occupies 8n+4 bytes: together with the
  // 4-byte return address this keeps the start of the arguments, and the
  // arguments once the retaddr has been pushed, properly aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegsToPass.push_back(
          std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs],
                         Arg));
        ++NumIntRegs;
        break;
      }
#endif
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
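
  // Illustrative note (not from the original source): the operand list built
  // here ends up shaped as
  //
  //   (Chain, Callee, <one getRegister node per argument register>, [InFlag])
  //
  // The trailing register operands do not carry values themselves (the
  // CopyToReg chain above did that); they just mark those registers as live
  // across the call.
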
  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
                           DAG.getSrcValue(NULL));
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }


  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
}


std::pair<SDOperand, SDOperand> X86TargetLowering::
LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                        SelectionDAG &DAG) {
  SDOperand Result;
  if (Depth)        // Depths > 0 not supported yet!
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address.
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
                           DAG.getSrcValue(NULL));
    else
      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
                           DAG.getConstant(4, MVT::i32));
  }
  return std::make_pair(Result, Chain);
}

/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
/// which corresponds to the condition code.
static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
  switch (X86CC) {
  default: assert(0 && "Unknown X86 conditional code!");
  case X86ISD::COND_A:  return X86::JA;
  case X86ISD::COND_AE: return X86::JAE;
  case X86ISD::COND_B:  return X86::JB;
  case X86ISD::COND_BE: return X86::JBE;
  case X86ISD::COND_E:  return X86::JE;
  case X86ISD::COND_G:  return X86::JG;
  case X86ISD::COND_GE: return X86::JGE;
  case X86ISD::COND_L:  return X86::JL;
  case X86ISD::COND_LE: return X86::JLE;
  case X86ISD::COND_NE: return X86::JNE;
  case X86ISD::COND_NO: return X86::JNO;
  case X86ISD::COND_NP: return X86::JNP;
  case X86ISD::COND_NS: return X86::JNS;
  case X86ISD::COND_O:  return X86::JO;
  case X86ISD::COND_P:  return X86::JP;
  case X86ISD::COND_S:  return X86::JS;
  }
}

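// Illustrative example (not from the original source): a signed "less than"
// comparison is translated by translateX86CC below as
//
//   ISD::SETLT  ->  X86ISD::COND_L
//
// and getCondBrOpcodeForX86CC above then maps X86ISD::COND_L to X86::JL when
// a conditional branch is finally emitted.
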
/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. Flip is set to true if
/// the order of the comparison operands should be flipped.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO:  X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:   X86CC = X86ISD::COND_NP; break;
    }
  }

  return X86CC != X86ISD::COND_INVALID;
}

static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code? Current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
/// load. For Darwin, external and weak symbols are indirect, loading the
/// value at address GV rather than the value of GV itself. This means that
/// the GlobalAddress must be in the base or index register of the address,
/// not the GV offset field.
static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
}

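// Illustrative example (not from the original source): for floating point
// setcc's there is no condition that treats unordered operands the way
// SETOLT needs, so translateX86CC flips the operands instead:
//
//   setolt %x, %y   ==>   compare %y, %x and test COND_A (above)
//
// i.e. x < y (ordered) is tested as y > x, and the unordered case (which
// sets ZF, PF and CF per the table above) is correctly excluded by "above".
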
/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value equals the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

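// Illustrative example (not from the original source): the shuffle mask
// <2, 1, 0, 3> passes isPSHUFDMask (every index < 4, so only the first
// vector is referenced) and corresponds to
//
//   pshufd $0xC6, %xmm1, %xmm0   ; imm = (3<<6)|(0<<4)|(1<<2)|2 = 0xC6
//
// whereas <0, 4, 1, 5> does not, since index 4 selects from the second
// vector (that pattern is an unpcklps-style shuffle instead).
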
1507static bool isSHUFPMask(std::vector<SDOperand> &N) {
1508  unsigned NumElems = N.size();
1509  if (NumElems != 2 && NumElems != 4) return false;
1510
1511  unsigned Half = NumElems / 2;
1512  for (unsigned i = 0; i < Half; ++i)
1513    if (!isUndefOrInRange(N[i], 0, NumElems))
1514      return false;
1515  for (unsigned i = Half; i < NumElems; ++i)
1516    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
1517      return false;
1518
1519  return true;
1520}
1521
1522bool X86::isSHUFPMask(SDNode *N) {
1523  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1524  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1525  return ::isSHUFPMask(Ops);
1526}
1527
1528/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
1529/// the reverse of what x86 shuffles want. x86 shuffles require the lower
1530/// half elements to come from vector 1 (which would equal the dest.) and
1531/// the upper half to come from vector 2.
1532static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
1533  unsigned NumElems = Ops.size();
1534  if (NumElems != 2 && NumElems != 4) return false;
1535
1536  unsigned Half = NumElems / 2;
1537  for (unsigned i = 0; i < Half; ++i)
1538    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
1539      return false;
1540  for (unsigned i = Half; i < NumElems; ++i)
1541    if (!isUndefOrInRange(Ops[i], 0, NumElems))
1542      return false;
1543  return true;
1544}
1545
1546static bool isCommutedSHUFP(SDNode *N) {
1547  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1548  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1549  return isCommutedSHUFP(Ops);
1550}
1551
1552/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
1553/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
1554bool X86::isMOVHLPSMask(SDNode *N) {
1555  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1556
1557  if (N->getNumOperands() != 4)
1558    return false;
1559
1560  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
1561  return isUndefOrEqual(N->getOperand(0), 6) &&
1562         isUndefOrEqual(N->getOperand(1), 7) &&
1563         isUndefOrEqual(N->getOperand(2), 2) &&
1564         isUndefOrEqual(N->getOperand(3), 3);
1565}
1566
1567/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
1568/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
1569bool X86::isMOVLPMask(SDNode *N) {
1570  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1571
1572  unsigned NumElems = N->getNumOperands();
1573  if (NumElems != 2 && NumElems != 4)
1574    return false;
1575
1576  for (unsigned i = 0; i < NumElems/2; ++i)
1577    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
1578      return false;
1579
1580  for (unsigned i = NumElems/2; i < NumElems; ++i)
1581    if (!isUndefOrEqual(N->getOperand(i), i))
1582      return false;
1583
1584  return true;
1585}
1586
1587/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
1588/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
1589/// and MOVLHPS.
1590bool X86::isMOVHPMask(SDNode *N) { 1591 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1592 1593 unsigned NumElems = N->getNumOperands(); 1594 if (NumElems != 2 && NumElems != 4) 1595 return false; 1596 1597 for (unsigned i = 0; i < NumElems/2; ++i) 1598 if (!isUndefOrEqual(N->getOperand(i), i)) 1599 return false; 1600 1601 for (unsigned i = 0; i < NumElems/2; ++i) { 1602 SDOperand Arg = N->getOperand(i + NumElems/2); 1603 if (!isUndefOrEqual(Arg, i + NumElems)) 1604 return false; 1605 } 1606 1607 return true; 1608} 1609 1610/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1611/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1612bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1613 unsigned NumElems = N.size(); 1614 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1615 return false; 1616 1617 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1618 SDOperand BitI = N[i]; 1619 SDOperand BitI1 = N[i+1]; 1620 if (!isUndefOrEqual(BitI, j)) 1621 return false; 1622 if (V2IsSplat) { 1623 if (isUndefOrEqual(BitI1, NumElems)) 1624 return false; 1625 } else { 1626 if (!isUndefOrEqual(BitI1, j + NumElems)) 1627 return false; 1628 } 1629 } 1630 1631 return true; 1632} 1633 1634bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1635 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1636 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1637 return ::isUNPCKLMask(Ops, V2IsSplat); 1638} 1639 1640/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1641/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1642bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1643 unsigned NumElems = N.size(); 1644 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1645 return false; 1646 1647 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1648 SDOperand BitI = N[i]; 1649 SDOperand BitI1 = N[i+1]; 1650 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1651 return false; 1652 if (V2IsSplat) { 1653 if (isUndefOrEqual(BitI1, NumElems)) 1654 return false; 1655 } else { 1656 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1657 return false; 1658 } 1659 } 1660 1661 return true; 1662} 1663 1664bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1665 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1666 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1667 return ::isUNPCKHMask(Ops, V2IsSplat); 1668} 1669 1670/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1671/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1672/// <0, 0, 1, 1> 1673bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1674 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1675 1676 unsigned NumElems = N->getNumOperands(); 1677 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1678 return false; 1679 1680 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1681 SDOperand BitI = N->getOperand(i); 1682 SDOperand BitI1 = N->getOperand(i+1); 1683 1684 if (!isUndefOrEqual(BitI, j)) 1685 return false; 1686 if (!isUndefOrEqual(BitI1, j)) 1687 return false; 1688 } 1689 1690 return true; 1691} 1692 1693/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1694/// specifies a shuffle of elements that is suitable for input to MOVSS, 1695/// MOVSD, and MOVD, i.e. setting the lowest element. 
1696static bool isMOVLMask(std::vector<SDOperand> &N) {
1697  unsigned NumElems = N.size();
1698  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1699    return false;
1700
1701  if (!isUndefOrEqual(N[0], NumElems))
1702    return false;
1703
1704  for (unsigned i = 1; i < NumElems; ++i) {
1705    SDOperand Arg = N[i];
1706    if (!isUndefOrEqual(Arg, i))
1707      return false;
1708  }
1709
1710  return true;
1711}
1712
1713bool X86::isMOVLMask(SDNode *N) {
1714  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1715  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1716  return ::isMOVLMask(Ops);
1717}
1718
1719/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
1720/// of what x86 movss wants. X86 movss requires the lowest element to be the
1721/// lowest element of vector 2 and the other elements to come from vector 1 in order.
1722static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
1723  unsigned NumElems = Ops.size();
1724  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1725    return false;
1726
1727  if (!isUndefOrEqual(Ops[0], 0))
1728    return false;
1729
1730  for (unsigned i = 1; i < NumElems; ++i) {
1731    SDOperand Arg = Ops[i];
1732    if (V2IsSplat) {
1733      if (!isUndefOrEqual(Arg, NumElems))
1734        return false;
1735    } else {
1736      if (!isUndefOrEqual(Arg, i+NumElems))
1737        return false;
1738    }
1739  }
1740
1741  return true;
1742}
1743
1744static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
1745  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1746  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1747  return isCommutedMOVL(Ops, V2IsSplat);
1748}
1749
1750/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1751/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1752bool X86::isMOVSHDUPMask(SDNode *N) {
1753  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1754
1755  if (N->getNumOperands() != 4)
1756    return false;
1757
1758  // Expect 1, 1, 3, 3
1759  for (unsigned i = 0; i < 2; ++i) {
1760    SDOperand Arg = N->getOperand(i);
1761    if (Arg.getOpcode() == ISD::UNDEF) continue;
1762    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1763    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1764    if (Val != 1) return false;
1765  }
1766
1767  bool HasHi = false;
1768  for (unsigned i = 2; i < 4; ++i) {
1769    SDOperand Arg = N->getOperand(i);
1770    if (Arg.getOpcode() == ISD::UNDEF) continue;
1771    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1772    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1773    if (Val != 3) return false;
1774    HasHi = true;
1775  }
1776
1777  // Don't use movshdup if it can be done with a shufps.
1778  return HasHi;
1779}
1780
1781/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1782/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
1783bool X86::isMOVSLDUPMask(SDNode *N) {
1784  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1785
1786  if (N->getNumOperands() != 4)
1787    return false;
1788
1789  // Expect 0, 0, 2, 2
1790  for (unsigned i = 0; i < 2; ++i) {
1791    SDOperand Arg = N->getOperand(i);
1792    if (Arg.getOpcode() == ISD::UNDEF) continue;
1793    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1794    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1795    if (Val != 0) return false;
1796  }
1797
1798  bool HasHi = false;
1799  for (unsigned i = 2; i < 4; ++i) {
1800    SDOperand Arg = N->getOperand(i);
1801    if (Arg.getOpcode() == ISD::UNDEF) continue;
1802    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1803    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1804    if (Val != 2) return false;
1805    HasHi = true;
1806  }
1807
1808  // Don't use movsldup if it can be done with a shufps.
1809  return HasHi;
1810}
1811
1812/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1813/// a splat of a single element.
1814static bool isSplatMask(SDNode *N) {
1815  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1816
1817  // This is a splat operation if each element of the permute is the same, and
1818  // if the value doesn't reference the second vector.
1819  unsigned NumElems = N->getNumOperands();
1820  SDOperand ElementBase;
1821  unsigned i = 0;
1822  for (; i != NumElems; ++i) {
1823    SDOperand Elt = N->getOperand(i);
1824    if (isa<ConstantSDNode>(Elt)) {
1825      ElementBase = Elt;
1826      break;
1827    }
1828  }
1829
1830  if (!ElementBase.Val)
1831    return false;
1832
1833  for (; i != NumElems; ++i) {
1834    SDOperand Arg = N->getOperand(i);
1835    if (Arg.getOpcode() == ISD::UNDEF) continue;
1836    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1837    if (Arg != ElementBase) return false;
1838  }
1839
1840  // Make sure it is a splat of the first vector operand.
1841  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
1842}
1843
1844/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1845/// a splat of a single element and it's a 2 or 4 element mask.
1846bool X86::isSplatMask(SDNode *N) {
1847  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1848
1849  // We can only splat 64-bit and 32-bit quantities with a single instruction.
1850  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
1851    return false;
1852  return ::isSplatMask(N);
1853}
1854
1855/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
1856/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
1857/// instructions.
1858unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
1859  unsigned NumOperands = N->getNumOperands();
1860  unsigned Shift = (NumOperands == 4) ? 2 : 1;
1861  unsigned Mask = 0;
1862  for (unsigned i = 0; i < NumOperands; ++i) {
1863    unsigned Val = 0;
1864    SDOperand Arg = N->getOperand(NumOperands-i-1);
1865    if (Arg.getOpcode() != ISD::UNDEF)
1866      Val = cast<ConstantSDNode>(Arg)->getValue();
1867    if (Val >= NumOperands) Val -= NumOperands;
1868    Mask |= Val;
1869    if (i != NumOperands - 1)
1870      Mask <<= Shift;
1871  }
1872
1873  return Mask;
1874}
1875
1876/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
1877/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
1878/// instructions.
1879unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
1880  unsigned Mask = 0;
1881  // 8 nodes, but we only care about the last 4.
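  // The immediate is accumulated two bits per element, most significant
  // element first, mirroring the pshufhw imm8 layout: e.g. the identity
  // high-half mask <4,5,6,7> encodes to 0xE4 (11 10 01 00).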
1882  for (unsigned i = 7; i >= 4; --i) {
1883    unsigned Val = 4;  // Default undef to 4 so (Val - 4) below cannot underflow.
1884    SDOperand Arg = N->getOperand(i);
1885    if (Arg.getOpcode() != ISD::UNDEF)
1886      Val = cast<ConstantSDNode>(Arg)->getValue();
1887    Mask |= (Val - 4);
1888    if (i != 4)
1889      Mask <<= 2;
1890  }
1891
1892  return Mask;
1893}
1894
1895/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
1896/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
1897/// instructions.
1898unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
1899  unsigned Mask = 0;
1900  // 8 nodes, but we only care about the first 4.
1901  for (int i = 3; i >= 0; --i) {
1902    unsigned Val = 0;
1903    SDOperand Arg = N->getOperand(i);
1904    if (Arg.getOpcode() != ISD::UNDEF)
1905      Val = cast<ConstantSDNode>(Arg)->getValue();
1906    Mask |= Val;
1907    if (i != 0)
1908      Mask <<= 2;
1909  }
1910
1911  return Mask;
1912}
1913
1914/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
1915/// specifies an 8 element shuffle that can be broken into a pair of
1916/// PSHUFHW and PSHUFLW.
1917static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
1918  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1919
1920  if (N->getNumOperands() != 8)
1921    return false;
1922
1923  // Lower quadword shuffled; its elements must come from the lower quadword.
1924  for (unsigned i = 0; i != 4; ++i) {
1925    SDOperand Arg = N->getOperand(i);
1926    if (Arg.getOpcode() == ISD::UNDEF) continue;
1927    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1928    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1929    if (Val >= 4)
1930      return false;
1931  }
1932
1933  // Upper quadword shuffled.
1934  for (unsigned i = 4; i != 8; ++i) {
1935    SDOperand Arg = N->getOperand(i);
1936    if (Arg.getOpcode() == ISD::UNDEF) continue;
1937    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1938    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1939    if (Val < 4 || Val > 7)
1940      return false;
1941  }
1942
1943  return true;
1944}
1945
1946/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
1947/// values in their permute mask.
1948static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
1949  SDOperand V1 = Op.getOperand(0);
1950  SDOperand V2 = Op.getOperand(1);
1951  SDOperand Mask = Op.getOperand(2);
1952  MVT::ValueType VT = Op.getValueType();
1953  MVT::ValueType MaskVT = Mask.getValueType();
1954  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
1955  unsigned NumElems = Mask.getNumOperands();
1956  std::vector<SDOperand> MaskVec;
1957
1958  for (unsigned i = 0; i != NumElems; ++i) {
1959    SDOperand Arg = Mask.getOperand(i);
1960    if (Arg.getOpcode() == ISD::UNDEF) {
1961      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
1962      continue;
1963    }
1964    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1965    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1966    if (Val < NumElems)
1967      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
1968    else
1969      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
1970  }
1971
1972  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
1973  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
1974}
1975
1976/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
1977/// match movhlps. The lower half elements should come from the upper half of
1978/// V1 (and in order), and the upper half elements should come from the upper
1979/// half of V2 (and in order).
1980static bool ShouldXformToMOVHLPS(SDNode *Mask) {
1981  unsigned NumElems = Mask->getNumOperands();
1982  if (NumElems != 4)
1983    return false;
1984  for (unsigned i = 0, e = 2; i != e; ++i)
1985    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
1986      return false;
1987  for (unsigned i = 2; i != 4; ++i)
1988    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
1989      return false;
1990  return true;
1991}
1992
1993/// isScalarLoadToVector - Returns true if the node is a scalar load that
1994/// is promoted to a vector.
1995static inline bool isScalarLoadToVector(SDNode *N) {
1996  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
1997    N = N->getOperand(0).Val;
1998    return (N->getOpcode() == ISD::LOAD);
1999  }
2000  return false;
2001}
2002
2003/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2004/// match movlp{s|d}. The lower half elements should come from the lower half of
2005/// V1 (and in order), and the upper half elements should come from the upper
2006/// half of V2 (and in order). And since V1 will become the source of the
2007/// MOVLP, it must be either a vector load or a scalar load to vector.
2008static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
2009  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
2010    return false;
2011
2012  unsigned NumElems = Mask->getNumOperands();
2013  if (NumElems != 2 && NumElems != 4)
2014    return false;
2015  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2016    if (!isUndefOrEqual(Mask->getOperand(i), i))
2017      return false;
2018  for (unsigned i = NumElems/2; i != NumElems; ++i)
2019    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2020      return false;
2021  return true;
2022}
2023
2024/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2025/// all the same.
2026static bool isSplatVector(SDNode *N) {
2027  if (N->getOpcode() != ISD::BUILD_VECTOR)
2028    return false;
2029
2030  SDOperand SplatValue = N->getOperand(0);
2031  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2032    if (N->getOperand(i) != SplatValue)
2033      return false;
2034  return true;
2035}
2036
2037/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2038/// that point to V2 point to its first element.
2039static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2040  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2041
2042  bool Changed = false;
2043  std::vector<SDOperand> MaskVec;
2044  unsigned NumElems = Mask.getNumOperands();
2045  for (unsigned i = 0; i != NumElems; ++i) {
2046    SDOperand Arg = Mask.getOperand(i);
2047    if (Arg.getOpcode() != ISD::UNDEF) {
2048      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2049      if (Val > NumElems) {
2050        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2051        Changed = true;
2052      }
2053    }
2054    MaskVec.push_back(Arg);
2055  }
2056
2057  if (Changed)
2058    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2059                       &MaskVec[0], MaskVec.size());
2060  return Mask;
2061}
2062
2063/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
2064/// operation of the specified width.
2065static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2066 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2067 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2068 2069 std::vector<SDOperand> MaskVec; 2070 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2071 for (unsigned i = 1; i != NumElems; ++i) 2072 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2073 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2074} 2075 2076/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2077/// of specified width. 2078static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2079 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2080 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2081 std::vector<SDOperand> MaskVec; 2082 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2083 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2084 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2085 } 2086 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2087} 2088 2089/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2090/// of specified width. 2091static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2092 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2093 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2094 unsigned Half = NumElems/2; 2095 std::vector<SDOperand> MaskVec; 2096 for (unsigned i = 0; i != Half; ++i) { 2097 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2098 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2099 } 2100 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2101} 2102 2103/// getZeroVector - Returns a vector of specified type with all zero elements. 2104/// 2105static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2106 assert(MVT::isVector(VT) && "Expected a vector type"); 2107 unsigned NumElems = getVectorNumElements(VT); 2108 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2109 bool isFP = MVT::isFloatingPoint(EVT); 2110 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2111 std::vector<SDOperand> ZeroVec(NumElems, Zero); 2112 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2113} 2114 2115/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2116/// 2117static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2118 SDOperand V1 = Op.getOperand(0); 2119 SDOperand Mask = Op.getOperand(2); 2120 MVT::ValueType VT = Op.getValueType(); 2121 unsigned NumElems = Mask.getNumOperands(); 2122 Mask = getUnpacklMask(NumElems, DAG); 2123 while (NumElems != 4) { 2124 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2125 NumElems >>= 1; 2126 } 2127 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2128 2129 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2130 Mask = getZeroVector(MaskVT, DAG); 2131 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2132 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2133 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2134} 2135 2136/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2137/// constant +0.0. 
2138static inline bool isZeroNode(SDOperand Elt) {
2139  return ((isa<ConstantSDNode>(Elt) &&
2140           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2141          (isa<ConstantFPSDNode>(Elt) &&
2142           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2143}
2144
2145/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2146/// vector and zero or undef vector.
2147static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2148                                             unsigned NumElems, unsigned Idx,
2149                                             bool isZero, SelectionDAG &DAG) {
2150  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2151  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2152  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2153  SDOperand Zero = DAG.getConstant(0, EVT);
2154  std::vector<SDOperand> MaskVec(NumElems, Zero);
2155  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2156  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2157                               &MaskVec[0], MaskVec.size());
2158  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2159}
2160
2161/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2162///
2163static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2164                                       unsigned NumNonZero, unsigned NumZero,
2165                                       SelectionDAG &DAG) {
2166  if (NumNonZero > 8)
2167    return SDOperand();
2168
2169  SDOperand V(0, 0);
2170  bool First = true;
2171  for (unsigned i = 0; i < 16; ++i) {
2172    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2173    if (ThisIsNonZero && First) {
2174      if (NumZero)
2175        V = getZeroVector(MVT::v8i16, DAG);
2176      else
2177        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2178      First = false;
2179    }
2180
2181    if ((i & 1) != 0) {
2182      SDOperand ThisElt(0, 0), LastElt(0, 0);
2183      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2184      if (LastIsNonZero) {
2185        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2186      }
2187      if (ThisIsNonZero) {
2188        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2189        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2190                              ThisElt, DAG.getConstant(8, MVT::i8));
2191        if (LastIsNonZero)
2192          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2193      } else
2194        ThisElt = LastElt;
2195
2196      if (ThisElt.Val)
2197        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2198                        DAG.getConstant(i/2, MVT::i32));
2199    }
2200  }
2201
2202  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2203}
2204
2205/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2206///
2207static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2208                                       unsigned NumNonZero, unsigned NumZero,
2209                                       SelectionDAG &DAG) {
2210  if (NumNonZero > 4)
2211    return SDOperand();
2212
2213  SDOperand V(0, 0);
2214  bool First = true;
2215  for (unsigned i = 0; i < 8; ++i) {
2216    bool isNonZero = (NonZeros & (1 << i)) != 0;
2217    if (isNonZero) {
2218      if (First) {
2219        if (NumZero)
2220          V = getZeroVector(MVT::v8i16, DAG);
2221        else
2222          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2223        First = false;
2224      }
2225      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2226                      DAG.getConstant(i, MVT::i32));
2227    }
2228  }
2229
2230  return V;
2231}
2232
2233SDOperand
2234X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2235  // All zeros are handled with pxor.
2236  if (ISD::isBuildVectorAllZeros(Op.Val))
2237    return Op;
2238
2239  // All ones are handled with pcmpeqd.
2240  if (ISD::isBuildVectorAllOnes(Op.Val))
2241    return Op;
2242
2243  MVT::ValueType VT = Op.getValueType();
2244  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2245  unsigned EVTBits = MVT::getSizeInBits(EVT);
2246
2247  unsigned NumElems = Op.getNumOperands();
2248  unsigned NumZero = 0;
2249  unsigned NumNonZero = 0;
2250  unsigned NonZeros = 0;
2251  std::set<SDOperand> Values;
2252  for (unsigned i = 0; i < NumElems; ++i) {
2253    SDOperand Elt = Op.getOperand(i);
2254    if (Elt.getOpcode() != ISD::UNDEF) {
2255      Values.insert(Elt);
2256      if (isZeroNode(Elt))
2257        NumZero++;
2258      else {
2259        NonZeros |= (1 << i);
2260        NumNonZero++;
2261      }
2262    }
2263  }
2264
2265  if (NumNonZero == 0)
2266    // Must be a mix of zero and undef. Return a zero vector.
2267    return getZeroVector(VT, DAG);
2268
2269  // Splat is obviously ok. Let the legalizer expand it to a shuffle.
2270  if (Values.size() == 1)
2271    return SDOperand();
2272
2273  // Special case for single non-zero element.
2274  if (NumNonZero == 1) {
2275    unsigned Idx = CountTrailingZeros_32(NonZeros);
2276    SDOperand Item = Op.getOperand(Idx);
2277    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2278    if (Idx == 0)
2279      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2280      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2281                                         NumZero > 0, DAG);
2282
2283    if (EVTBits == 32) {
2284      // Turn it into a shuffle of zero and zero-extended scalar to vector.
2285      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2286                                         DAG);
2287      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2288      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2289      std::vector<SDOperand> MaskVec;
2290      for (unsigned i = 0; i < NumElems; i++)
2291        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2292      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2293                                   &MaskVec[0], MaskVec.size());
2294      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2295                         DAG.getNode(ISD::UNDEF, VT), Mask);
2296    }
2297  }
2298
2299  // Let the legalizer expand 2-wide build_vectors.
2300  if (EVTBits == 64)
2301    return SDOperand();
2302
2303  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2304  if (EVTBits == 8) {
2305    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG);
2306    if (V.Val) return V;
2307  }
2308
2309  if (EVTBits == 16) {
2310    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG);
2311    if (V.Val) return V;
2312  }
2313
2314  // If element VT is == 32 bits, turn it into a number of shuffles.
2315  std::vector<SDOperand> V(NumElems);
2316  if (NumElems == 4 && NumZero > 0) {
2317    for (unsigned i = 0; i < 4; ++i) {
2318      bool isZero = !(NonZeros & (1 << i));
2319      if (isZero)
2320        V[i] = getZeroVector(VT, DAG);
2321      else
2322        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2323    }
2324
2325    for (unsigned i = 0; i < 2; ++i) {
2326      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2327        default: break;
2328        case 0:
2329          V[i] = V[i*2];  // Must be a zero vector.
2330          break;
2331        case 1:
2332          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2333                             getMOVLMask(NumElems, DAG));
2334          break;
2335        case 2:
2336          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2337                             getMOVLMask(NumElems, DAG));
2338          break;
2339        case 3:
2340          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2341                             getUnpacklMask(NumElems, DAG));
2342          break;
2343      }
2344    }
2345
2346    // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e.
movd) 2347 // clears the upper bits. 2348 // FIXME: we can do the same for v4f32 case when we know both parts of 2349 // the lower half come from scalar_to_vector (loadf32). We should do 2350 // that in post legalizer dag combiner with target specific hooks. 2351 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2352 return V[0]; 2353 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2354 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2355 std::vector<SDOperand> MaskVec; 2356 bool Reverse = (NonZeros & 0x3) == 2; 2357 for (unsigned i = 0; i < 2; ++i) 2358 if (Reverse) 2359 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2360 else 2361 MaskVec.push_back(DAG.getConstant(i, EVT)); 2362 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2363 for (unsigned i = 0; i < 2; ++i) 2364 if (Reverse) 2365 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2366 else 2367 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2368 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2369 &MaskVec[0], MaskVec.size()); 2370 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2371 } 2372 2373 if (Values.size() > 2) { 2374 // Expand into a number of unpckl*. 2375 // e.g. for v4f32 2376 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2377 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2378 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2379 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2380 for (unsigned i = 0; i < NumElems; ++i) 2381 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2382 NumElems >>= 1; 2383 while (NumElems != 0) { 2384 for (unsigned i = 0; i < NumElems; ++i) 2385 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2386 UnpckMask); 2387 NumElems >>= 1; 2388 } 2389 return V[0]; 2390 } 2391 2392 return SDOperand(); 2393} 2394 2395SDOperand 2396X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2397 SDOperand V1 = Op.getOperand(0); 2398 SDOperand V2 = Op.getOperand(1); 2399 SDOperand PermMask = Op.getOperand(2); 2400 MVT::ValueType VT = Op.getValueType(); 2401 unsigned NumElems = PermMask.getNumOperands(); 2402 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2403 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2404 2405 if (isSplatMask(PermMask.Val)) { 2406 if (NumElems <= 4) return Op; 2407 // Promote it to a v4i32 splat. 2408 return PromoteSplat(Op, DAG); 2409 } 2410 2411 if (X86::isMOVLMask(PermMask.Val)) 2412 return (V1IsUndef) ? V2 : Op; 2413 2414 if (X86::isMOVSHDUPMask(PermMask.Val) || 2415 X86::isMOVSLDUPMask(PermMask.Val) || 2416 X86::isMOVHLPSMask(PermMask.Val) || 2417 X86::isMOVHPMask(PermMask.Val) || 2418 X86::isMOVLPMask(PermMask.Val)) 2419 return Op; 2420 2421 if (ShouldXformToMOVHLPS(PermMask.Val) || 2422 ShouldXformToMOVLP(V1.Val, PermMask.Val)) 2423 return CommuteVectorShuffle(Op, DAG); 2424 2425 bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF; 2426 bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF; 2427 if (V1IsSplat && !V2IsSplat) { 2428 Op = CommuteVectorShuffle(Op, DAG); 2429 V1 = Op.getOperand(0); 2430 V2 = Op.getOperand(1); 2431 PermMask = Op.getOperand(2); 2432 V2IsSplat = true; 2433 } 2434 2435 if (isCommutedMOVL(PermMask.Val, V2IsSplat)) { 2436 if (V2IsUndef) return V1; 2437 Op = CommuteVectorShuffle(Op, DAG); 2438 V1 = Op.getOperand(0); 2439 V2 = Op.getOperand(1); 2440 PermMask = Op.getOperand(2); 2441 if (V2IsSplat) { 2442 // V2 is a splat, so the mask may be malformed. That is, it may point 2443 // to any V2 element. 
The instruction selector won't like this. Get
2444      // a corrected mask and commute to form a proper MOVS{S|D}.
2445      SDOperand NewMask = getMOVLMask(NumElems, DAG);
2446      if (NewMask.Val != PermMask.Val)
2447        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2448    }
2449    return Op;
2450  }
2451
2452  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2453      X86::isUNPCKLMask(PermMask.Val) ||
2454      X86::isUNPCKHMask(PermMask.Val))
2455    return Op;
2456
2457  if (V2IsSplat) {
2458    // Normalize the mask so all entries that point to V2 point to its first
2459    // element, then try to match unpck{h|l} again. If a match is found,
2460    // return a new vector_shuffle with the corrected mask.
2461    SDOperand NewMask = NormalizeMask(PermMask, DAG);
2462    if (NewMask.Val != PermMask.Val) {
2463      if (X86::isUNPCKLMask(PermMask.Val, true)) {
2464        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2465        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2466      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2467        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2468        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2469      }
2470    }
2471  }
2472
2473  // Normalize the node to match x86 shuffle ops if needed.
2474  if (V2.getOpcode() != ISD::UNDEF)
2475    if (isCommutedSHUFP(PermMask.Val)) {
2476      Op = CommuteVectorShuffle(Op, DAG);
2477      V1 = Op.getOperand(0);
2478      V2 = Op.getOperand(1);
2479      PermMask = Op.getOperand(2);
2480    }
2481
2482  // If VT is integer, try PSHUF* first, then SHUFP*.
2483  if (MVT::isInteger(VT)) {
2484    if (X86::isPSHUFDMask(PermMask.Val) ||
2485        X86::isPSHUFHWMask(PermMask.Val) ||
2486        X86::isPSHUFLWMask(PermMask.Val)) {
2487      if (V2.getOpcode() != ISD::UNDEF)
2488        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2489                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2490      return Op;
2491    }
2492
2493    if (X86::isSHUFPMask(PermMask.Val))
2494      return Op;
2495
2496    // Handle v8i16 shuffle high / low shuffle node pair.
2497    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
2498      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2499      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2500      std::vector<SDOperand> MaskVec;
2501      for (unsigned i = 0; i != 4; ++i)
2502        MaskVec.push_back(PermMask.getOperand(i));
2503      for (unsigned i = 4; i != 8; ++i)
2504        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2505      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2506                                   &MaskVec[0], MaskVec.size());
2507      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2508      MaskVec.clear();
2509      for (unsigned i = 0; i != 4; ++i)
2510        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2511      for (unsigned i = 4; i != 8; ++i)
2512        MaskVec.push_back(PermMask.getOperand(i));
2513      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
2514      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2515    }
2516  } else {
2517    // Floating point cases in the other order.
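    // For floating point vectors, the SHUFP* forms are the natural match, so
    // try them before falling back to the integer PSHUF* patterns.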
2518    if (X86::isSHUFPMask(PermMask.Val))
2519      return Op;
2520    if (X86::isPSHUFDMask(PermMask.Val) ||
2521        X86::isPSHUFHWMask(PermMask.Val) ||
2522        X86::isPSHUFLWMask(PermMask.Val)) {
2523      if (V2.getOpcode() != ISD::UNDEF)
2524        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2525                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2526      return Op;
2527    }
2528  }
2529
2530  if (NumElems == 4) {
2531    MVT::ValueType MaskVT = PermMask.getValueType();
2532    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2533    std::vector<std::pair<int, int> > Locs;
2534    Locs.resize(NumElems);  // resize, not reserve: the loops below assign Locs[i] directly.
2535    std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2536    std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2537    unsigned NumHi = 0;
2538    unsigned NumLo = 0;
2539    // If no more than two elements come from either vector, this can be
2540    // implemented with two shuffles. The first shuffle gathers the elements.
2541    // The second shuffle, which takes the first shuffle as both of its
2542    // vector operands, puts the elements into the right order.
2543    for (unsigned i = 0; i != NumElems; ++i) {
2544      SDOperand Elt = PermMask.getOperand(i);
2545      if (Elt.getOpcode() == ISD::UNDEF) {
2546        Locs[i] = std::make_pair(-1, -1);
2547      } else {
2548        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2549        if (Val < NumElems) {
2550          Locs[i] = std::make_pair(0, NumLo);
2551          Mask1[NumLo] = Elt;
2552          NumLo++;
2553        } else {
2554          Locs[i] = std::make_pair(1, NumHi);
2555          if (2+NumHi < NumElems)
2556            Mask1[2+NumHi] = Elt;
2557          NumHi++;
2558        }
2559      }
2560    }
2561    if (NumLo <= 2 && NumHi <= 2) {
2562      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2563                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2564                                   &Mask1[0], Mask1.size()));
2565      for (unsigned i = 0; i != NumElems; ++i) {
2566        if (Locs[i].first == -1)
2567          continue;
2568        else {
2569          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
2570          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
2571          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
2572        }
2573      }
2574
2575      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
2576                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2577                                     &Mask2[0], Mask2.size()));
2578    }
2579
2580    // Break it into (shuffle shuffle_hi, shuffle_lo).
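    // More than two elements came from one of the vectors. Gather the
    // elements destined for the low half with one (V1, V2) shuffle and those
    // for the high half with another, then merge the partial results with a
    // final shuffle. E.g. for the mask <0,1,2,7>, the lo shuffle produces
    // <0,1,u,u>, the hi shuffle <2,u,7,u>, and the final mask <0,1,4,6>
    // combines them.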
2581    Locs.clear(); Locs.resize(NumElems);  // reset; the loop below assigns by index.
2582    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2583    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2584    std::vector<SDOperand> *MaskPtr = &LoMask;
2585    unsigned MaskIdx = 0;
2586    unsigned LoIdx = 0;
2587    unsigned HiIdx = NumElems/2;
2588    for (unsigned i = 0; i != NumElems; ++i) {
2589      if (i == NumElems/2) {
2590        MaskPtr = &HiMask;
2591        MaskIdx = 1;
2592        LoIdx = 0;
2593        HiIdx = NumElems/2;
2594      }
2595      SDOperand Elt = PermMask.getOperand(i);
2596      if (Elt.getOpcode() == ISD::UNDEF) {
2597        Locs[i] = std::make_pair(-1, -1);
2598      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
2599        Locs[i] = std::make_pair(MaskIdx, LoIdx);
2600        (*MaskPtr)[LoIdx] = Elt;
2601        LoIdx++;
2602      } else {
2603        Locs[i] = std::make_pair(MaskIdx, HiIdx);
2604        (*MaskPtr)[HiIdx] = Elt;
2605        HiIdx++;
2606      }
2607    }
2608
2609    SDOperand LoShuffle =
2610      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2611                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2612                              &LoMask[0], LoMask.size()));
2613    SDOperand HiShuffle =
2614      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2615                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2616                              &HiMask[0], HiMask.size()));
2617    std::vector<SDOperand> MaskOps;
2618    for (unsigned i = 0; i != NumElems; ++i) {
2619      if (Locs[i].first == -1) {
2620        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
2621      } else {
2622        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
2623        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
2624      }
2625    }
2626    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
2627                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2628                                   &MaskOps[0], MaskOps.size()));
2629  }
2630
2631  return SDOperand();
2632}
2633
2634SDOperand
2635X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2636  if (!isa<ConstantSDNode>(Op.getOperand(1)))
2637    return SDOperand();
2638
2639  MVT::ValueType VT = Op.getValueType();
2640  // TODO: handle v16i8.
2641  if (MVT::getSizeInBits(VT) == 16) {
2642    // Transform it so it matches pextrw, which produces a 32-bit result.
2643    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
2644    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
2645                                    Op.getOperand(0), Op.getOperand(1));
2646    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
2647                                   DAG.getValueType(VT));
2648    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
2649  } else if (MVT::getSizeInBits(VT) == 32) {
2650    SDOperand Vec = Op.getOperand(0);
2651    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2652    if (Idx == 0)
2653      return Op;
2654    // SHUFPS the element to the lowest double word, then movss.
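    // The shuffle mask is <Idx, u, u, u>: it moves the requested element into
    // slot 0, so the extract of element 0 below can be selected as a movss.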
2655    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2656    std::vector<SDOperand> IdxVec;
2657    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
2658    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2659    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2660    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2661    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2662                                 &IdxVec[0], IdxVec.size());
2663    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2664                      Vec, Vec, Mask);
2665    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2666                       DAG.getConstant(0, getPointerTy()));
2667  } else if (MVT::getSizeInBits(VT) == 64) {
2668    SDOperand Vec = Op.getOperand(0);
2669    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2670    if (Idx == 0)
2671      return Op;
2672
2673    // UNPCKHPD the element to the lowest double word, then movsd.
2674    // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
2675    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
2676    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2677    std::vector<SDOperand> IdxVec;
2678    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
2679    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2680    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2681                                 &IdxVec[0], IdxVec.size());
2682    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2683                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
2684    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2685                       DAG.getConstant(0, getPointerTy()));
2686  }
2687
2688  return SDOperand();
2689}
2690
2691SDOperand
2692X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2693  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
2694  // as its second argument.
2695  MVT::ValueType VT = Op.getValueType();
2696  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
2697  SDOperand N0 = Op.getOperand(0);
2698  SDOperand N1 = Op.getOperand(1);
2699  SDOperand N2 = Op.getOperand(2);
2700  if (MVT::getSizeInBits(BaseVT) == 16) {
2701    if (N1.getValueType() != MVT::i32)
2702      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
2703    if (N2.getValueType() != MVT::i32)
2704      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
2705    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
2706  } else if (MVT::getSizeInBits(BaseVT) == 32) {
2707    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
2708    if (Idx == 0) {
2709      // Use a movss.
2710      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
2711      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2712      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2713      std::vector<SDOperand> MaskVec;
2714      MaskVec.push_back(DAG.getConstant(4, BaseVT));
2715      for (unsigned i = 1; i <= 3; ++i)
2716        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2717      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
2718                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2719                                     &MaskVec[0], MaskVec.size()));
2720    } else {
2721      // Use two pinsrw instructions to insert a 32-bit value.
2722      Idx <<= 1;
2723      if (MVT::isFloatingPoint(N1.getValueType())) {
2724        if (N1.getOpcode() == ISD::LOAD) {
2725          // Just load directly from f32mem to GR32.
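          // Reuse the chain, pointer, and source-value operands of the
          // original f32 load so the i32 load reads the same location.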
2726          N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
2727                           N1.getOperand(2));
2728        } else {
2729          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
2730          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
2731          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
2732                           DAG.getConstant(0, getPointerTy()));
2733        }
2734      }
2735      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
2736      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
2737                       DAG.getConstant(Idx, getPointerTy()));
2738      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
2739      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
2740                       DAG.getConstant(Idx+1, getPointerTy()));
2741      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
2742    }
2743  }
2744
2745  return SDOperand();
2746}
2747
2748SDOperand
2749X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2750  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
2751  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
2752}
2753
2754// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2755// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
2756// one of the above-mentioned nodes. It has to be wrapped because otherwise
2757// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2758// be used to form an addressing mode. These wrapped nodes will be selected
2759// into MOV32ri.
2760SDOperand
2761X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
2762  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2763  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2764                                 DAG.getTargetConstantPool(CP->get(), getPointerTy(),
2765                                                           CP->getAlignment()));
2766  if (Subtarget->isTargetDarwin()) {
2767    // With PIC, the address is actually $g + Offset.
2768    if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
2769      Result = DAG.getNode(ISD::ADD, getPointerTy(),
2770                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
2771  }
2772
2773  return Result;
2774}
2775
2776SDOperand
2777X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
2778  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2779  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2780                                 DAG.getTargetGlobalAddress(GV,
2781                                                            getPointerTy()));
2782  if (Subtarget->isTargetDarwin()) {
2783    // With PIC, the address is actually $g + Offset.
2784    if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
2785      Result = DAG.getNode(ISD::ADD, getPointerTy(),
2786                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
2787                           Result);
2788
2789    // For Darwin, external and weak symbols are indirect, so we want to load
2790    // the value at address GV, not the value of GV itself. This means that
2791    // the GlobalAddress must be in the base or index register of the address,
2792    // not the GV offset field.
2793    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
2794        DarwinGVRequiresExtraLoad(GV))
2795      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
2796                           Result, DAG.getSrcValue(NULL));
2797  }
2798
2799  return Result;
2800}
2801
2802SDOperand
2803X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
2804  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2805  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2806                                 DAG.getTargetExternalSymbol(Sym,
2807                                                             getPointerTy()));
2808  if (Subtarget->isTargetDarwin()) {
2809    // With PIC, the address is actually $g + Offset.
2810 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 2811 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2812 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 2813 Result); 2814 } 2815 2816 return Result; 2817} 2818 2819SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 2820 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2821 "Not an i64 shift!"); 2822 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 2823 SDOperand ShOpLo = Op.getOperand(0); 2824 SDOperand ShOpHi = Op.getOperand(1); 2825 SDOperand ShAmt = Op.getOperand(2); 2826 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 2827 DAG.getConstant(31, MVT::i8)) 2828 : DAG.getConstant(0, MVT::i32); 2829 2830 SDOperand Tmp2, Tmp3; 2831 if (Op.getOpcode() == ISD::SHL_PARTS) { 2832 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 2833 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 2834 } else { 2835 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 2836 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 2837 } 2838 2839 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 2840 ShAmt, DAG.getConstant(32, MVT::i8)); 2841 2842 SDOperand Hi, Lo; 2843 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2844 2845 std::vector<MVT::ValueType> Tys; 2846 Tys.push_back(MVT::i32); 2847 Tys.push_back(MVT::Flag); 2848 std::vector<SDOperand> Ops; 2849 if (Op.getOpcode() == ISD::SHL_PARTS) { 2850 Ops.push_back(Tmp2); 2851 Ops.push_back(Tmp3); 2852 Ops.push_back(CC); 2853 Ops.push_back(InFlag); 2854 Hi = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 2855 InFlag = Hi.getValue(1); 2856 2857 Ops.clear(); 2858 Ops.push_back(Tmp3); 2859 Ops.push_back(Tmp1); 2860 Ops.push_back(CC); 2861 Ops.push_back(InFlag); 2862 Lo = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 2863 } else { 2864 Ops.push_back(Tmp2); 2865 Ops.push_back(Tmp3); 2866 Ops.push_back(CC); 2867 Ops.push_back(InFlag); 2868 Lo = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 2869 InFlag = Lo.getValue(1); 2870 2871 Ops.clear(); 2872 Ops.push_back(Tmp3); 2873 Ops.push_back(Tmp1); 2874 Ops.push_back(CC); 2875 Ops.push_back(InFlag); 2876 Hi = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 2877 } 2878 2879 Tys.clear(); 2880 Tys.push_back(MVT::i32); 2881 Tys.push_back(MVT::i32); 2882 Ops.clear(); 2883 Ops.push_back(Lo); 2884 Ops.push_back(Hi); 2885 return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size()); 2886} 2887 2888SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 2889 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 2890 Op.getOperand(0).getValueType() >= MVT::i16 && 2891 "Unknown SINT_TO_FP to lower!"); 2892 2893 SDOperand Result; 2894 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 2895 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 2896 MachineFunction &MF = DAG.getMachineFunction(); 2897 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2898 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2899 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 2900 DAG.getEntryNode(), Op.getOperand(0), 2901 StackSlot, DAG.getSrcValue(NULL)); 2902 2903 // Build the FILD 2904 std::vector<MVT::ValueType> Tys; 2905 Tys.push_back(MVT::f64); 2906 Tys.push_back(MVT::Other); 2907 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 2908 std::vector<SDOperand> Ops; 2909 Ops.push_back(Chain); 2910 Ops.push_back(StackSlot); 2911 Ops.push_back(DAG.getValueType(SrcVT)); 2912 Result = 
DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
2913                Tys, &Ops[0], Ops.size());
2914
2915  if (X86ScalarSSE) {
2916    Chain = Result.getValue(1);
2917    SDOperand InFlag = Result.getValue(2);
2918
2919    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
2920    // shouldn't be necessary except that RFP cannot be live across
2921    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
2922    MachineFunction &MF = DAG.getMachineFunction();
2923    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
2924    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2925    std::vector<MVT::ValueType> Tys;
2926    Tys.push_back(MVT::Other);
2927    std::vector<SDOperand> Ops;
2928    Ops.push_back(Chain);
2929    Ops.push_back(Result);
2930    Ops.push_back(StackSlot);
2931    Ops.push_back(DAG.getValueType(Op.getValueType()));
2932    Ops.push_back(InFlag);
2933    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
2934    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
2935                         DAG.getSrcValue(NULL));
2936  }
2937
2938  return Result;
2939}
2940
2941SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
2942  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
2943         "Unknown FP_TO_SINT to lower!");
2944  // We lower FP->sint into a FP_TO_INT*_IN_MEM node, followed by a load, all
2945  // through a temporary stack slot.
2946  MachineFunction &MF = DAG.getMachineFunction();
2947  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
2948  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2949  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2950
2951  unsigned Opc;
2952  switch (Op.getValueType()) {
2953  default: assert(0 && "Invalid FP_TO_SINT to lower!");
2954  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
2955  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
2956  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
2957  }
2958
2959  SDOperand Chain = DAG.getEntryNode();
2960  SDOperand Value = Op.getOperand(0);
2961  if (X86ScalarSSE) {
2962    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
2963    Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
2964                        DAG.getSrcValue(0));
2965    std::vector<MVT::ValueType> Tys;
2966    Tys.push_back(MVT::f64);
2967    Tys.push_back(MVT::Other);
2968    std::vector<SDOperand> Ops;
2969    Ops.push_back(Chain);
2970    Ops.push_back(StackSlot);
2971    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
2972    Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
2973    Chain = Value.getValue(1);
2974    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2975    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2976  }
2977
2978  // Build the FP_TO_INT*_IN_MEM
2979  std::vector<SDOperand> Ops;
2980  Ops.push_back(Chain);
2981  Ops.push_back(Value);
2982  Ops.push_back(StackSlot);
2983  SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size());
2984
2985  // Load the result.
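  // The load is chained to the FIST node, so it cannot be scheduled before
  // the converted integer has actually been stored to the stack slot.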
2986 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2987 DAG.getSrcValue(NULL)); 2988} 2989 2990SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 2991 MVT::ValueType VT = Op.getValueType(); 2992 const Type *OpNTy = MVT::getTypeForValueType(VT); 2993 std::vector<Constant*> CV; 2994 if (VT == MVT::f64) { 2995 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2996 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2997 } else { 2998 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2999 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3000 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3001 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3002 } 3003 Constant *CS = ConstantStruct::get(CV); 3004 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3005 std::vector<MVT::ValueType> Tys; 3006 Tys.push_back(VT); 3007 Tys.push_back(MVT::Other); 3008 SmallVector<SDOperand, 3> Ops; 3009 Ops.push_back(DAG.getEntryNode()); 3010 Ops.push_back(CPIdx); 3011 Ops.push_back(DAG.getSrcValue(NULL)); 3012 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3013 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3014} 3015 3016SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3017 MVT::ValueType VT = Op.getValueType(); 3018 const Type *OpNTy = MVT::getTypeForValueType(VT); 3019 std::vector<Constant*> CV; 3020 if (VT == MVT::f64) { 3021 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 3022 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3023 } else { 3024 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 3025 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3026 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3027 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3028 } 3029 Constant *CS = ConstantStruct::get(CV); 3030 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3031 std::vector<MVT::ValueType> Tys; 3032 Tys.push_back(VT); 3033 Tys.push_back(MVT::Other); 3034 SmallVector<SDOperand, 3> Ops; 3035 Ops.push_back(DAG.getEntryNode()); 3036 Ops.push_back(CPIdx); 3037 Ops.push_back(DAG.getSrcValue(NULL)); 3038 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3039 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3040} 3041 3042SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 3043 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3044 SDOperand Cond; 3045 SDOperand CC = Op.getOperand(2); 3046 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3047 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3048 bool Flip; 3049 unsigned X86CC; 3050 if (translateX86CC(CC, isFP, X86CC, Flip)) { 3051 if (Flip) 3052 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3053 Op.getOperand(1), Op.getOperand(0)); 3054 else 3055 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3056 Op.getOperand(0), Op.getOperand(1)); 3057 return DAG.getNode(X86ISD::SETCC, MVT::i8, 3058 DAG.getConstant(X86CC, MVT::i8), Cond); 3059 } else { 3060 assert(isFP && "Illegal integer SetCC!"); 3061 3062 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3063 Op.getOperand(0), Op.getOperand(1)); 3064 std::vector<MVT::ValueType> Tys; 3065 std::vector<SDOperand> Ops; 3066 switch (SetCCOpcode) { 3067 default: assert(false && "Illegal floating point SetCC!"); 3068 case ISD::SETOEQ: { // !PF & ZF 3069 Tys.push_back(MVT::i8); 3070 Tys.push_back(MVT::Flag); 3071 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 3072 Ops.push_back(Cond); 3073 
      SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
      SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                   DAG.getConstant(X86ISD::COND_E, MVT::i8),
                                   Tmp1.getValue(1));
      return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
    }
    case ISD::SETUNE: {  // PF | !ZF
      Tys.push_back(MVT::i8);
      Tys.push_back(MVT::Flag);
      Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
      Ops.push_back(Cond);
      SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
      SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                   DAG.getConstant(X86ISD::COND_NE, MVT::i8),
                                   Tmp1.getValue(1));
      return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
    }
    }
  }
}

SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
  bool addTest = false;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Cond, CC;
  if (Op0.getOpcode() == ISD::SETCC)
    Op0 = LowerOperation(Op0, DAG);

  if (Op0.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). If the X86ISD::SETCC has no
    // other use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
    unsigned CmpOpc = Op0.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Op0.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
          Tys.push_back(Op0.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
          Ops.push_back(Op0.getOperand(i));
        Op0 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
      }

      CC = Op0.getOperand(0);
      Cond = Op0.getOperand(1);
      // Make a copy, as the flag result cannot be used by more than one node.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
      addTest =
        isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
  }

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(Op.getValueType());
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // the condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = false;
  SDOperand Cond = Op.getOperand(1);
  SDOperand Dest = Op.getOperand(2);
  SDOperand CC;
  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerOperation(Cond, DAG);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared).
    // If the X86ISD::SETCC has no other use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
    unsigned CmpOpc = Cond.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Cond.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
          Tys.push_back(Cond.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
          Ops.push_back(Cond.getOperand(i));
        Cond = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
      }

      CC = Cond.getOperand(0);
      Cond = Cond.getOperand(1);
      // Make a copy, as the flag result cannot be used by more than one node.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Op.getOperand(0), Dest, CC, Cond);
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetJumpTable(JT->getIndex(),
                                                        getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Op, DAG);
  else
    return LowerCCCCallTo(Op, DAG);
}

SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;

  switch (Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:    // ret void.
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
  case 3: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

    if (MVT::isVector(ArgVT)) {
      // Integer or FP vector result -> XMM0.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::XMM0);
      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                              SDOperand());
    } else if (MVT::isInteger(ArgVT)) {
      // Integer result -> EAX.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::EAX);

      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
                              SDOperand());
    } else if (!X86ScalarSSE) {
      // FP return with fp-stack value.
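      // Floating point results are returned in ST(0) under the C calling
      // convention; X86ISD::FP_SET_RESULT below places the value there and
      // is flagged to the return so the two cannot be separated (roughly
      // the moral equivalent of a final "fld <value>" before the ret).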
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Op.getOperand(0));
      Ops.push_back(Op.getOperand(1));
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
    } else {
      // FP return with ScalarSSE: the value lives in an XMM register but
      // must still be returned on the fp-stack.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      SDOperand MemLoc;
      SDOperand Chain = Op.getOperand(0);
      SDOperand Value = Op.getOperand(1);

      if (Value.getOpcode() == ISD::LOAD &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        // The value is already in memory; reload it from there.
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into the top of the stack.
        unsigned Size = MVT::getSizeInBits(ArgVT)/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
                            Value, MemLoc, DAG.getSrcValue(0));
      }
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(MemLoc);
      Ops.push_back(DAG.getValueType(ArgVT));
      Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Copy.getValue(1));
      Ops.push_back(Copy);
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
    }
    break;
  }
  case 5:
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(X86::EAX);
      DAG.getMachineFunction().addLiveOut(X86::EDX);
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(3),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, X86::EAX, Op.getOperand(1),
                            Copy.getValue(1));
    break;
  }
  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                     Copy.getValue(1));
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->TargetType == X86Subtarget::isCygwin &&
      Fn->getName() == "main")
    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);

  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (CC == CallingConv::Fast && EnableFastCC)
    return LowerFastCCArguments(Op, DAG);
  else
    return LowerCCCArguments(Op, DAG);
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, or if the size is below the rep-string threshold,
  // call memset instead; the library version knows how to align to the right
  // boundary first.
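  // Otherwise the expansion below materializes the value, count and
  // destination in fixed registers and emits X86ISD::REP_STOS; roughly
  // (a sketch of the DWORD-aligned constant-value case):
  //   mov eax, <byte value replicated 4x>
  //   mov ecx, <size / 4>
  //   mov edi, <dest>
  //   rep stosd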
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    // Extend the ubyte argument to be an int value for the call.
    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Args.push_back(std::make_pair(Val, IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    unsigned Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    // (Note: given the early exit above for non-DWORD alignment, only the
    // DWORD-aligned case below is currently reachable.)
    switch (Align & 3) {
      case 2:   // WORD aligned
        AVT = MVT::i16;
        Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
        BytesLeft = I->getValue() % 2;
        Val    = (Val << 8) | Val;
        ValReg = X86::AX;
        break;
      case 0:   // DWORD aligned
        AVT = MVT::i32;
        if (I) {
          Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
          BytesLeft = I->getValue() % 4;
        } else {
          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                              DAG.getConstant(2, MVT::i8));
          TwoRepStos = true;
        }
        Val = (Val << 8)  | Val;
        Val = (Val << 16) | Val;
        ValReg = X86::EAX;
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        Count = Op.getOperand(3);
        ValReg = X86::AL;
        break;
    }

    Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = Op.getOperand(3);
    Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());

  if (TwoRepStos) {
    // Variable-sized DWORD memset: finish the low two bits of the count
    // with a second byte-sized rep stos.
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 3 bytes.
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}

SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, or if the size is below the rep-string threshold,
  // call memcpy instead; the library version knows how to align to the right
  // boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  unsigned BytesLeft = 0;
  bool TwoRepMovs = false;
  // (As in LowerMEMSET, only the DWORD-aligned case is currently reachable
  // after the early exit above.)
  switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
      BytesLeft = I->getValue() % 2;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      if (I) {
        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
        BytesLeft = I->getValue() % 4;
      } else {
        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                            DAG.getConstant(2, MVT::i8));
        TwoRepMovs = true;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      break;
  }

  SDOperand InFlag(0, 0);
  Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());

  if (TwoRepMovs) {
    // Variable-sized copy: finish the low two bits of the count with a
    // second byte-sized rep movs.
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue loads and stores for the last 1 - 3 bytes.
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Op.getOperand(2);
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}

SDOperand
X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Op.getOperand(0));
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size());
  Ops.clear();
  // RDTSC leaves the 64-bit cycle count in EDX:EAX; read both halves and
  // merge them into the two i32 results.
  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
                                   MVT::i32, Ops[0].getValue(2)));
  Ops.push_back(Ops[1].getValue(1));
  Tys[0] = Tys[1] = MVT::i32;
  Tys.push_back(MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  // FIXME: Replace MVT::i32 with PointerTy.
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
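  // Each comparison intrinsic below maps to an X86ISD::COMI or X86ISD::UCOMI
  // flag-producing node plus an X86ISD::SETCC on the result; e.g. (a sketch,
  // not a guaranteed instruction sequence):
  //   x86_sse_comilt_ss(a, b)  ->  comiss a, b ; setb al
  // translateX86CC may swap the operands when the condition has no direct
  // flag encoding.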
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }
    bool Flip;
    unsigned X86CC;
    translateX86CC(CC, true, X86CC, Flip);
    SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
                                 Op.getOperand(Flip?1:2));
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                  DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::CALL:               return LowerCALL(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::TEST:               return "X86ISD::TEST";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
"X86ISD::S2VEC"; 3791 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3792 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3793 } 3794} 3795 3796/// isLegalAddressImmediate - Return true if the integer value or 3797/// GlobalValue can be used as the offset of the target addressing mode. 3798bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3799 // X86 allows a sign-extended 32-bit immediate field. 3800 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3801} 3802 3803bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3804 // GV is 64-bit but displacement field is 32-bit unless we are in small code 3805 // model. Mac OS X happens to support only small PIC code model. 3806 // FIXME: better support for other OS's. 3807 if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin()) 3808 return false; 3809 if (Subtarget->isTargetDarwin()) { 3810 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3811 if (RModel == Reloc::Static) 3812 return true; 3813 else if (RModel == Reloc::DynamicNoPIC) 3814 return !DarwinGVRequiresExtraLoad(GV); 3815 else 3816 return false; 3817 } else 3818 return true; 3819} 3820 3821/// isShuffleMaskLegal - Targets can use this to indicate that they only 3822/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3823/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3824/// are assumed to be legal. 3825bool 3826X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3827 // Only do shuffles on 128-bit vector types for now. 3828 if (MVT::getSizeInBits(VT) == 64) return false; 3829 return (Mask.Val->getNumOperands() <= 4 || 3830 isSplatMask(Mask.Val) || 3831 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3832 X86::isUNPCKLMask(Mask.Val) || 3833 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3834 X86::isUNPCKHMask(Mask.Val)); 3835} 3836 3837bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 3838 MVT::ValueType EVT, 3839 SelectionDAG &DAG) const { 3840 unsigned NumElts = BVOps.size(); 3841 // Only do shuffles on 128-bit vector types for now. 3842 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 3843 if (NumElts == 2) return true; 3844 if (NumElts == 4) { 3845 return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) || 3846 isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps)); 3847 } 3848 return false; 3849} 3850 3851//===----------------------------------------------------------------------===// 3852// X86 Scheduler Hooks 3853//===----------------------------------------------------------------------===// 3854 3855MachineBasicBlock * 3856X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 3857 MachineBasicBlock *BB) { 3858 switch (MI->getOpcode()) { 3859 default: assert(false && "Unexpected instr type to insert"); 3860 case X86::CMOV_FR32: 3861 case X86::CMOV_FR64: 3862 case X86::CMOV_V4F32: 3863 case X86::CMOV_V2F64: 3864 case X86::CMOV_V2I64: { 3865 // To "insert" a SELECT_CC instruction, we actually have to insert the 3866 // diamond control-flow pattern. The incoming instruction knows the 3867 // destination vreg to set, the condition code register to branch on, the 3868 // true/false values to select between, and a branch opcode to use. 3869 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3870 ilist<MachineBasicBlock>::iterator It = BB; 3871 ++It; 3872 3873 // thisMBB: 3874 // ... 3875 // TrueVal = ... 
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    // copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges.
    BB->addSuccessor(sinkMBB);

    // sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the control word to round-towards-zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of the control word to its original value.
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
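    // At this point the block contains, schematically (a sketch of the
    // MachineInstrs built above, not literal assembler output):
    //   fnstcw [cw]              ; save the current FP control word
    //   mov    ax, [cw]          ; remember it in OldCW
    //   mov    word [cw], 0xC7F  ; select round-towards-zero
    //   fldcw  [cw]              ; make it the active control word
    //   mov    [cw], ax          ; restore the in-memory image for the
    //                            ; final reload below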
    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // SETCC produces a value that is either 0 or 1, so every bit above the
    // lowest one is known to be zero.
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
    return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
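/// For illustration, both (X86ISD::Wrapper (globaladdr @G)) and
/// (add (X86ISD::Wrapper (globaladdr @G)), (i32 8)) match; the latter
/// increases Offset by 8. Offset is only ever added to, so callers are
/// expected to pass it in initialized to zero.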
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (N->getOpcode() == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = dyn_cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset = 0;   // isGAPlusOffset accumulates into this.
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}


/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
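/// For instance (a sketch), four f32 loads from [P], [P+4], [P+8] and [P+12]
/// shuffled with mask <0, 1, 2, 3> become a single v4f32 load of [P]: a
/// normal (movaps-style) load when the base is known 16-byte aligned,
/// otherwise an unaligned movups via X86ISD::LOAD_UA.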
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || Arg.getOpcode() != ISD::LOAD)
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  if (isAlign16)
    return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1),
                       Base->getOperand(2));
  else {
    // Just use movups, it's shorter.
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::v4f32);
    Tys.push_back(MVT::Other);
    SmallVector<SDOperand, 3> Ops;
    Ops.push_back(Base->getOperand(0));
    Ops.push_back(Base->getOperand(1));
    Ops.push_back(Base->getOperand(2));
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
  }
}

SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(char ConstraintLetter) const {
  switch (ConstraintLetter) {
  case 'A':
  case 'r':
  case 'R':
  case 'l':
  case 'q':
  case 'Q':
  case 'x':
  case 'Y':
    return C_RegisterClass;
  default: return TargetLowering::getConstraintType(ConstraintLetter);
  }
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    // FIXME: not handling MMX registers yet ('y' constraint).
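    // These mappings follow GCC's x86 constraint letters; for example
    // (a sketch):
    //   asm("..." : "=q"(c))   // may only use EAX/EBX/ECX/EDX (and their
    //                          // byte-addressable subregisters)
    //   asm("..." : "=x"(v))   // may use XMM0..XMM7 when SSE1 is available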
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;   // Unknown constraint letter.
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'l':   // INDEX_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'x':   // SSE_REGS if SSE1 allowed
      if (Subtarget->hasSSE1())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (Subtarget->hasSSE2())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found?  Bail out.
  if (Res.second == 0) return Res;

  // Otherwise, check to see if this is a register class of the wrong value
  // type. For example, we want to map "{ax},i32" -> {eax}; we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
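  // For example, "{ax}" with an i32 operand resolves to (AX, GR16) above
  // and is remapped below to (EAX, GR32); with an i8 operand it becomes
  // (AL, GR8). (A worked example of the rewrite that follows.)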
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  }

  return Res;
}