// X86ISelLowering.cpp -- scraped at revision 91b740da12b1152e740fadc6789a3d2a596dbabe
1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that X86 uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86.h" 16#include "X86InstrBuilder.h" 17#include "X86ISelLowering.h" 18#include "X86TargetMachine.h" 19#include "llvm/CallingConv.h" 20#include "llvm/Constants.h" 21#include "llvm/Function.h" 22#include "llvm/Intrinsics.h" 23#include "llvm/ADT/VectorExtras.h" 24#include "llvm/Analysis/ScalarEvolutionExpressions.h" 25#include "llvm/CodeGen/MachineFrameInfo.h" 26#include "llvm/CodeGen/MachineFunction.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/SelectionDAG.h" 29#include "llvm/CodeGen/SSARegMap.h" 30#include "llvm/Support/MathExtras.h" 31#include "llvm/Target/TargetOptions.h" 32using namespace llvm; 33 34// FIXME: temporary. 35#include "llvm/Support/CommandLine.h" 36static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden, 37 cl::desc("Enable fastcc on X86")); 38 39X86TargetLowering::X86TargetLowering(TargetMachine &TM) 40 : TargetLowering(TM) { 41 Subtarget = &TM.getSubtarget<X86Subtarget>(); 42 X86ScalarSSE = Subtarget->hasSSE2(); 43 44 // Set up the TargetLowering object. 45 46 // X86 is weird, it always uses i8 for shift amounts and setcc results. 
47 setShiftAmountType(MVT::i8); 48 setSetCCResultType(MVT::i8); 49 setSetCCResultContents(ZeroOrOneSetCCResult); 50 setSchedulingPreference(SchedulingForRegPressure); 51 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 52 setStackPointerRegisterToSaveRestore(X86::ESP); 53 54 if (!Subtarget->isTargetDarwin()) 55 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 56 setUseUnderscoreSetJmpLongJmp(true); 57 58 // Add legal addressing mode scale values. 59 addLegalAddressScale(8); 60 addLegalAddressScale(4); 61 addLegalAddressScale(2); 62 // Enter the ones which require both scale + index last. These are more 63 // expensive. 64 addLegalAddressScale(9); 65 addLegalAddressScale(5); 66 addLegalAddressScale(3); 67 68 // Set up the register classes. 69 addRegisterClass(MVT::i8, X86::R8RegisterClass); 70 addRegisterClass(MVT::i16, X86::R16RegisterClass); 71 addRegisterClass(MVT::i32, X86::R32RegisterClass); 72 73 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 74 // operation. 75 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 76 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 77 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 78 79 if (X86ScalarSSE) 80 // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead. 81 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 82 else 83 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 84 85 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 86 // this operation. 
87 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 88 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 89 // SSE has no i16 to fp conversion, only i32 90 if (X86ScalarSSE) 91 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 92 else { 93 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 94 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 95 } 96 97 // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64 98 // isn't legal. 99 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 100 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 101 102 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 103 // this operation. 104 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 105 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 106 107 if (X86ScalarSSE) { 108 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 109 } else { 110 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 111 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 112 } 113 114 // Handle FP_TO_UINT by promoting the destination to a larger signed 115 // conversion. 116 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 117 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 118 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 119 120 if (X86ScalarSSE && !Subtarget->hasSSE3()) 121 // Expand FP_TO_UINT into a select. 122 // FIXME: We would like to use a Custom expander here eventually to do 123 // the optimal thing for SSE vs. the default expansion in the legalizer. 124 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 125 else 126 // With SSE3 we can use fisttpll to convert to a signed i64. 
127 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 128 129 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 130 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 131 132 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 133 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 134 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 135 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand); 137 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); 138 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 139 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 140 setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); 141 setOperationAction(ISD::FREM , MVT::f64 , Expand); 142 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 143 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 144 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 145 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 146 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 147 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 148 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 149 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 150 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 151 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 152 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 153 154 // These should be promoted to a larger select which is supported. 155 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 156 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 157 158 // X86 wants to expand cmov itself. 
159 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 160 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 161 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 162 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 163 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 164 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 165 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 166 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 167 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 168 // X86 ret instruction may pop stack. 169 setOperationAction(ISD::RET , MVT::Other, Custom); 170 // Darwin ABI issue. 171 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 172 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 173 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 174 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 175 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 176 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 177 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 178 // X86 wants to expand memset / memcpy itself. 179 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 180 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 181 182 // We don't have line number support yet. 183 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 184 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 185 // FIXME - use subtarget debug flags 186 if (!Subtarget->isTargetDarwin()) 187 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); 188 189 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 190 setOperationAction(ISD::VASTART , MVT::Other, Custom); 191 192 // Use the default implementation. 
193 setOperationAction(ISD::VAARG , MVT::Other, Expand); 194 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 195 setOperationAction(ISD::VAEND , MVT::Other, Expand); 196 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 197 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 198 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 199 200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 202 203 if (X86ScalarSSE) { 204 // Set up the FP register classes. 205 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 206 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 207 208 // SSE has no load+extend ops 209 setOperationAction(ISD::EXTLOAD, MVT::f32, Expand); 210 setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand); 211 212 // Use ANDPD to simulate FABS. 213 setOperationAction(ISD::FABS , MVT::f64, Custom); 214 setOperationAction(ISD::FABS , MVT::f32, Custom); 215 216 // Use XORP to simulate FNEG. 217 setOperationAction(ISD::FNEG , MVT::f64, Custom); 218 setOperationAction(ISD::FNEG , MVT::f32, Custom); 219 220 // We don't support sin/cos/fmod 221 setOperationAction(ISD::FSIN , MVT::f64, Expand); 222 setOperationAction(ISD::FCOS , MVT::f64, Expand); 223 setOperationAction(ISD::FREM , MVT::f64, Expand); 224 setOperationAction(ISD::FSIN , MVT::f32, Expand); 225 setOperationAction(ISD::FCOS , MVT::f32, Expand); 226 setOperationAction(ISD::FREM , MVT::f32, Expand); 227 228 // Expand FP immediates into loads from the stack, except for the special 229 // cases we handle. 230 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 231 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 232 addLegalFPImmediate(+0.0); // xorps / xorpd 233 } else { 234 // Set up the FP register classes. 
235 addRegisterClass(MVT::f64, X86::RFPRegisterClass); 236 237 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 238 239 if (!UnsafeFPMath) { 240 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 241 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 242 } 243 244 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 245 addLegalFPImmediate(+0.0); // FLD0 246 addLegalFPImmediate(+1.0); // FLD1 247 addLegalFPImmediate(-0.0); // FLD0/FCHS 248 addLegalFPImmediate(-1.0); // FLD1/FCHS 249 } 250 251 // First set operation action for all vector types to expand. Then we 252 // will selectively turn on ones that can be effectively codegen'd. 253 for (unsigned VT = (unsigned)MVT::Vector + 1; 254 VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { 255 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 256 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 257 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 258 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 259 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 260 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 261 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 262 } 263 264 if (Subtarget->hasMMX()) { 265 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 266 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 267 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 268 269 // FIXME: add MMX packed arithmetics 270 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); 271 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); 272 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); 273 } 274 275 if (Subtarget->hasSSE1()) { 276 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 277 278 setOperationAction(ISD::ADD, MVT::v4f32, Legal); 279 setOperationAction(ISD::SUB, MVT::v4f32, Legal); 280 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 281 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 282 
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 283 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 284 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 285 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 286 } 287 288 if (Subtarget->hasSSE2()) { 289 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 290 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 291 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 292 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 293 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 294 295 setOperationAction(ISD::ADD, MVT::v2f64, Legal); 296 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 297 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 298 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 299 setOperationAction(ISD::SUB, MVT::v2f64, Legal); 300 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 301 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 302 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 303 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 304 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 305 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 306 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 307 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 308 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 309 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 310 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 311 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 312 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 313 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 314 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); 315 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); 316 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 317 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 318 
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom); 319 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); 320 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 321 322 // Promote v16i8, v8i16, v4i32 selects to v2i64. Custom lower v2i64, v2f64, 323 // and v4f32 selects. 324 for (unsigned VT = (unsigned)MVT::v16i8; 325 VT != (unsigned)MVT::v2i64; VT++) { 326 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 327 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 328 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 329 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 330 } 331 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 332 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 333 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 334 } 335 336 // We want to custom lower some of our intrinsics. 337 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 338 339 computeRegisterProperties(); 340 341 // FIXME: These should be based on subtarget info. Plus, the values should 342 // be smaller when we are in optimizing for size mode. 343 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 344 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 345 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 346 allowUnalignedMemoryAccesses = true; // x86 supports it! 
347} 348 349std::vector<SDOperand> 350X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 351 if (F.getCallingConv() == CallingConv::Fast && EnableFastCC) 352 return LowerFastCCArguments(F, DAG); 353 return LowerCCCArguments(F, DAG); 354} 355 356std::pair<SDOperand, SDOperand> 357X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, 358 bool isVarArg, unsigned CallingConv, 359 bool isTailCall, 360 SDOperand Callee, ArgListTy &Args, 361 SelectionDAG &DAG) { 362 assert((!isVarArg || CallingConv == CallingConv::C) && 363 "Only C takes varargs!"); 364 365 // If the callee is a GlobalAddress node (quite common, every direct call is) 366 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 367 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 368 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 369 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 370 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 371 372 if (CallingConv == CallingConv::Fast && EnableFastCC) 373 return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG); 374 return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG); 375} 376 377//===----------------------------------------------------------------------===// 378// C Calling Convention implementation 379//===----------------------------------------------------------------------===// 380 381std::vector<SDOperand> 382X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { 383 std::vector<SDOperand> ArgValues; 384 385 MachineFunction &MF = DAG.getMachineFunction(); 386 MachineFrameInfo *MFI = MF.getFrameInfo(); 387 388 // Add DAG nodes to load the arguments... 
On entry to a function on the X86, 389 // the stack frame looks like this: 390 // 391 // [ESP] -- return address 392 // [ESP + 4] -- first argument (leftmost lexically) 393 // [ESP + 8] -- second argument, if first argument is four bytes in size 394 // ... 395 // 396 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 397 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 398 MVT::ValueType ObjectVT = getValueType(I->getType()); 399 unsigned ArgIncrement = 4; 400 unsigned ObjSize; 401 switch (ObjectVT) { 402 default: assert(0 && "Unhandled argument type!"); 403 case MVT::i1: 404 case MVT::i8: ObjSize = 1; break; 405 case MVT::i16: ObjSize = 2; break; 406 case MVT::i32: ObjSize = 4; break; 407 case MVT::i64: ObjSize = ArgIncrement = 8; break; 408 case MVT::f32: ObjSize = 4; break; 409 case MVT::f64: ObjSize = ArgIncrement = 8; break; 410 } 411 // Create the frame index object for this incoming parameter... 412 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 413 414 // Create the SelectionDAG nodes corresponding to a load from this parameter 415 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 416 417 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 418 // dead loads. 419 SDOperand ArgValue; 420 if (!I->use_empty()) 421 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 422 DAG.getSrcValue(NULL)); 423 else { 424 if (MVT::isInteger(ObjectVT)) 425 ArgValue = DAG.getConstant(0, ObjectVT); 426 else 427 ArgValue = DAG.getConstantFP(0, ObjectVT); 428 } 429 ArgValues.push_back(ArgValue); 430 431 ArgOffset += ArgIncrement; // Move on to the next argument... 432 } 433 434 // If the function takes variable number of arguments, make a frame index for 435 // the start of the first vararg value... for expansion of llvm.va_start. 436 if (F.isVarArg()) 437 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 438 ReturnAddrIndex = 0; // No return address slot generated yet. 
439 BytesToPopOnReturn = 0; // Callee pops nothing. 440 BytesCallerReserves = ArgOffset; 441 442 // Finally, inform the code generator which regs we return values in. 443 switch (getValueType(F.getReturnType())) { 444 default: assert(0 && "Unknown type!"); 445 case MVT::isVoid: break; 446 case MVT::i1: 447 case MVT::i8: 448 case MVT::i16: 449 case MVT::i32: 450 MF.addLiveOut(X86::EAX); 451 break; 452 case MVT::i64: 453 MF.addLiveOut(X86::EAX); 454 MF.addLiveOut(X86::EDX); 455 break; 456 case MVT::f32: 457 case MVT::f64: 458 MF.addLiveOut(X86::ST0); 459 break; 460 } 461 return ArgValues; 462} 463 464std::pair<SDOperand, SDOperand> 465X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, 466 bool isVarArg, bool isTailCall, 467 SDOperand Callee, ArgListTy &Args, 468 SelectionDAG &DAG) { 469 // Count how many bytes are to be pushed on the stack. 470 unsigned NumBytes = 0; 471 472 if (Args.empty()) { 473 // Save zero bytes. 474 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy())); 475 } else { 476 for (unsigned i = 0, e = Args.size(); i != e; ++i) 477 switch (getValueType(Args[i].second)) { 478 default: assert(0 && "Unknown value type!"); 479 case MVT::i1: 480 case MVT::i8: 481 case MVT::i16: 482 case MVT::i32: 483 case MVT::f32: 484 NumBytes += 4; 485 break; 486 case MVT::i64: 487 case MVT::f64: 488 NumBytes += 8; 489 break; 490 } 491 492 Chain = DAG.getCALLSEQ_START(Chain, 493 DAG.getConstant(NumBytes, getPointerTy())); 494 495 // Arguments go on the stack in reverse order, as specified by the ABI. 
    unsigned ArgOffset = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;

    // Store each argument to its slot at [ESP + ArgOffset].
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
    }
    // The argument stores are independent; tie them together with a
    // TokenFactor so later nodes depend on all of them.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    // i64 comes back as two i32 halves (EDX:EAX).
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  SDOperand InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  // Second CALLSEQ_END amount is 0: the callee pops nothing in C cc.
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Copy the return value out of the registers defined by the convention.
  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      // Rebuild the i64 from the EAX (low) / EDX (high) halves.
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // The return value arrives in ST0 even in SSE mode; spill it to a
        // stack slot and reload it into an SSE register.
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has problem with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coaleced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;


/// LowerFastCCArguments - Lower a function's incoming (formal) arguments
/// under the X86 fast calling convention: the first
/// FASTCC_NUM_INT_ARGS_INREGS integer (sub)arguments arrive in EAX/EDX and
/// the rest on the stack.  The callee pops its stack arguments on return.
std::vector<SDOperand>
X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP]     -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    // ObjSize == 0 means the argument was fully consumed by registers.
    unsigned ObjSize = 0;
    SDOperand ArgValue;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        // Passed in the low byte of EAX or EDX (AL/DL).
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                                    X86::R8RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
          DAG.setRoot(ArgValue.getValue(1));
          if (ObjectVT == MVT::i1)
            // FIXME: Should insert a assertzext here.
            ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
        }
        ++NumIntRegs;
        break;
      }

      ObjSize = 1;
      break;
    case MVT::i16:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        // Passed in the low word of EAX or EDX (AX/DX).
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                                    X86::R16RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 2;
      break;
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                    X86::R32RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Both halves fit in registers: low in EAX, high in EDX.
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
          unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);

          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
          DAG.setRoot(Hi.getValue(1));

          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Only one register left (EDX): low half in EDX, high half on stack.
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          DAG.setRoot(Low.getValue(1));

          // Load the high part from memory.
          // Create the frame index object for this incoming parameter...
          int FI = MFI->CreateFixedObject(4, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        ArgOffset += 4;
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        break;
      }
      ObjSize = ArgIncrement = 8;
      break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    if (ObjSize && !I->use_empty()) {
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else if (ArgValue.Val == 0) {
      // Dead stack argument: materialize a zero of the right type.
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    if (ObjSize)
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

/// LowerFastCCCallTo - Lower an outgoing call under the X86 fast calling
/// convention (register args in EAX/EDX, callee-pop, 8n+4 stack bytes).
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  // First pass: size the outgoing argument area.  This must agree exactly
  // with the register/stack assignment done in the second pass below.
  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Low half in the last register, high half on the stack.
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  // Second pass: emit the stores for stack-passed pieces and collect the
  // register-passed values (copied to AL/AX/EAX, DL/DX/EDX further below).
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegValuesToPass.push_back(Args[i].first);
        ++NumIntRegs;
        break;
      }
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
      // Can pass (at least) part of it in regs?
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(1, MVT::i32));
        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(0, MVT::i32));
        RegValuesToPass.push_back(Lo);
        ++NumIntRegs;

        // Pass both parts in regs?
        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
          RegValuesToPass.push_back(Hi);
          ++NumIntRegs;
        } else {
          // Pass the high part in memory.
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
          ArgOffset += 4;
        }
        break;
      }
      // Fall through
    case MVT::f64:
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
  }
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);

  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg;
    SDOperand RegToPass = RegValuesToPass[i];
    // First register value goes in the A register, second in the D register,
    // matching the assignment made by LowerFastCCArguments.
    switch (RegToPass.getValueType()) {
    default: assert(0 && "Bad thing to pass in regs");
    case MVT::i8:
      CCReg = (i == 0) ? X86::AL  : X86::DL;
      break;
    case MVT::i16:
      CCReg = (i == 0) ? X86::AX  : X86::DX;
      break;
    case MVT::i32:
      CCReg = (i == 0) ? X86::EAX : X86::EDX;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  // Fastcc: the caller adjusts by ArgOffset and the callee pops ArgOffset
  // bytes too, hence the same constant appears twice.
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        // Spill ST0 to a stack slot and reload it as an SSE value.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
}



std::pair<SDOperand, SDOperand> X86TargetLowering::
LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                        SelectionDAG &DAG) {
  SDOperand Result;
  if (Depth)        // Depths > 0 not supported yet!
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
                           DAG.getSrcValue(NULL));
    else
      // The frame pointer slot sits immediately below the return address.
      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
                           DAG.getConstant(4, MVT::i32));
  }
  return std::make_pair(Result, Chain);
}

/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
/// which corresponds to the condition code.
static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
  switch (X86CC) {
  default: assert(0 && "Unknown X86 conditional code!");
  case X86ISD::COND_A:  return X86::JA;
  case X86ISD::COND_AE: return X86::JAE;
  case X86ISD::COND_B:  return X86::JB;
  case X86ISD::COND_BE: return X86::JBE;
  case X86ISD::COND_E:  return X86::JE;
  case X86ISD::COND_G:  return X86::JG;
  case X86ISD::COND_GE: return X86::JGE;
  case X86ISD::COND_L:  return X86::JL;
  case X86ISD::COND_LE: return X86::JLE;
  case X86ISD::COND_NE: return X86::JNE;
  case X86ISD::COND_NO: return X86::JNO;
  case X86ISD::COND_NP: return X86::JNP;
  case X86ISD::COND_NS: return X86::JNS;
  case X86ISD::COND_O:  return X86::JO;
  case X86ISD::COND_P:  return X86::JP;
  case X86ISD::COND_S:  return X86::JS;
  }
}

/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code. It returns a false if it cannot do a direct
/// translation. X86CC is the translated CondCode. Flip is set to true if the
/// the order of comparison operands should be flipped.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    // ZF  PF  CF   op
    //  0 | 0 | 0 | X > Y
    //  0 | 0 | 1 | X < Y
    //  1 | 0 | 0 | X == Y
    //  1 | 1 | 1 | unordered
    // The ordered comparisons map to unsigned flag tests; where the test
    // only exists for the opposite direction the operands are flipped.
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
    }
  }

  return X86CC != X86ISD::COND_INVALID;
}

/// translateX86CC - convenience overload taking the condition code as a
/// CondCodeSDNode operand.
static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. Current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    // thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
        e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while(!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    // copy0MBB:
    //  %FalseValue = ...
    //  # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    // sinkMBB:
    //  %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    // Decode the address-mode operands (base, scale, index, disp) from the
    // pseudo instruction.
    // NOTE(review): Op is a *reference* into MI's operand list, so each
    // 'Op = MI->getOperand(N)' below overwrites operand 0 in place and then
    // reads it back.  This works only because MI is deleted before anyone
    // else looks at it — fragile; confirm before reusing this pattern.
    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
1396 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1397 1398 delete MI; // The pseudo instruction is gone now. 1399 return BB; 1400 } 1401 } 1402} 1403 1404 1405//===----------------------------------------------------------------------===// 1406// X86 Custom Lowering Hooks 1407//===----------------------------------------------------------------------===// 1408 1409/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 1410/// load. For Darwin, external and weak symbols are indirect, loading the value 1411/// at address GV rather then the value of GV itself. This means that the 1412/// GlobalAddress must be in the base or index register of the address, not the 1413/// GV offset field. 1414static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 1415 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 1416 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 1417} 1418 1419/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1420/// true if Op is undef or if its value falls within the specified range (L, H]. 1421static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1422 if (Op.getOpcode() == ISD::UNDEF) 1423 return true; 1424 1425 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1426 return (Val >= Low && Val < Hi); 1427} 1428 1429/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1430/// true if Op is undef or if its value equal to the specified value. 1431static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1432 if (Op.getOpcode() == ISD::UNDEF) 1433 return true; 1434 return cast<ConstantSDNode>(Op)->getValue() == Val; 1435} 1436 1437/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1438/// specifies a shuffle of elements that is suitable for input to PSHUFD. 
1439bool X86::isPSHUFDMask(SDNode *N) { 1440 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1441 1442 if (N->getNumOperands() != 4) 1443 return false; 1444 1445 // Check if the value doesn't reference the second vector. 1446 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1447 SDOperand Arg = N->getOperand(i); 1448 if (Arg.getOpcode() == ISD::UNDEF) continue; 1449 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1450 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1451 return false; 1452 } 1453 1454 return true; 1455} 1456 1457/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1458/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1459bool X86::isPSHUFHWMask(SDNode *N) { 1460 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1461 1462 if (N->getNumOperands() != 8) 1463 return false; 1464 1465 // Lower quadword copied in order. 1466 for (unsigned i = 0; i != 4; ++i) { 1467 SDOperand Arg = N->getOperand(i); 1468 if (Arg.getOpcode() == ISD::UNDEF) continue; 1469 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1470 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1471 return false; 1472 } 1473 1474 // Upper quadword shuffled. 1475 for (unsigned i = 4; i != 8; ++i) { 1476 SDOperand Arg = N->getOperand(i); 1477 if (Arg.getOpcode() == ISD::UNDEF) continue; 1478 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1479 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1480 if (Val < 4 || Val > 7) 1481 return false; 1482 } 1483 1484 return true; 1485} 1486 1487/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1488/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1489bool X86::isPSHUFLWMask(SDNode *N) { 1490 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1491 1492 if (N->getNumOperands() != 8) 1493 return false; 1494 1495 // Upper quadword copied in order. 
1496 for (unsigned i = 4; i != 8; ++i) 1497 if (!isUndefOrEqual(N->getOperand(i), i)) 1498 return false; 1499 1500 // Lower quadword shuffled. 1501 for (unsigned i = 0; i != 4; ++i) 1502 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1503 return false; 1504 1505 return true; 1506} 1507 1508/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1509/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1510bool X86::isSHUFPMask(SDNode *N) { 1511 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1512 1513 unsigned NumElems = N->getNumOperands(); 1514 if (NumElems == 2) { 1515 // The only cases that ought be handled by SHUFPD is 1516 // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1517 // Dest { 3, 0 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1518 // Expect bit 0 == 1, bit1 == 2 1519 SDOperand Bit0 = N->getOperand(0); 1520 SDOperand Bit1 = N->getOperand(1); 1521 if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3)) 1522 return true; 1523 if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2)) 1524 return true; 1525 return false; 1526 } 1527 1528 if (NumElems != 4) return false; 1529 1530 // Each half must refer to only one of the vector. 1531 for (unsigned i = 0; i < 2; ++i) { 1532 SDOperand Arg = N->getOperand(i); 1533 if (Arg.getOpcode() == ISD::UNDEF) continue; 1534 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1535 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1536 if (Val >= 4) return false; 1537 } 1538 for (unsigned i = 2; i < 4; ++i) { 1539 SDOperand Arg = N->getOperand(i); 1540 if (Arg.getOpcode() == ISD::UNDEF) continue; 1541 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1542 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1543 if (Val < 4) return false; 1544 } 1545 1546 return true; 1547} 1548 1549/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1550/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 
1551bool X86::isMOVHLPSMask(SDNode *N) { 1552 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1553 1554 if (N->getNumOperands() != 4) 1555 return false; 1556 1557 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1558 return isUndefOrEqual(N->getOperand(0), 6) && 1559 isUndefOrEqual(N->getOperand(1), 7) && 1560 isUndefOrEqual(N->getOperand(2), 2) && 1561 isUndefOrEqual(N->getOperand(3), 3); 1562} 1563 1564/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand 1565/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1566bool X86::isMOVLHPSMask(SDNode *N) { 1567 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1568 1569 if (N->getNumOperands() != 4) 1570 return false; 1571 1572 // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5 1573 return isUndefOrEqual(N->getOperand(0), 0) && 1574 isUndefOrEqual(N->getOperand(1), 1) && 1575 isUndefOrEqual(N->getOperand(2), 4) && 1576 isUndefOrEqual(N->getOperand(3), 5); 1577} 1578 1579/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1580/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1581bool X86::isMOVLPMask(SDNode *N) { 1582 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1583 1584 unsigned NumElems = N->getNumOperands(); 1585 if (NumElems != 2 && NumElems != 4) 1586 return false; 1587 1588 for (unsigned i = 0; i < NumElems/2; ++i) 1589 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1590 return false; 1591 1592 for (unsigned i = NumElems/2; i < NumElems; ++i) 1593 if (!isUndefOrEqual(N->getOperand(i), i)) 1594 return false; 1595 1596 return true; 1597} 1598 1599/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1600/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}. 
1601bool X86::isMOVHPMask(SDNode *N) { 1602 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1603 1604 unsigned NumElems = N->getNumOperands(); 1605 if (NumElems != 2 && NumElems != 4) 1606 return false; 1607 1608 for (unsigned i = 0; i < NumElems/2; ++i) 1609 if (!isUndefOrEqual(N->getOperand(i), i)) 1610 return false; 1611 1612 for (unsigned i = 0; i < NumElems/2; ++i) { 1613 SDOperand Arg = N->getOperand(i + NumElems/2); 1614 if (!isUndefOrEqual(Arg, i + NumElems)) 1615 return false; 1616 } 1617 1618 return true; 1619} 1620 1621/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1622/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1623bool X86::isUNPCKLMask(SDNode *N) { 1624 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1625 1626 unsigned NumElems = N->getNumOperands(); 1627 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1628 return false; 1629 1630 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1631 SDOperand BitI = N->getOperand(i); 1632 SDOperand BitI1 = N->getOperand(i+1); 1633 if (!isUndefOrEqual(BitI, j)) 1634 return false; 1635 if (!isUndefOrEqual(BitI1, j + NumElems)) 1636 return false; 1637 } 1638 1639 return true; 1640} 1641 1642/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1643/// specifies a shuffle of elements that is suitable for input to UNPCKH. 
1644bool X86::isUNPCKHMask(SDNode *N) { 1645 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1646 1647 unsigned NumElems = N->getNumOperands(); 1648 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1649 return false; 1650 1651 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1652 SDOperand BitI = N->getOperand(i); 1653 SDOperand BitI1 = N->getOperand(i+1); 1654 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1655 return false; 1656 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1657 return false; 1658 } 1659 1660 return true; 1661} 1662 1663/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1664/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1665/// <0, 0, 1, 1> 1666bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1667 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1668 1669 unsigned NumElems = N->getNumOperands(); 1670 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1671 return false; 1672 1673 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1674 SDOperand BitI = N->getOperand(i); 1675 SDOperand BitI1 = N->getOperand(i+1); 1676 1677 if (!isUndefOrEqual(BitI, j)) 1678 return false; 1679 if (!isUndefOrEqual(BitI1, j)) 1680 return false; 1681 } 1682 1683 return true; 1684} 1685 1686/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand 1687/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}. 
1688bool X86::isMOVSMask(SDNode *N) { 1689 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1690 1691 unsigned NumElems = N->getNumOperands(); 1692 if (NumElems != 2 && NumElems != 4) 1693 return false; 1694 1695 if (!isUndefOrEqual(N->getOperand(0), NumElems)) 1696 return false; 1697 1698 for (unsigned i = 1; i < NumElems; ++i) { 1699 SDOperand Arg = N->getOperand(i); 1700 if (!isUndefOrEqual(Arg, i)) 1701 return false; 1702 } 1703 1704 return true; 1705} 1706 1707/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1708/// a splat of a single element. 1709bool X86::isSplatMask(SDNode *N) { 1710 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1711 1712 // We can only splat 64-bit, and 32-bit quantities. 1713 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 1714 return false; 1715 1716 // This is a splat operation if each element of the permute is the same, and 1717 // if the value doesn't reference the second vector. 1718 SDOperand Elt = N->getOperand(0); 1719 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 1720 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) { 1721 SDOperand Arg = N->getOperand(i); 1722 if (Arg.getOpcode() == ISD::UNDEF) continue; 1723 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1724 if (Arg != Elt) return false; 1725 } 1726 1727 // Make sure it is a splat of the first vector operand. 1728 return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands(); 1729} 1730 1731/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 1732/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 1733/// instructions. 1734unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 1735 unsigned NumOperands = N->getNumOperands(); 1736 unsigned Shift = (NumOperands == 4) ? 
2 : 1; 1737 unsigned Mask = 0; 1738 for (unsigned i = 0; i < NumOperands; ++i) { 1739 unsigned Val = 0; 1740 SDOperand Arg = N->getOperand(NumOperands-i-1); 1741 if (Arg.getOpcode() != ISD::UNDEF) 1742 Val = cast<ConstantSDNode>(Arg)->getValue(); 1743 if (Val >= NumOperands) Val -= NumOperands; 1744 Mask |= Val; 1745 if (i != NumOperands - 1) 1746 Mask <<= Shift; 1747 } 1748 1749 return Mask; 1750} 1751 1752/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 1753/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 1754/// instructions. 1755unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 1756 unsigned Mask = 0; 1757 // 8 nodes, but we only care about the last 4. 1758 for (unsigned i = 7; i >= 4; --i) { 1759 unsigned Val = 0; 1760 SDOperand Arg = N->getOperand(i); 1761 if (Arg.getOpcode() != ISD::UNDEF) 1762 Val = cast<ConstantSDNode>(Arg)->getValue(); 1763 Mask |= (Val - 4); 1764 if (i != 4) 1765 Mask <<= 2; 1766 } 1767 1768 return Mask; 1769} 1770 1771/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 1772/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 1773/// instructions. 1774unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 1775 unsigned Mask = 0; 1776 // 8 nodes, but we only care about the first 4. 1777 for (int i = 3; i >= 0; --i) { 1778 unsigned Val = 0; 1779 SDOperand Arg = N->getOperand(i); 1780 if (Arg.getOpcode() != ISD::UNDEF) 1781 Val = cast<ConstantSDNode>(Arg)->getValue(); 1782 Mask |= Val; 1783 if (i != 0) 1784 Mask <<= 2; 1785 } 1786 1787 return Mask; 1788} 1789 1790/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 1791/// specifies a 8 element shuffle that can be broken into a pair of 1792/// PSHUFHW and PSHUFLW. 1793static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 1794 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1795 1796 if (N->getNumOperands() != 8) 1797 return false; 1798 1799 // Lower quadword shuffled. 
1800 for (unsigned i = 0; i != 4; ++i) { 1801 SDOperand Arg = N->getOperand(i); 1802 if (Arg.getOpcode() == ISD::UNDEF) continue; 1803 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1804 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1805 if (Val > 4) 1806 return false; 1807 } 1808 1809 // Upper quadword shuffled. 1810 for (unsigned i = 4; i != 8; ++i) { 1811 SDOperand Arg = N->getOperand(i); 1812 if (Arg.getOpcode() == ISD::UNDEF) continue; 1813 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1814 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1815 if (Val < 4 || Val > 7) 1816 return false; 1817 } 1818 1819 return true; 1820} 1821 1822/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 1823/// values in ther permute mask. 1824static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 1825 SDOperand V1 = Op.getOperand(0); 1826 SDOperand V2 = Op.getOperand(1); 1827 SDOperand Mask = Op.getOperand(2); 1828 MVT::ValueType VT = Op.getValueType(); 1829 MVT::ValueType MaskVT = Mask.getValueType(); 1830 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 1831 unsigned NumElems = Mask.getNumOperands(); 1832 std::vector<SDOperand> MaskVec; 1833 1834 for (unsigned i = 0; i != NumElems; ++i) { 1835 SDOperand Arg = Mask.getOperand(i); 1836 if (Arg.getOpcode() == ISD::UNDEF) continue; 1837 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1838 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1839 if (Val < NumElems) 1840 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 1841 else 1842 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 1843 } 1844 1845 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1846 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 1847} 1848 1849/// isScalarLoadToVector - Returns true if the node is a scalar load that 1850/// is promoted to a vector. 
1851static inline bool isScalarLoadToVector(SDOperand Op) { 1852 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1853 Op = Op.getOperand(0); 1854 return (Op.getOpcode() == ISD::LOAD); 1855 } 1856 return false; 1857} 1858 1859/// ShouldXformedToMOVLP - Return true if the node should be transformed to 1860/// match movlp{d|s}. The lower half elements should come from V1 (and in 1861/// order), and the upper half elements should come from the upper half of 1862/// V2 (not necessarily in order). And since V1 will become the source of 1863/// the MOVLP, it must be a scalar load. 1864static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) { 1865 if (isScalarLoadToVector(V1)) { 1866 unsigned NumElems = Mask.getNumOperands(); 1867 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1868 if (!isUndefOrEqual(Mask.getOperand(i), i)) 1869 return false; 1870 for (unsigned i = NumElems/2; i != NumElems; ++i) 1871 if (!isUndefOrInRange(Mask.getOperand(i), 1872 NumElems+NumElems/2, NumElems*2)) 1873 return false; 1874 return true; 1875 } 1876 1877 return false; 1878} 1879 1880/// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except 1881/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1882/// half elements to come from vector 1 (which would equal the dest.) and 1883/// the upper half to come from vector 2. 1884static bool isLowerFromV2UpperFromV1(SDOperand Op) { 1885 assert(Op.getOpcode() == ISD::BUILD_VECTOR); 1886 1887 unsigned NumElems = Op.getNumOperands(); 1888 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1889 if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2)) 1890 return false; 1891 for (unsigned i = NumElems/2; i != NumElems; ++i) 1892 if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems)) 1893 return false; 1894 return true; 1895} 1896 1897/// LowerOperation - Provide custom lowering hooks for some operations. 
1898/// 1899SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 1900 switch (Op.getOpcode()) { 1901 default: assert(0 && "Should not custom lower this!"); 1902 case ISD::SHL_PARTS: 1903 case ISD::SRA_PARTS: 1904 case ISD::SRL_PARTS: { 1905 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 1906 "Not an i64 shift!"); 1907 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 1908 SDOperand ShOpLo = Op.getOperand(0); 1909 SDOperand ShOpHi = Op.getOperand(1); 1910 SDOperand ShAmt = Op.getOperand(2); 1911 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 1912 DAG.getConstant(31, MVT::i8)) 1913 : DAG.getConstant(0, MVT::i32); 1914 1915 SDOperand Tmp2, Tmp3; 1916 if (Op.getOpcode() == ISD::SHL_PARTS) { 1917 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 1918 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 1919 } else { 1920 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 1921 Tmp3 = DAG.getNode(isSRA ? 
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 1922 } 1923 1924 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 1925 ShAmt, DAG.getConstant(32, MVT::i8)); 1926 1927 SDOperand Hi, Lo; 1928 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 1929 1930 std::vector<MVT::ValueType> Tys; 1931 Tys.push_back(MVT::i32); 1932 Tys.push_back(MVT::Flag); 1933 std::vector<SDOperand> Ops; 1934 if (Op.getOpcode() == ISD::SHL_PARTS) { 1935 Ops.push_back(Tmp2); 1936 Ops.push_back(Tmp3); 1937 Ops.push_back(CC); 1938 Ops.push_back(InFlag); 1939 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1940 InFlag = Hi.getValue(1); 1941 1942 Ops.clear(); 1943 Ops.push_back(Tmp3); 1944 Ops.push_back(Tmp1); 1945 Ops.push_back(CC); 1946 Ops.push_back(InFlag); 1947 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1948 } else { 1949 Ops.push_back(Tmp2); 1950 Ops.push_back(Tmp3); 1951 Ops.push_back(CC); 1952 Ops.push_back(InFlag); 1953 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1954 InFlag = Lo.getValue(1); 1955 1956 Ops.clear(); 1957 Ops.push_back(Tmp3); 1958 Ops.push_back(Tmp1); 1959 Ops.push_back(CC); 1960 Ops.push_back(InFlag); 1961 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1962 } 1963 1964 Tys.clear(); 1965 Tys.push_back(MVT::i32); 1966 Tys.push_back(MVT::i32); 1967 Ops.clear(); 1968 Ops.push_back(Lo); 1969 Ops.push_back(Hi); 1970 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 1971 } 1972 case ISD::SINT_TO_FP: { 1973 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 1974 Op.getOperand(0).getValueType() >= MVT::i16 && 1975 "Unknown SINT_TO_FP to lower!"); 1976 1977 SDOperand Result; 1978 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 1979 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 1980 MachineFunction &MF = DAG.getMachineFunction(); 1981 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 1982 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1983 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 1984 DAG.getEntryNode(), Op.getOperand(0), 1985 StackSlot, 
DAG.getSrcValue(NULL)); 1986 1987 // Build the FILD 1988 std::vector<MVT::ValueType> Tys; 1989 Tys.push_back(MVT::f64); 1990 Tys.push_back(MVT::Other); 1991 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 1992 std::vector<SDOperand> Ops; 1993 Ops.push_back(Chain); 1994 Ops.push_back(StackSlot); 1995 Ops.push_back(DAG.getValueType(SrcVT)); 1996 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 1997 Tys, Ops); 1998 1999 if (X86ScalarSSE) { 2000 Chain = Result.getValue(1); 2001 SDOperand InFlag = Result.getValue(2); 2002 2003 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 2004 // shouldn't be necessary except that RFP cannot be live across 2005 // multiple blocks. When stackifier is fixed, they can be uncoupled. 2006 MachineFunction &MF = DAG.getMachineFunction(); 2007 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2008 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2009 std::vector<MVT::ValueType> Tys; 2010 Tys.push_back(MVT::Other); 2011 std::vector<SDOperand> Ops; 2012 Ops.push_back(Chain); 2013 Ops.push_back(Result); 2014 Ops.push_back(StackSlot); 2015 Ops.push_back(DAG.getValueType(Op.getValueType())); 2016 Ops.push_back(InFlag); 2017 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 2018 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 2019 DAG.getSrcValue(NULL)); 2020 } 2021 2022 return Result; 2023 } 2024 case ISD::FP_TO_SINT: { 2025 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 2026 "Unknown FP_TO_SINT to lower!"); 2027 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 2028 // stack slot. 
2029 MachineFunction &MF = DAG.getMachineFunction(); 2030 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 2031 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2032 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2033 2034 unsigned Opc; 2035 switch (Op.getValueType()) { 2036 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 2037 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 2038 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 2039 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 2040 } 2041 2042 SDOperand Chain = DAG.getEntryNode(); 2043 SDOperand Value = Op.getOperand(0); 2044 if (X86ScalarSSE) { 2045 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 2046 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 2047 DAG.getSrcValue(0)); 2048 std::vector<MVT::ValueType> Tys; 2049 Tys.push_back(MVT::f64); 2050 Tys.push_back(MVT::Other); 2051 std::vector<SDOperand> Ops; 2052 Ops.push_back(Chain); 2053 Ops.push_back(StackSlot); 2054 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 2055 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 2056 Chain = Value.getValue(1); 2057 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2058 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2059 } 2060 2061 // Build the FP_TO_INT*_IN_MEM 2062 std::vector<SDOperand> Ops; 2063 Ops.push_back(Chain); 2064 Ops.push_back(Value); 2065 Ops.push_back(StackSlot); 2066 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 2067 2068 // Load the result. 
2069 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2070 DAG.getSrcValue(NULL)); 2071 } 2072 case ISD::READCYCLECOUNTER: { 2073 std::vector<MVT::ValueType> Tys; 2074 Tys.push_back(MVT::Other); 2075 Tys.push_back(MVT::Flag); 2076 std::vector<SDOperand> Ops; 2077 Ops.push_back(Op.getOperand(0)); 2078 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 2079 Ops.clear(); 2080 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 2081 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 2082 MVT::i32, Ops[0].getValue(2))); 2083 Ops.push_back(Ops[1].getValue(1)); 2084 Tys[0] = Tys[1] = MVT::i32; 2085 Tys.push_back(MVT::Other); 2086 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2087 } 2088 case ISD::FABS: { 2089 MVT::ValueType VT = Op.getValueType(); 2090 const Type *OpNTy = MVT::getTypeForValueType(VT); 2091 std::vector<Constant*> CV; 2092 if (VT == MVT::f64) { 2093 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2094 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2095 } else { 2096 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2097 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2098 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2099 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2100 } 2101 Constant *CS = ConstantStruct::get(CV); 2102 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2103 SDOperand Mask 2104 = DAG.getNode(X86ISD::LOAD_PACK, 2105 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2106 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 2107 } 2108 case ISD::FNEG: { 2109 MVT::ValueType VT = Op.getValueType(); 2110 const Type *OpNTy = MVT::getTypeForValueType(VT); 2111 std::vector<Constant*> CV; 2112 if (VT == MVT::f64) { 2113 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 2114 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2115 } else { 2116 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 2117 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 
2118 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2119 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2120 } 2121 Constant *CS = ConstantStruct::get(CV); 2122 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2123 SDOperand Mask 2124 = DAG.getNode(X86ISD::LOAD_PACK, 2125 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2126 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 2127 } 2128 case ISD::SETCC: { 2129 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 2130 SDOperand Cond; 2131 SDOperand CC = Op.getOperand(2); 2132 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2133 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 2134 bool Flip; 2135 unsigned X86CC; 2136 if (translateX86CC(CC, isFP, X86CC, Flip)) { 2137 if (Flip) 2138 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2139 Op.getOperand(1), Op.getOperand(0)); 2140 else 2141 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2142 Op.getOperand(0), Op.getOperand(1)); 2143 return DAG.getNode(X86ISD::SETCC, MVT::i8, 2144 DAG.getConstant(X86CC, MVT::i8), Cond); 2145 } else { 2146 assert(isFP && "Illegal integer SetCC!"); 2147 2148 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2149 Op.getOperand(0), Op.getOperand(1)); 2150 std::vector<MVT::ValueType> Tys; 2151 std::vector<SDOperand> Ops; 2152 switch (SetCCOpcode) { 2153 default: assert(false && "Illegal floating point SetCC!"); 2154 case ISD::SETOEQ: { // !PF & ZF 2155 Tys.push_back(MVT::i8); 2156 Tys.push_back(MVT::Flag); 2157 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 2158 Ops.push_back(Cond); 2159 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2160 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2161 DAG.getConstant(X86ISD::COND_E, MVT::i8), 2162 Tmp1.getValue(1)); 2163 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 2164 } 2165 case ISD::SETUNE: { // PF | !ZF 2166 Tys.push_back(MVT::i8); 2167 Tys.push_back(MVT::Flag); 2168 Ops.push_back(DAG.getConstant(X86ISD::COND_P, 
MVT::i8)); 2169 Ops.push_back(Cond); 2170 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2171 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2172 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 2173 Tmp1.getValue(1)); 2174 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 2175 } 2176 } 2177 } 2178 } 2179 case ISD::SELECT: { 2180 MVT::ValueType VT = Op.getValueType(); 2181 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 2182 bool addTest = false; 2183 SDOperand Op0 = Op.getOperand(0); 2184 SDOperand Cond, CC; 2185 if (Op0.getOpcode() == ISD::SETCC) 2186 Op0 = LowerOperation(Op0, DAG); 2187 2188 if (Op0.getOpcode() == X86ISD::SETCC) { 2189 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2190 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2191 // have another use it will be eliminated. 2192 // If the X86ISD::SETCC has more than one use, then it's probably better 2193 // to use a test instead of duplicating the X86ISD::CMP (for register 2194 // pressure reason). 2195 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 2196 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2197 CmpOpc == X86ISD::UCOMI) { 2198 if (!Op0.hasOneUse()) { 2199 std::vector<MVT::ValueType> Tys; 2200 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 2201 Tys.push_back(Op0.Val->getValueType(i)); 2202 std::vector<SDOperand> Ops; 2203 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 2204 Ops.push_back(Op0.getOperand(i)); 2205 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2206 } 2207 2208 CC = Op0.getOperand(0); 2209 Cond = Op0.getOperand(1); 2210 // Make a copy as flag result cannot be used by more than one. 
2211 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2212 Cond.getOperand(0), Cond.getOperand(1)); 2213 addTest = 2214 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 2215 } else 2216 addTest = true; 2217 } else 2218 addTest = true; 2219 2220 if (addTest) { 2221 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2222 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 2223 } 2224 2225 std::vector<MVT::ValueType> Tys; 2226 Tys.push_back(Op.getValueType()); 2227 Tys.push_back(MVT::Flag); 2228 std::vector<SDOperand> Ops; 2229 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 2230 // condition is true. 2231 Ops.push_back(Op.getOperand(2)); 2232 Ops.push_back(Op.getOperand(1)); 2233 Ops.push_back(CC); 2234 Ops.push_back(Cond); 2235 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 2236 } 2237 case ISD::BRCOND: { 2238 bool addTest = false; 2239 SDOperand Cond = Op.getOperand(1); 2240 SDOperand Dest = Op.getOperand(2); 2241 SDOperand CC; 2242 if (Cond.getOpcode() == ISD::SETCC) 2243 Cond = LowerOperation(Cond, DAG); 2244 2245 if (Cond.getOpcode() == X86ISD::SETCC) { 2246 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2247 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2248 // have another use it will be eliminated. 2249 // If the X86ISD::SETCC has more than one use, then it's probably better 2250 // to use a test instead of duplicating the X86ISD::CMP (for register 2251 // pressure reason). 
2252 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 2253 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2254 CmpOpc == X86ISD::UCOMI) { 2255 if (!Cond.hasOneUse()) { 2256 std::vector<MVT::ValueType> Tys; 2257 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 2258 Tys.push_back(Cond.Val->getValueType(i)); 2259 std::vector<SDOperand> Ops; 2260 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 2261 Ops.push_back(Cond.getOperand(i)); 2262 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2263 } 2264 2265 CC = Cond.getOperand(0); 2266 Cond = Cond.getOperand(1); 2267 // Make a copy as flag result cannot be used by more than one. 2268 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2269 Cond.getOperand(0), Cond.getOperand(1)); 2270 } else 2271 addTest = true; 2272 } else 2273 addTest = true; 2274 2275 if (addTest) { 2276 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2277 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 2278 } 2279 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 2280 Op.getOperand(0), Op.getOperand(2), CC, Cond); 2281 } 2282 case ISD::MEMSET: { 2283 SDOperand InFlag(0, 0); 2284 SDOperand Chain = Op.getOperand(0); 2285 unsigned Align = 2286 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2287 if (Align == 0) Align = 1; 2288 2289 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2290 // If not DWORD aligned, call memset if size is less than the threshold. 2291 // It knows how to align to the right boundary first. 2292 if ((Align & 3) != 0 || 2293 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2294 MVT::ValueType IntPtr = getPointerTy(); 2295 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2296 std::vector<std::pair<SDOperand, const Type*> > Args; 2297 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2298 // Extend the ubyte argument to be an int value for the call. 
2299 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 2300 Args.push_back(std::make_pair(Val, IntPtrTy)); 2301 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2302 std::pair<SDOperand,SDOperand> CallResult = 2303 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2304 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 2305 return CallResult.second; 2306 } 2307 2308 MVT::ValueType AVT; 2309 SDOperand Count; 2310 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2311 unsigned BytesLeft = 0; 2312 bool TwoRepStos = false; 2313 if (ValC) { 2314 unsigned ValReg; 2315 unsigned Val = ValC->getValue() & 255; 2316 2317 // If the value is a constant, then we can potentially use larger sets. 2318 switch (Align & 3) { 2319 case 2: // WORD aligned 2320 AVT = MVT::i16; 2321 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2322 BytesLeft = I->getValue() % 2; 2323 Val = (Val << 8) | Val; 2324 ValReg = X86::AX; 2325 break; 2326 case 0: // DWORD aligned 2327 AVT = MVT::i32; 2328 if (I) { 2329 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2330 BytesLeft = I->getValue() % 4; 2331 } else { 2332 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2333 DAG.getConstant(2, MVT::i8)); 2334 TwoRepStos = true; 2335 } 2336 Val = (Val << 8) | Val; 2337 Val = (Val << 16) | Val; 2338 ValReg = X86::EAX; 2339 break; 2340 default: // Byte aligned 2341 AVT = MVT::i8; 2342 Count = Op.getOperand(3); 2343 ValReg = X86::AL; 2344 break; 2345 } 2346 2347 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 2348 InFlag); 2349 InFlag = Chain.getValue(1); 2350 } else { 2351 AVT = MVT::i8; 2352 Count = Op.getOperand(3); 2353 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 2354 InFlag = Chain.getValue(1); 2355 } 2356 2357 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2358 InFlag = Chain.getValue(1); 2359 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2360 
InFlag = Chain.getValue(1); 2361 2362 std::vector<MVT::ValueType> Tys; 2363 Tys.push_back(MVT::Other); 2364 Tys.push_back(MVT::Flag); 2365 std::vector<SDOperand> Ops; 2366 Ops.push_back(Chain); 2367 Ops.push_back(DAG.getValueType(AVT)); 2368 Ops.push_back(InFlag); 2369 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2370 2371 if (TwoRepStos) { 2372 InFlag = Chain.getValue(1); 2373 Count = Op.getOperand(3); 2374 MVT::ValueType CVT = Count.getValueType(); 2375 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2376 DAG.getConstant(3, CVT)); 2377 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2378 InFlag = Chain.getValue(1); 2379 Tys.clear(); 2380 Tys.push_back(MVT::Other); 2381 Tys.push_back(MVT::Flag); 2382 Ops.clear(); 2383 Ops.push_back(Chain); 2384 Ops.push_back(DAG.getValueType(MVT::i8)); 2385 Ops.push_back(InFlag); 2386 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2387 } else if (BytesLeft) { 2388 // Issue stores for the last 1 - 3 bytes. 2389 SDOperand Value; 2390 unsigned Val = ValC->getValue() & 255; 2391 unsigned Offset = I->getValue() - BytesLeft; 2392 SDOperand DstAddr = Op.getOperand(1); 2393 MVT::ValueType AddrVT = DstAddr.getValueType(); 2394 if (BytesLeft >= 2) { 2395 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 2396 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2397 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2398 DAG.getConstant(Offset, AddrVT)), 2399 DAG.getSrcValue(NULL)); 2400 BytesLeft -= 2; 2401 Offset += 2; 2402 } 2403 2404 if (BytesLeft == 1) { 2405 Value = DAG.getConstant(Val, MVT::i8); 2406 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2407 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2408 DAG.getConstant(Offset, AddrVT)), 2409 DAG.getSrcValue(NULL)); 2410 } 2411 } 2412 2413 return Chain; 2414 } 2415 case ISD::MEMCPY: { 2416 SDOperand Chain = Op.getOperand(0); 2417 unsigned Align = 2418 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2419 if (Align == 0) Align = 1; 2420 2421 ConstantSDNode 
*I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2422 // If not DWORD aligned, call memcpy if size is less than the threshold. 2423 // It knows how to align to the right boundary first. 2424 if ((Align & 3) != 0 || 2425 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2426 MVT::ValueType IntPtr = getPointerTy(); 2427 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2428 std::vector<std::pair<SDOperand, const Type*> > Args; 2429 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2430 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 2431 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2432 std::pair<SDOperand,SDOperand> CallResult = 2433 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2434 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 2435 return CallResult.second; 2436 } 2437 2438 MVT::ValueType AVT; 2439 SDOperand Count; 2440 unsigned BytesLeft = 0; 2441 bool TwoRepMovs = false; 2442 switch (Align & 3) { 2443 case 2: // WORD aligned 2444 AVT = MVT::i16; 2445 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2446 BytesLeft = I->getValue() % 2; 2447 break; 2448 case 0: // DWORD aligned 2449 AVT = MVT::i32; 2450 if (I) { 2451 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2452 BytesLeft = I->getValue() % 4; 2453 } else { 2454 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2455 DAG.getConstant(2, MVT::i8)); 2456 TwoRepMovs = true; 2457 } 2458 break; 2459 default: // Byte aligned 2460 AVT = MVT::i8; 2461 Count = Op.getOperand(3); 2462 break; 2463 } 2464 2465 SDOperand InFlag(0, 0); 2466 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2467 InFlag = Chain.getValue(1); 2468 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2469 InFlag = Chain.getValue(1); 2470 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 2471 InFlag = Chain.getValue(1); 2472 2473 std::vector<MVT::ValueType> Tys; 2474 Tys.push_back(MVT::Other); 2475 
Tys.push_back(MVT::Flag); 2476 std::vector<SDOperand> Ops; 2477 Ops.push_back(Chain); 2478 Ops.push_back(DAG.getValueType(AVT)); 2479 Ops.push_back(InFlag); 2480 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2481 2482 if (TwoRepMovs) { 2483 InFlag = Chain.getValue(1); 2484 Count = Op.getOperand(3); 2485 MVT::ValueType CVT = Count.getValueType(); 2486 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2487 DAG.getConstant(3, CVT)); 2488 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2489 InFlag = Chain.getValue(1); 2490 Tys.clear(); 2491 Tys.push_back(MVT::Other); 2492 Tys.push_back(MVT::Flag); 2493 Ops.clear(); 2494 Ops.push_back(Chain); 2495 Ops.push_back(DAG.getValueType(MVT::i8)); 2496 Ops.push_back(InFlag); 2497 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2498 } else if (BytesLeft) { 2499 // Issue loads and stores for the last 1 - 3 bytes. 2500 unsigned Offset = I->getValue() - BytesLeft; 2501 SDOperand DstAddr = Op.getOperand(1); 2502 MVT::ValueType DstVT = DstAddr.getValueType(); 2503 SDOperand SrcAddr = Op.getOperand(2); 2504 MVT::ValueType SrcVT = SrcAddr.getValueType(); 2505 SDOperand Value; 2506 if (BytesLeft >= 2) { 2507 Value = DAG.getLoad(MVT::i16, Chain, 2508 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2509 DAG.getConstant(Offset, SrcVT)), 2510 DAG.getSrcValue(NULL)); 2511 Chain = Value.getValue(1); 2512 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2513 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2514 DAG.getConstant(Offset, DstVT)), 2515 DAG.getSrcValue(NULL)); 2516 BytesLeft -= 2; 2517 Offset += 2; 2518 } 2519 2520 if (BytesLeft == 1) { 2521 Value = DAG.getLoad(MVT::i8, Chain, 2522 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2523 DAG.getConstant(Offset, SrcVT)), 2524 DAG.getSrcValue(NULL)); 2525 Chain = Value.getValue(1); 2526 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2527 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2528 DAG.getConstant(Offset, DstVT)), 2529 DAG.getSrcValue(NULL)); 2530 } 2531 } 2532 2533 return Chain; 2534 } 
2535 2536 // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their 2537 // target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 2538 // one of the above mentioned nodes. It has to be wrapped because otherwise 2539 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2540 // be used to form addressing mode. These wrapped nodes will be selected 2541 // into MOV32ri. 2542 case ISD::ConstantPool: { 2543 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2544 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2545 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2546 CP->getAlignment())); 2547 if (Subtarget->isTargetDarwin()) { 2548 // With PIC, the address is actually $g + Offset. 2549 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2550 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2551 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2552 } 2553 2554 return Result; 2555 } 2556 case ISD::GlobalAddress: { 2557 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2558 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2559 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2560 if (Subtarget->isTargetDarwin()) { 2561 // With PIC, the address is actually $g + Offset. 2562 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2563 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2564 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2565 2566 // For Darwin, external and weak symbols are indirect, so we want to load 2567 // the value at address GV, not the value of GV itself. This means that 2568 // the GlobalAddress must be in the base or index register of the address, 2569 // not the GV offset field. 
2570 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2571 DarwinGVRequiresExtraLoad(GV)) 2572 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2573 Result, DAG.getSrcValue(NULL)); 2574 } 2575 2576 return Result; 2577 } 2578 case ISD::ExternalSymbol: { 2579 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2580 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2581 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2582 if (Subtarget->isTargetDarwin()) { 2583 // With PIC, the address is actually $g + Offset. 2584 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2585 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2586 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2587 } 2588 2589 return Result; 2590 } 2591 case ISD::VASTART: { 2592 // vastart just stores the address of the VarArgsFrameIndex slot into the 2593 // memory location argument. 2594 // FIXME: Replace MVT::i32 with PointerTy 2595 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 2596 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 2597 Op.getOperand(1), Op.getOperand(2)); 2598 } 2599 case ISD::RET: { 2600 SDOperand Copy; 2601 2602 switch(Op.getNumOperands()) { 2603 default: 2604 assert(0 && "Do not know how to return this many arguments!"); 2605 abort(); 2606 case 1: 2607 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 2608 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 2609 case 2: { 2610 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 2611 if (MVT::isInteger(ArgVT)) 2612 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 2613 SDOperand()); 2614 else if (!X86ScalarSSE) { 2615 std::vector<MVT::ValueType> Tys; 2616 Tys.push_back(MVT::Other); 2617 Tys.push_back(MVT::Flag); 2618 std::vector<SDOperand> Ops; 2619 Ops.push_back(Op.getOperand(0)); 2620 Ops.push_back(Op.getOperand(1)); 2621 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2622 } else { 2623 
SDOperand MemLoc; 2624 SDOperand Chain = Op.getOperand(0); 2625 SDOperand Value = Op.getOperand(1); 2626 2627 if (Value.getOpcode() == ISD::LOAD && 2628 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 2629 Chain = Value.getOperand(0); 2630 MemLoc = Value.getOperand(1); 2631 } else { 2632 // Spill the value to memory and reload it into top of stack. 2633 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 2634 MachineFunction &MF = DAG.getMachineFunction(); 2635 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2636 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 2637 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 2638 Value, MemLoc, DAG.getSrcValue(0)); 2639 } 2640 std::vector<MVT::ValueType> Tys; 2641 Tys.push_back(MVT::f64); 2642 Tys.push_back(MVT::Other); 2643 std::vector<SDOperand> Ops; 2644 Ops.push_back(Chain); 2645 Ops.push_back(MemLoc); 2646 Ops.push_back(DAG.getValueType(ArgVT)); 2647 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 2648 Tys.clear(); 2649 Tys.push_back(MVT::Other); 2650 Tys.push_back(MVT::Flag); 2651 Ops.clear(); 2652 Ops.push_back(Copy.getValue(1)); 2653 Ops.push_back(Copy); 2654 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2655 } 2656 break; 2657 } 2658 case 3: 2659 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 2660 SDOperand()); 2661 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 2662 break; 2663 } 2664 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 2665 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 2666 Copy.getValue(1)); 2667 } 2668 case ISD::SCALAR_TO_VECTOR: { 2669 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 2670 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 2671 } 2672 case ISD::VECTOR_SHUFFLE: { 2673 SDOperand V1 = Op.getOperand(0); 2674 SDOperand V2 = Op.getOperand(1); 2675 SDOperand PermMask = Op.getOperand(2); 2676 MVT::ValueType VT = Op.getValueType(); 2677 unsigned NumElems 
= PermMask.getNumOperands(); 2678 2679 if (X86::isSplatMask(PermMask.Val)) 2680 return Op; 2681 2682 // Normalize the node to match x86 shuffle ops if needed 2683 if (V2.getOpcode() != ISD::UNDEF) { 2684 bool DoSwap = false; 2685 2686 if (ShouldXformedToMOVLP(V1, V2, PermMask)) 2687 DoSwap = true; 2688 else if (isLowerFromV2UpperFromV1(PermMask)) 2689 DoSwap = true; 2690 2691 if (DoSwap) { 2692 Op = CommuteVectorShuffle(Op, DAG); 2693 V1 = Op.getOperand(0); 2694 V2 = Op.getOperand(1); 2695 PermMask = Op.getOperand(2); 2696 } 2697 } 2698 2699 if (NumElems == 2) 2700 return Op; 2701 2702 if (X86::isMOVSMask(PermMask.Val)) 2703 // Leave the VECTOR_SHUFFLE alone. It matches MOVS{S|D}. 2704 return Op; 2705 2706 if (X86::isUNPCKLMask(PermMask.Val) || 2707 X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2708 X86::isUNPCKHMask(PermMask.Val)) 2709 // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. 2710 return Op; 2711 2712 // If VT is integer, try PSHUF* first, then SHUFP*. 2713 if (MVT::isInteger(VT)) { 2714 if (X86::isPSHUFDMask(PermMask.Val) || 2715 X86::isPSHUFHWMask(PermMask.Val) || 2716 X86::isPSHUFLWMask(PermMask.Val)) { 2717 if (V2.getOpcode() != ISD::UNDEF) 2718 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2719 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2720 return Op; 2721 } 2722 2723 if (X86::isSHUFPMask(PermMask.Val)) 2724 return Op; 2725 2726 // Handle v8i16 shuffle high / low shuffle node pair. 
2727 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2728 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2729 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2730 std::vector<SDOperand> MaskVec; 2731 for (unsigned i = 0; i != 4; ++i) 2732 MaskVec.push_back(PermMask.getOperand(i)); 2733 for (unsigned i = 4; i != 8; ++i) 2734 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2735 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2736 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2737 MaskVec.clear(); 2738 for (unsigned i = 0; i != 4; ++i) 2739 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2740 for (unsigned i = 4; i != 8; ++i) 2741 MaskVec.push_back(PermMask.getOperand(i)); 2742 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2743 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2744 } 2745 } else { 2746 // Floating point cases in the other order. 2747 if (X86::isSHUFPMask(PermMask.Val)) 2748 return Op; 2749 if (X86::isPSHUFDMask(PermMask.Val) || 2750 X86::isPSHUFHWMask(PermMask.Val) || 2751 X86::isPSHUFLWMask(PermMask.Val)) { 2752 if (V2.getOpcode() != ISD::UNDEF) 2753 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2754 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2755 return Op; 2756 } 2757 } 2758 2759 return SDOperand(); 2760 } 2761 case ISD::BUILD_VECTOR: { 2762 // All one's are handled with pcmpeqd. 
2763 if (ISD::isBuildVectorAllOnes(Op.Val)) 2764 return Op; 2765 2766 std::set<SDOperand> Values; 2767 SDOperand Elt0 = Op.getOperand(0); 2768 Values.insert(Elt0); 2769 bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && 2770 cast<ConstantSDNode>(Elt0)->getValue() == 0) || 2771 (isa<ConstantFPSDNode>(Elt0) && 2772 cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0)); 2773 bool RestAreZero = true; 2774 unsigned NumElems = Op.getNumOperands(); 2775 for (unsigned i = 1; i < NumElems; ++i) { 2776 SDOperand Elt = Op.getOperand(i); 2777 if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) { 2778 if (!FPC->isExactlyValue(+0.0)) 2779 RestAreZero = false; 2780 } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2781 if (!C->isNullValue()) 2782 RestAreZero = false; 2783 } else 2784 RestAreZero = false; 2785 Values.insert(Elt); 2786 } 2787 2788 if (RestAreZero) { 2789 if (Elt0IsZero) return Op; 2790 2791 // Zero extend a scalar to a vector. 2792 return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); 2793 } 2794 2795 if (Values.size() > 2) { 2796 // Expand into a number of unpckl*. 2797 // e.g. 
for v4f32 2798 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2799 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2800 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2801 MVT::ValueType VT = Op.getValueType(); 2802 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2803 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2804 std::vector<SDOperand> MaskVec; 2805 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2806 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2807 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2808 } 2809 SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2810 std::vector<SDOperand> V(NumElems); 2811 for (unsigned i = 0; i < NumElems; ++i) 2812 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2813 NumElems >>= 1; 2814 while (NumElems != 0) { 2815 for (unsigned i = 0; i < NumElems; ++i) 2816 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2817 PermMask); 2818 NumElems >>= 1; 2819 } 2820 return V[0]; 2821 } 2822 2823 return SDOperand(); 2824 } 2825 case ISD::EXTRACT_VECTOR_ELT: { 2826 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2827 return SDOperand(); 2828 2829 MVT::ValueType VT = Op.getValueType(); 2830 if (MVT::getSizeInBits(VT) == 16) { 2831 // Transform it so it match pextrw which produces a 32-bit result. 2832 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2833 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2834 Op.getOperand(0), Op.getOperand(1)); 2835 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2836 DAG.getValueType(VT)); 2837 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2838 } else if (MVT::getSizeInBits(VT) == 32) { 2839 SDOperand Vec = Op.getOperand(0); 2840 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2841 if (Idx == 0) 2842 return Op; 2843 2844 // TODO: if Idex == 2, we can use unpckhps 2845 // SHUFPS the element to the lowest double word, then movss. 
2846 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2847 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 2848 MVT::getVectorBaseType(MaskVT)); 2849 std::vector<SDOperand> IdxVec; 2850 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 2851 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2852 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2853 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2854 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2855 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2856 Vec, Vec, Mask); 2857 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2858 DAG.getConstant(0, MVT::i32)); 2859 } else if (MVT::getSizeInBits(VT) == 64) { 2860 SDOperand Vec = Op.getOperand(0); 2861 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2862 if (Idx == 0) 2863 return Op; 2864 2865 // UNPCKHPD the element to the lowest double word, then movsd. 2866 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 2867 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 2868 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2869 std::vector<SDOperand> IdxVec; 2870 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 2871 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2872 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2873 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2874 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2875 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2876 DAG.getConstant(0, MVT::i32)); 2877 } 2878 2879 return SDOperand(); 2880 } 2881 case ISD::INSERT_VECTOR_ELT: { 2882 // Transform it so it match pinsrw which expects a 16-bit value in a R32 2883 // as its second argument. 
2884 MVT::ValueType VT = Op.getValueType(); 2885 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 2886 if (MVT::getSizeInBits(BaseVT) == 16) { 2887 SDOperand N1 = Op.getOperand(1); 2888 SDOperand N2 = Op.getOperand(2); 2889 if (N1.getValueType() != MVT::i32) 2890 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 2891 if (N2.getValueType() != MVT::i32) 2892 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 2893 return DAG.getNode(X86ISD::PINSRW, VT, Op.getOperand(0), N1, N2); 2894 } 2895 2896 return SDOperand(); 2897 } 2898 case ISD::INTRINSIC_WO_CHAIN: { 2899 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 2900 switch (IntNo) { 2901 default: return SDOperand(); // Don't custom lower most intrinsics. 2902 // Comparison intrinsics. 2903 case Intrinsic::x86_sse_comieq_ss: 2904 case Intrinsic::x86_sse_comilt_ss: 2905 case Intrinsic::x86_sse_comile_ss: 2906 case Intrinsic::x86_sse_comigt_ss: 2907 case Intrinsic::x86_sse_comige_ss: 2908 case Intrinsic::x86_sse_comineq_ss: 2909 case Intrinsic::x86_sse_ucomieq_ss: 2910 case Intrinsic::x86_sse_ucomilt_ss: 2911 case Intrinsic::x86_sse_ucomile_ss: 2912 case Intrinsic::x86_sse_ucomigt_ss: 2913 case Intrinsic::x86_sse_ucomige_ss: 2914 case Intrinsic::x86_sse_ucomineq_ss: 2915 case Intrinsic::x86_sse2_comieq_sd: 2916 case Intrinsic::x86_sse2_comilt_sd: 2917 case Intrinsic::x86_sse2_comile_sd: 2918 case Intrinsic::x86_sse2_comigt_sd: 2919 case Intrinsic::x86_sse2_comige_sd: 2920 case Intrinsic::x86_sse2_comineq_sd: 2921 case Intrinsic::x86_sse2_ucomieq_sd: 2922 case Intrinsic::x86_sse2_ucomilt_sd: 2923 case Intrinsic::x86_sse2_ucomile_sd: 2924 case Intrinsic::x86_sse2_ucomigt_sd: 2925 case Intrinsic::x86_sse2_ucomige_sd: 2926 case Intrinsic::x86_sse2_ucomineq_sd: { 2927 unsigned Opc = 0; 2928 ISD::CondCode CC = ISD::SETCC_INVALID; 2929 switch (IntNo) { 2930 default: break; 2931 case Intrinsic::x86_sse_comieq_ss: 2932 case Intrinsic::x86_sse2_comieq_sd: 2933 Opc = X86ISD::COMI; 2934 CC = 
ISD::SETEQ; 2935 break; 2936 case Intrinsic::x86_sse_comilt_ss: 2937 case Intrinsic::x86_sse2_comilt_sd: 2938 Opc = X86ISD::COMI; 2939 CC = ISD::SETLT; 2940 break; 2941 case Intrinsic::x86_sse_comile_ss: 2942 case Intrinsic::x86_sse2_comile_sd: 2943 Opc = X86ISD::COMI; 2944 CC = ISD::SETLE; 2945 break; 2946 case Intrinsic::x86_sse_comigt_ss: 2947 case Intrinsic::x86_sse2_comigt_sd: 2948 Opc = X86ISD::COMI; 2949 CC = ISD::SETGT; 2950 break; 2951 case Intrinsic::x86_sse_comige_ss: 2952 case Intrinsic::x86_sse2_comige_sd: 2953 Opc = X86ISD::COMI; 2954 CC = ISD::SETGE; 2955 break; 2956 case Intrinsic::x86_sse_comineq_ss: 2957 case Intrinsic::x86_sse2_comineq_sd: 2958 Opc = X86ISD::COMI; 2959 CC = ISD::SETNE; 2960 break; 2961 case Intrinsic::x86_sse_ucomieq_ss: 2962 case Intrinsic::x86_sse2_ucomieq_sd: 2963 Opc = X86ISD::UCOMI; 2964 CC = ISD::SETEQ; 2965 break; 2966 case Intrinsic::x86_sse_ucomilt_ss: 2967 case Intrinsic::x86_sse2_ucomilt_sd: 2968 Opc = X86ISD::UCOMI; 2969 CC = ISD::SETLT; 2970 break; 2971 case Intrinsic::x86_sse_ucomile_ss: 2972 case Intrinsic::x86_sse2_ucomile_sd: 2973 Opc = X86ISD::UCOMI; 2974 CC = ISD::SETLE; 2975 break; 2976 case Intrinsic::x86_sse_ucomigt_ss: 2977 case Intrinsic::x86_sse2_ucomigt_sd: 2978 Opc = X86ISD::UCOMI; 2979 CC = ISD::SETGT; 2980 break; 2981 case Intrinsic::x86_sse_ucomige_ss: 2982 case Intrinsic::x86_sse2_ucomige_sd: 2983 Opc = X86ISD::UCOMI; 2984 CC = ISD::SETGE; 2985 break; 2986 case Intrinsic::x86_sse_ucomineq_ss: 2987 case Intrinsic::x86_sse2_ucomineq_sd: 2988 Opc = X86ISD::UCOMI; 2989 CC = ISD::SETNE; 2990 break; 2991 } 2992 bool Flip; 2993 unsigned X86CC; 2994 translateX86CC(CC, true, X86CC, Flip); 2995 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 2996 Op.getOperand(Flip?1:2)); 2997 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 2998 DAG.getConstant(X86CC, MVT::i8), Cond); 2999 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 3000 } 3001 } 3002 } 3003 } 3004} 3005 3006const char 
*X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3007 switch (Opcode) { 3008 default: return NULL; 3009 case X86ISD::SHLD: return "X86ISD::SHLD"; 3010 case X86ISD::SHRD: return "X86ISD::SHRD"; 3011 case X86ISD::FAND: return "X86ISD::FAND"; 3012 case X86ISD::FXOR: return "X86ISD::FXOR"; 3013 case X86ISD::FILD: return "X86ISD::FILD"; 3014 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3015 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3016 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3017 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3018 case X86ISD::FLD: return "X86ISD::FLD"; 3019 case X86ISD::FST: return "X86ISD::FST"; 3020 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 3021 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3022 case X86ISD::CALL: return "X86ISD::CALL"; 3023 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3024 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3025 case X86ISD::CMP: return "X86ISD::CMP"; 3026 case X86ISD::TEST: return "X86ISD::TEST"; 3027 case X86ISD::COMI: return "X86ISD::COMI"; 3028 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3029 case X86ISD::SETCC: return "X86ISD::SETCC"; 3030 case X86ISD::CMOV: return "X86ISD::CMOV"; 3031 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3032 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3033 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3034 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3035 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3036 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3037 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3038 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3039 case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; 3040 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3041 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3042 } 3043} 3044 3045void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3046 
uint64_t Mask, 3047 uint64_t &KnownZero, 3048 uint64_t &KnownOne, 3049 unsigned Depth) const { 3050 unsigned Opc = Op.getOpcode(); 3051 assert((Opc >= ISD::BUILTIN_OP_END || 3052 Opc == ISD::INTRINSIC_WO_CHAIN || 3053 Opc == ISD::INTRINSIC_W_CHAIN || 3054 Opc == ISD::INTRINSIC_VOID) && 3055 "Should use MaskedValueIsZero if you don't know whether Op" 3056 " is a target node!"); 3057 3058 KnownZero = KnownOne = 0; // Don't know anything. 3059 switch (Opc) { 3060 default: break; 3061 case X86ISD::SETCC: 3062 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3063 break; 3064 } 3065} 3066 3067std::vector<unsigned> X86TargetLowering:: 3068getRegClassForInlineAsmConstraint(const std::string &Constraint, 3069 MVT::ValueType VT) const { 3070 if (Constraint.size() == 1) { 3071 // FIXME: not handling fp-stack yet! 3072 // FIXME: not handling MMX registers yet ('y' constraint). 3073 switch (Constraint[0]) { // GCC X86 Constraint Letters 3074 default: break; // Unknown constriant letter 3075 case 'r': // GENERAL_REGS 3076 case 'R': // LEGACY_REGS 3077 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3078 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3079 case 'l': // INDEX_REGS 3080 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3081 X86::ESI, X86::EDI, X86::EBP, 0); 3082 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3083 case 'Q': // Q_REGS 3084 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3085 case 'x': // SSE_REGS if SSE1 allowed 3086 if (Subtarget->hasSSE1()) 3087 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3088 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3089 0); 3090 return std::vector<unsigned>(); 3091 case 'Y': // SSE_REGS if SSE2 allowed 3092 if (Subtarget->hasSSE2()) 3093 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3094 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3095 0); 3096 return std::vector<unsigned>(); 3097 } 3098 } 3099 
3100 return std::vector<unsigned>(); 3101} 3102 3103/// isLegalAddressImmediate - Return true if the integer value or 3104/// GlobalValue can be used as the offset of the target addressing mode. 3105bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3106 // X86 allows a sign-extended 32-bit immediate field. 3107 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3108} 3109 3110bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3111 if (Subtarget->isTargetDarwin()) { 3112 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3113 if (RModel == Reloc::Static) 3114 return true; 3115 else if (RModel == Reloc::DynamicNoPIC) 3116 return !DarwinGVRequiresExtraLoad(GV); 3117 else 3118 return false; 3119 } else 3120 return true; 3121} 3122 3123/// isShuffleMaskLegal - Targets can use this to indicate that they only 3124/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3125/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3126/// are assumed to be legal. 3127bool 3128X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3129 // Only do shuffles on 128-bit vector types for now. 3130 if (MVT::getSizeInBits(VT) == 64) return false; 3131 return (Mask.Val->getNumOperands() == 2 || 3132 X86::isSplatMask(Mask.Val) || 3133 X86::isMOVSMask(Mask.Val) || 3134 X86::isPSHUFDMask(Mask.Val) || 3135 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3136 X86::isSHUFPMask(Mask.Val) || 3137 X86::isUNPCKLMask(Mask.Val) || 3138 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3139 X86::isUNPCKHMask(Mask.Val)); 3140} 3141