X86ISelLowering.cpp revision 475aecf467aa63623e5840ac896faef496eaca8c
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird: it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last.  These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
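
  // For reference: throughout this constructor, Promote means "do the
  // operation in a wider legal type" (e.g. an i16 UINT_TO_FP becomes an i32
  // conversion after an extend), Expand means "let the legalizer break the
  // node into other nodes", and Custom means "call back into LowerOperation
  // below".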
  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16-to-fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  // The X86 ret instruction may pop the stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (when on 32-bit x86).
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME: use subtarget debug flags.
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG            , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops.
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set the operation action for all vector types to expand.  Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic.
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,            MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,            MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,            MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,           MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,   MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,  MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,  MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,  MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,  MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,  MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,  MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,  MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,  MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,  MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v16i8, Legal);
    setOperationAction(ISD::LOAD, MVT::v8i16, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v16i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v8i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v4i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v2i64, Custom);
  }

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info.  Plus, the values should
  // be smaller when we are in optimizing-for-size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    return LowerFastCCArguments(F, DAG);
  return LowerCCCArguments(F, DAG);
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

std::vector<SDOperand>
X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function on the
  // X86, the stack frame looks like this:
  //
  // [ESP]     -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if the first argument is four bytes in size
  // ...
  //
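  // For example (illustrative): for f(int a, double b, short c), a is loaded
  // from [ESP+4] (fixed object at offset 0), b from [ESP+8] (offset 4), and
  // c from [ESP+16] (offset 12); each slot is rounded up to 4 bytes, with
  // i64/f64 taking 8.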
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot.
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    unsigned ObjSize;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:  ObjSize = 1;                break;
    case MVT::i16: ObjSize = 2;                break;
    case MVT::i32: ObjSize = 4;                break;
    case MVT::i64: ObjSize = ArgIncrement = 8; break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }
    // Create the frame index object for this incoming parameter...
    int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

    // Create the SelectionDAG nodes corresponding to a load from this
    // parameter.
    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

    // Don't codegen dead arguments.  FIXME: remove this check when we can
    // nuke dead loads.
    SDOperand ArgValue;
    if (!I->use_empty())
      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    else {
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    ArgOffset += ArgIncrement;   // Move on to the next argument...
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

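    // Note (a rough sketch of the mechanism): the byte count given to
    // CALLSEQ_START, together with the matching CALLSEQ_END below, brackets
    // the call and is what ultimately sizes the stack-adjustment
    // instructions emitted around it.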
    // Arguments go on the stack in reverse order, as specified by the ABI.
    unsigned ArgOffset = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;

    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed, use
        // a sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
    }
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  SDOperand InFlag = Chain.getValue(1);
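
  // Note (for reference): the MVT::Flag result "glues" its consumers to the
  // call node, so the scheduler cannot separate the call from the
  // CALLSEQ_END or from the copies that read the return value out of
  // EAX/EDX/ST0.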
  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT.  This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks.  When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                 Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee-popped value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//
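// For illustration: if FASTCC_NUM_INT_ARGS_INREGS were 2 (it is currently
// forced to 0 below), fastcc int f(int a, int b, int c) would receive a in
// EAX and b in EDX, find c at [ESP+4], and return with "ret 4".
//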

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value.  It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has a problem with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coalesced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;


std::vector<SDOperand>
X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP]     -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if the first argument is 4 bytes in
  //              size
  // ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot.

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX
  // are both used).
  unsigned NumIntRegs = 0;

  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    SDOperand ArgValue;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                                    X86::R8RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
          DAG.setRoot(ArgValue.getValue(1));
          if (ObjectVT == MVT::i1)
            // FIXME: Should insert an assertzext here.
            ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
        }
        ++NumIntRegs;
        break;
      }

      ObjSize = 1;
      break;
    case MVT::i16:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                                    X86::R16RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 2;
      break;
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                    X86::R32RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
          unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);

          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
          DAG.setRoot(Hi.getValue(1));

          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          DAG.setRoot(Low.getValue(1));

          // Load the high part from memory.
          // Create the frame index object for this incoming parameter...
          int FI = MFI->CreateFixedObject(4, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        ArgOffset += 4;
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        break;
      }
      ObjSize = ArgIncrement = 8;
      break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }

    // Don't codegen dead arguments.  FIXME: remove this check when we can
    // nuke dead loads.
    if (ObjSize && !I->use_empty()) {
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else if (ArgValue.Val == 0) {
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    if (ObjSize)
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  }

  // Make sure the incoming argument area takes 8n+4 bytes, so that the
  // arguments start out aligned after the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;
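  // For example: ArgOffset is a multiple of 4 here, so it is either 8n
  // (bumped to 8n+4, e.g. 8 -> 12) or already 8n+4 (e.g. 4, left alone);
  // either way the callee pops an 8n+4 byte amount, as the convention
  // requires.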

  VarArgsFrameIndex = 0xAAAAAAA;    // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;              // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;   // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX
  // are both used).
  unsigned NumIntRegs = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the outgoing argument area takes 8n+4 bytes, so that the
  // arguments start out aligned after the return address has been pushed.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegValuesToPass.push_back(Args[i].first);
        ++NumIntRegs;
        break;
      }
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
      // Can pass (at least) part of it in regs?
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(1, MVT::i32));
        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(0, MVT::i32));
        RegValuesToPass.push_back(Lo);
        ++NumIntRegs;

        // Pass both parts in regs?
        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
          RegValuesToPass.push_back(Hi);
          ++NumIntRegs;
        } else {
          // Pass the high part in memory.
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
          ArgOffset += 4;
        }
        break;
      }
      // Fall through
    case MVT::f64:
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
  }
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);

  // Make sure the outgoing argument area takes 8n+4 bytes, so that the
  // arguments start out aligned after the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);

  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg;
    SDOperand RegToPass = RegValuesToPass[i];
    switch (RegToPass.getValueType()) {
    default: assert(0 && "Bad thing to pass in regs");
    case MVT::i8:
      CCReg = (i == 0) ? X86::AL  : X86::DL;
      break;
    case MVT::i16:
      CCReg = (i == 0) ? X86::AX  : X86::DX;
      break;
    case MVT::i32:
      CCReg = (i == 0) ? X86::EAX : X86::EDX;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
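  // Note (for reference): the two constants passed to CALLSEQ_END below are
  // the number of bytes pushed for arguments and the number of bytes the
  // callee pops on return; for fastcc both equal ArgOffset, whereas the C
  // convention above passes (NumBytes, 0) because the caller cleans up.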
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT.  This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks.  When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
}

std::pair<SDOperand, SDOperand> X86TargetLowering::
LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                        SelectionDAG &DAG) {
  SDOperand Result;
  if (Depth)   // Depths > 0 not supported yet!
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address.
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
                           DAG.getSrcValue(NULL));
    else
      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
                           DAG.getConstant(4, MVT::i32));
  }
  return std::make_pair(Result, Chain);
}

/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
/// which corresponds to the condition code.
static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
  switch (X86CC) {
  default: assert(0 && "Unknown X86 conditional code!");
  case X86ISD::COND_A:  return X86::JA;
  case X86ISD::COND_AE: return X86::JAE;
  case X86ISD::COND_B:  return X86::JB;
  case X86ISD::COND_BE: return X86::JBE;
  case X86ISD::COND_E:  return X86::JE;
  case X86ISD::COND_G:  return X86::JG;
  case X86ISD::COND_GE: return X86::JGE;
  case X86ISD::COND_L:  return X86::JL;
  case X86ISD::COND_LE: return X86::JLE;
  case X86ISD::COND_NE: return X86::JNE;
  case X86ISD::COND_NO: return X86::JNO;
  case X86ISD::COND_NP: return X86::JNP;
  case X86ISD::COND_NS: return X86::JNS;
  case X86ISD::COND_O:  return X86::JO;
  case X86ISD::COND_P:  return X86::JP;
  case X86ISD::COND_S:  return X86::JS;
  }
}

/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code.  It returns false if it cannot do a direct
/// translation.  X86CC is the translated CondCode.  Flip is set to true if
/// the order of the comparison operands should be flipped.
static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLE: Flip = true;  // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLT: Flip = true;  // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGE: Flip = true;  // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: Flip = true;  // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO:  X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:   X86CC = X86ISD::COND_NP; break;
    }
  }

  return X86CC != X86ISD::COND_INVALID;
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code?  The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges.
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
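
    // For reference: the x87 FIST/FISTP instructions use the current rounding
    // mode from the FP control word, but C-style FP-to-int conversion must
    // truncate.  The sequence below therefore saves the control word, forces
    // round-toward-zero, performs the store, and restores the original word.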

    // Load the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the rounding-control bits to "round towards zero"...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of the control word to the original value.
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}


//===----------------------------------------------------------------------===//
//                        X86 Custom Lowering Hooks
//===----------------------------------------------------------------------===//

/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
/// load.  For Darwin, external and weak symbols are indirect, loading the
/// value at address GV rather than the value of GV itself.  This means that
/// the GlobalAddress must be in the base or index register of the address,
/// not the GV offset field.
static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4) return false;
  }

  return true;
}
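// For example (illustrative): the mask <3,2,1,0> reverses a 4-element vector
// and is representable as a PSHUFD; any element index >= 4 would select from
// the second source vector, which PSHUFD cannot do.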
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems == 2) {
    // The only case that ought to be handled by SHUFPD is
    //   Dest { 2, 1 } <= shuffle( Dest { 1, 0 },  Src { 3, 2 })
    // Expect bit 0 == 1, bit 1 == 2.
    SDOperand Bit0 = N->getOperand(0);
    SDOperand Bit1 = N->getOperand(1);
    assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
           "Invalid VECTOR_SHUFFLE mask!");
    return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
            cast<ConstantSDNode>(Bit1)->getValue() == 2);
  }

  if (NumElems != 4) return false;

  // Each half must refer to only one of the vectors.
  SDOperand Elt = N->getOperand(0);
  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = 1; i != NumElems / 2; ++i) {
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
        cast<ConstantSDNode>(Elt)->getValue())
      return false;
  }
  Elt = N->getOperand(NumElems / 2);
  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = NumElems / 2; i != NumElems; ++i) {
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
        cast<ConstantSDNode>(Elt)->getValue())
      return false;
  }

  return true;
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3.
  SDOperand Bit0 = N->getOperand(0);
  SDOperand Bit1 = N->getOperand(1);
  SDOperand Bit2 = N->getOperand(2);
  SDOperand Bit3 = N->getOperand(3);
  assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
         isa<ConstantSDNode>(Bit2) && isa<ConstantSDNode>(Bit3) &&
         "Invalid VECTOR_SHUFFLE mask!");
  return (cast<ConstantSDNode>(Bit0)->getValue() == 6 &&
          cast<ConstantSDNode>(Bit1)->getValue() == 7 &&
          cast<ConstantSDNode>(Bit2)->getValue() == 2 &&
          cast<ConstantSDNode>(Bit3)->getValue() == 3);
}

/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
bool X86::isMOVLHPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5.
  SDOperand Bit0 = N->getOperand(0);
  SDOperand Bit1 = N->getOperand(1);
  SDOperand Bit2 = N->getOperand(2);
  SDOperand Bit3 = N->getOperand(3);
  assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
         isa<ConstantSDNode>(Bit2) && isa<ConstantSDNode>(Bit3) &&
         "Invalid VECTOR_SHUFFLE mask!");
  return (cast<ConstantSDNode>(Bit0)->getValue() == 0 &&
          cast<ConstantSDNode>(Bit1)->getValue() == 1 &&
          cast<ConstantSDNode>(Bit2)->getValue() == 4 &&
          cast<ConstantSDNode>(Bit3)->getValue() == 5);
}
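
// For illustration: with 4 elements, MOVHLPS matches exactly <6,7,2,3> (the
// high half of the second vector moved into the low half of the first), and
// MOVLHPS matches exactly <0,1,4,5>; UNPCKL/UNPCKH below interleave the two
// sources element-by-element, e.g. a 4-element unpckl mask is <0,4,1,5>.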
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
bool X86::isUNPCKLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);
    assert(isa<ConstantSDNode>(BitI) && isa<ConstantSDNode>(BitI1) &&
           "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(BitI)->getValue() != j)
      return false;
    if (cast<ConstantSDNode>(BitI1)->getValue() != j + NumElems)
      return false;
  }

  return true;
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
bool X86::isUNPCKHMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);
    assert(isa<ConstantSDNode>(BitI) && isa<ConstantSDNode>(BitI1) &&
           "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(BitI)->getValue() != j + NumElems/2)
      return false;
    if (cast<ConstantSDNode>(BitI1)->getValue() != j + NumElems/2 + NumElems)
      return false;
  }

  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;

  // This is a splat operation if each element of the permute is the same,
  // and if the value doesn't reference the second vector.
  SDOperand Elt = N->getOperand(0);
  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    if (N->getOperand(i) != Elt) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val
      = cast<ConstantSDNode>(N->getOperand(NumOperands-i-1))->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

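// Worked example (illustrative): for the 4-element mask <3,1,2,0> the loop
// above visits the operands last-to-first, producing
// ((((0 << 2) | 2) << 2 | 1) << 2) | 3 = 0x27, so element 0 of the result is
// encoded in the low two bits of the immediate, as PSHUFD/SHUFPS expect.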
/// NormalizeVectorShuffle - Swap the vector_shuffle operands (as well as the
/// values in the permute mask) if needed. Use V1 as the second vector if it
/// is undef. Return an empty SDOperand if it is already well formed.
static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2,
                                        SDOperand Mask, MVT::ValueType VT,
                                        SelectionDAG &DAG) {
  unsigned NumElems = Mask.getNumOperands();
  SDOperand Half1 = Mask.getOperand(0);
  SDOperand Half2 = Mask.getOperand(NumElems/2);
  bool V2Undef = false;
  if (V2.getOpcode() == ISD::UNDEF) {
    V2Undef = true;
    V2 = V1;
  }

  if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems &&
      cast<ConstantSDNode>(Half2)->getValue() < NumElems) {
    // Swap the operands and change the mask.
    std::vector<SDOperand> MaskVec;
    for (unsigned i = NumElems / 2; i != NumElems; ++i)
      MaskVec.push_back(Mask.getOperand(i));
    for (unsigned i = 0; i != NumElems / 2; ++i)
      MaskVec.push_back(Mask.getOperand(i));
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
  }

  if (V2Undef)
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);

  return SDOperand();
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: {
    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
           "Not an i64 shift!");
    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
    SDOperand ShOpLo = Op.getOperand(0);
    SDOperand ShOpHi = Op.getOperand(1);
    SDOperand ShAmt  = Op.getOperand(2);
    SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
                                         DAG.getConstant(31, MVT::i8))
                           : DAG.getConstant(0, MVT::i32);

    SDOperand Tmp2, Tmp3;
    if (Op.getOpcode() == ISD::SHL_PARTS) {
      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
    } else {
      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
    }
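    // x86 shift instructions mask the amount to 5 bits, so for ShAmt >= 32
    // the SHLD/SHRD value computed above is not the desired half. The TEST
    // of bit 5 of ShAmt below drives the CMOVs that pick the single-shift
    // value instead; e.g. (illustrative) for SHL_PARTS with amount 33,
    // Hi = Lo << 1 and Lo = 0 (or the sign fill for SRA).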
    SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
                                   ShAmt, DAG.getConstant(32, MVT::i8));

    SDOperand Hi, Lo;
    SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);

    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::i32);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    if (Op.getOpcode() == ISD::SHL_PARTS) {
      Ops.push_back(Tmp2);
      Ops.push_back(Tmp3);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
      InFlag = Hi.getValue(1);

      Ops.clear();
      Ops.push_back(Tmp3);
      Ops.push_back(Tmp1);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    } else {
      Ops.push_back(Tmp2);
      Ops.push_back(Tmp3);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
      InFlag = Lo.getValue(1);

      Ops.clear();
      Ops.push_back(Tmp3);
      Ops.push_back(Tmp1);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    }

    Tys.clear();
    Tys.push_back(MVT::i32);
    Tys.push_back(MVT::i32);
    Ops.clear();
    Ops.push_back(Lo);
    Ops.push_back(Hi);
    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
  }
  case ISD::SINT_TO_FP: {
    assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
           Op.getOperand(0).getValueType() >= MVT::i16 &&
           "Unknown SINT_TO_FP to lower!");

    SDOperand Result;
    MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
    unsigned Size = MVT::getSizeInBits(SrcVT)/8;
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
                                  DAG.getEntryNode(), Op.getOperand(0),
                                  StackSlot, DAG.getSrcValue(NULL));

    // Build the FILD
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    if (X86ScalarSSE) Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(SrcVT));
    Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
                         Tys, Ops);

    if (X86ScalarSSE) {
      Chain = Result.getValue(1);
      SDOperand InFlag = Result.getValue(2);

      // FIXME: Currently the FST is flagged to the FILD_FLAG. This shouldn't
      // be necessary except that RFP cannot be live across multiple blocks.
      // When the stackifier is fixed, they can be uncoupled.
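      // Illustrative sketch of this SSE path: the integer was stored to a
      // stack slot and FILD'd onto the x87 stack above; the FST below spills
      // the converted value to a second slot, and a plain load then yields
      // the SSE result, e.g. for i32 -> f64 roughly:
      //   movl ...; fildl ...; fstpl ...; movsd ...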
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::Other);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(Result);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(Op.getValueType()));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
      Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
                           DAG.getSrcValue(NULL));
    }

    return Result;
  }
  case ISD::FP_TO_SINT: {
    assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
           "Unknown FP_TO_SINT to lower!");
    // We lower FP->sint16/sint32/sint64 into an FIST* instruction
    // (FP_TO_INT*_IN_MEM), followed by a load from a temporary stack slot.
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
    int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

    unsigned Opc;
    switch (Op.getValueType()) {
    default: assert(0 && "Invalid FP_TO_SINT to lower!");
    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
    }

    SDOperand Chain = DAG.getEntryNode();
    SDOperand Value = Op.getOperand(0);
    if (X86ScalarSSE) {
      assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
                          DAG.getSrcValue(0));
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
      Chain = Value.getValue(1);
      SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
      StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    }

    // Build the FP_TO_INT*_IN_MEM
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Value);
    Ops.push_back(StackSlot);
    SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);

    // Load the result.
    return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
                       DAG.getSrcValue(NULL));
  }
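  // READCYCLECOUNTER is lowered to RDTSC, which leaves the low 32 bits of
  // the timestamp in EAX and the high 32 bits in EDX; both halves are
  // copied out of the registers and recombined via MERGE_VALUES below.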
  case ISD::READCYCLECOUNTER: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Op.getOperand(0));
    SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
    Ops.clear();
    Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
    Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
                                     MVT::i32, Ops[0].getValue(2)));
    Ops.push_back(Ops[1].getValue(1));
    Tys[0] = Tys[1] = MVT::i32;
    Tys.push_back(MVT::Other);
    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
  }
  case ISD::FABS: {
    MVT::ValueType VT = Op.getValueType();
    const Type *OpNTy = MVT::getTypeForValueType(VT);
    std::vector<Constant*> CV;
    if (VT == MVT::f64) {
      CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
    } else {
      CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
    }
    Constant *CS = ConstantStruct::get(CV);
    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
    SDOperand Mask =
      DAG.getNode(X86ISD::LOAD_PACK,
                  VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
    return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
  }
  case ISD::FNEG: {
    MVT::ValueType VT = Op.getValueType();
    const Type *OpNTy = MVT::getTypeForValueType(VT);
    std::vector<Constant*> CV;
    if (VT == MVT::f64) {
      CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
    } else {
      CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
      CV.push_back(ConstantFP::get(OpNTy, 0.0));
    }
    Constant *CS = ConstantStruct::get(CV);
    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
    SDOperand Mask =
      DAG.getNode(X86ISD::LOAD_PACK,
                  VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
  }
  case ISD::SETCC: {
    assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
    SDOperand Cond;
    SDOperand CC = Op.getOperand(2);
    ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
    bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
    bool Flip;
    unsigned X86CC;
    if (translateX86CC(CC, isFP, X86CC, Flip)) {
      if (Flip)
        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                           Op.getOperand(1), Op.getOperand(0));
      else
        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                           Op.getOperand(0), Op.getOperand(1));
      return DAG.getNode(X86ISD::SETCC, MVT::i8,
                         DAG.getConstant(X86CC, MVT::i8), Cond);
    } else {
      assert(isFP && "Illegal integer SetCC!");

      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                         Op.getOperand(0), Op.getOperand(1));
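      // After an x86 floating point compare, ZF, PF and CF hold the result,
      // and an unordered compare sets all three. Equality therefore cannot
      // be tested with a single condition code: SETOEQ is !PF & ZF and
      // SETUNE is PF | !ZF, so each is lowered to two SETCC nodes combined
      // with AND / OR below.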
      std::vector<MVT::ValueType> Tys;
      std::vector<SDOperand> Ops;
      switch (SetCCOpcode) {
      default: assert(false && "Illegal floating point SetCC!");
      case ISD::SETOEQ: {  // !PF & ZF
        Tys.push_back(MVT::i8);
        Tys.push_back(MVT::Flag);
        Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
        Ops.push_back(Cond);
        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                     DAG.getConstant(X86ISD::COND_E, MVT::i8),
                                     Tmp1.getValue(1));
        return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
      }
      case ISD::SETUNE: {  // PF | !ZF
        Tys.push_back(MVT::i8);
        Tys.push_back(MVT::Flag);
        Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
        Ops.push_back(Cond);
        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                     DAG.getConstant(X86ISD::COND_NE, MVT::i8),
                                     Tmp1.getValue(1));
        return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
      }
      }
    }
  }
  case ISD::SELECT: {
    MVT::ValueType VT = Op.getValueType();
    bool isFP = MVT::isFloatingPoint(VT);
    bool isFPStack = isFP && !X86ScalarSSE;
    bool isFPSSE = isFP && X86ScalarSSE;
    bool addTest = false;
    SDOperand Op0 = Op.getOperand(0);
    SDOperand Cond, CC;
    if (Op0.getOpcode() == ISD::SETCC)
      Op0 = LowerOperation(Op0, DAG);

    if (Op0.getOpcode() == X86ISD::SETCC) {
      // If the condition flag is set by an X86ISD::CMP, then make a copy of
      // it (since the flag operand cannot be shared). If the X86ISD::SETCC
      // does not have another use it will be eliminated.
      // If the X86ISD::SETCC has more than one use, then it's probably better
      // to use a test instead of duplicating the X86ISD::CMP (for register
      // pressure reasons).
      if (Op0.getOperand(1).getOpcode() == X86ISD::CMP) {
        if (!Op0.hasOneUse()) {
          std::vector<MVT::ValueType> Tys;
          for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
            Tys.push_back(Op0.Val->getValueType(i));
          std::vector<SDOperand> Ops;
          for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
            Ops.push_back(Op0.getOperand(i));
          Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
        }

        CC = Op0.getOperand(0);
        Cond = Op0.getOperand(1);
        // Make a copy, as the flag result cannot be used by more than one.
        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                           Cond.getOperand(0), Cond.getOperand(1));
        addTest =
          isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
      } else
        addTest = true;
    } else
      addTest = true;

    if (addTest) {
      CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
      Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
    }

    std::vector<MVT::ValueType> Tys;
    Tys.push_back(Op.getValueType());
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
    // the condition is true.
    Ops.push_back(Op.getOperand(2));
    Ops.push_back(Op.getOperand(1));
    Ops.push_back(CC);
    Ops.push_back(Cond);
    return DAG.getNode(X86ISD::CMOV, Tys, Ops);
  }
  case ISD::BRCOND: {
    bool addTest = false;
    SDOperand Cond = Op.getOperand(1);
    SDOperand Dest = Op.getOperand(2);
    SDOperand CC;
    if (Cond.getOpcode() == ISD::SETCC)
      Cond = LowerOperation(Cond, DAG);

    if (Cond.getOpcode() == X86ISD::SETCC) {
      // If the condition flag is set by an X86ISD::CMP, then make a copy of
      // it (since the flag operand cannot be shared). If the X86ISD::SETCC
      // does not have another use it will be eliminated.
      // If the X86ISD::SETCC has more than one use, then it's probably better
      // to use a test instead of duplicating the X86ISD::CMP (for register
      // pressure reasons).
      if (Cond.getOperand(1).getOpcode() == X86ISD::CMP) {
        if (!Cond.hasOneUse()) {
          std::vector<MVT::ValueType> Tys;
          for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
            Tys.push_back(Cond.Val->getValueType(i));
          std::vector<SDOperand> Ops;
          for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
            Ops.push_back(Cond.getOperand(i));
          Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
        }

        CC = Cond.getOperand(0);
        Cond = Cond.getOperand(1);
        // Make a copy, as the flag result cannot be used by more than one.
        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                           Cond.getOperand(0), Cond.getOperand(1));
      } else
        addTest = true;
    } else
      addTest = true;

    if (addTest) {
      CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
      Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
    }
    return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0), Op.getOperand(2), CC, Cond);
  }
  case ISD::MEMSET: {
    SDOperand InFlag(0, 0);
    SDOperand Chain = Op.getOperand(0);
    unsigned Align =
      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
    if (Align == 0) Align = 1;

    ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
    // If not DWORD aligned, call memset if the size is less than the
    // threshold. It knows how to align to the right boundary first.
    if ((Align & 3) != 0 ||
        (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
      MVT::ValueType IntPtr = getPointerTy();
      const Type *IntPtrTy = getTargetData().getIntPtrType();
      std::vector<std::pair<SDOperand, const Type*> > Args;
      Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
      // Extend the ubyte argument to be an int value for the call.
      SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
      Args.push_back(std::make_pair(Val, IntPtrTy));
      Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
      std::pair<SDOperand,SDOperand> CallResult =
        LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                    DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
      return CallResult.second;
    }

    MVT::ValueType AVT;
    SDOperand Count;
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    unsigned BytesLeft = 0;
    bool TwoRepStos = false;
    if (ValC) {
      unsigned ValReg;
      unsigned Val = ValC->getValue() & 255;

      // If the value is a constant, then we can potentially use larger
      // element sizes for the stores.
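      // Illustrative example: for a constant fill byte of 0xAB, the WORD
      // aligned case splats it to AX = 0xABAB (rep stosw) and the DWORD
      // aligned case to EAX = 0xABABABAB (rep stosd).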
      switch (Align & 3) {
      case 2:   // WORD aligned
        AVT = MVT::i16;
        Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
        BytesLeft = I->getValue() % 2;
        Val = (Val << 8) | Val;
        ValReg = X86::AX;
        break;
      case 0:   // DWORD aligned
        AVT = MVT::i32;
        if (I) {
          Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
          BytesLeft = I->getValue() % 4;
        } else {
          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                              DAG.getConstant(2, MVT::i8));
          TwoRepStos = true;
        }
        Val = (Val << 8)  | Val;
        Val = (Val << 16) | Val;
        ValReg = X86::EAX;
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        Count = Op.getOperand(3);
        ValReg = X86::AL;
        break;
      }

      Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                               InFlag);
      InFlag = Chain.getValue(1);
    } else {
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
      InFlag = Chain.getValue(1);
    }

    Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
    InFlag = Chain.getValue(1);
    Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
    InFlag = Chain.getValue(1);

    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(AVT));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);

    if (TwoRepStos) {
      InFlag = Chain.getValue(1);
      Count = Op.getOperand(3);
      MVT::ValueType CVT = Count.getValueType();
      SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                   DAG.getConstant(3, CVT));
      Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
      InFlag = Chain.getValue(1);
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(DAG.getValueType(MVT::i8));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
    } else if (BytesLeft) {
      // Issue stores for the last 1 - 3 bytes.
      SDOperand Value;
      unsigned Val = ValC->getValue() & 255;
      unsigned Offset = I->getValue() - BytesLeft;
      SDOperand DstAddr = Op.getOperand(1);
      MVT::ValueType AddrVT = DstAddr.getValueType();
      if (BytesLeft >= 2) {
        Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                        DAG.getConstant(Offset, AddrVT)),
                            DAG.getSrcValue(NULL));
        BytesLeft -= 2;
        Offset += 2;
      }

      if (BytesLeft == 1) {
        Value = DAG.getConstant(Val, MVT::i8);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                        DAG.getConstant(Offset, AddrVT)),
                            DAG.getSrcValue(NULL));
      }
    }

    return Chain;
  }
  case ISD::MEMCPY: {
    SDOperand Chain = Op.getOperand(0);
    unsigned Align =
      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
    if (Align == 0) Align = 1;

    ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
    // If not DWORD aligned, call memcpy if the size is less than the
    // threshold. It knows how to align to the right boundary first.
    if ((Align & 3) != 0 ||
        (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
      MVT::ValueType IntPtr = getPointerTy();
      const Type *IntPtrTy = getTargetData().getIntPtrType();
      std::vector<std::pair<SDOperand, const Type*> > Args;
      Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
      Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
      Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
      std::pair<SDOperand,SDOperand> CallResult =
        LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                    DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
      return CallResult.second;
    }

    MVT::ValueType AVT;
    SDOperand Count;
    unsigned BytesLeft = 0;
    bool TwoRepMovs = false;
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
      BytesLeft = I->getValue() % 2;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      if (I) {
        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
        BytesLeft = I->getValue() % 4;
      } else {
        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                            DAG.getConstant(2, MVT::i8));
        TwoRepMovs = true;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      break;
    }

    SDOperand InFlag(0, 0);
    Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
    InFlag = Chain.getValue(1);
    Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
    InFlag = Chain.getValue(1);
    Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);

    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(AVT));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);

    if (TwoRepMovs) {
      InFlag = Chain.getValue(1);
      Count = Op.getOperand(3);
      MVT::ValueType CVT = Count.getValueType();
      SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                   DAG.getConstant(3, CVT));
      Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
      InFlag = Chain.getValue(1);
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(DAG.getValueType(MVT::i8));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
    } else if (BytesLeft) {
      // Issue loads and stores for the last 1 - 3 bytes.
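      // Illustrative example: a 10-byte DWORD-aligned constant-size copy
      // sets ECX = 2, so rep movsd moves 8 bytes; BytesLeft == 2 and the
      // i16 load/store pair below copies bytes 8..9 (Offset = 10 - 2 = 8).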
      unsigned Offset = I->getValue() - BytesLeft;
      SDOperand DstAddr = Op.getOperand(1);
      MVT::ValueType DstVT = DstAddr.getValueType();
      SDOperand SrcAddr = Op.getOperand(2);
      MVT::ValueType SrcVT = SrcAddr.getValueType();
      SDOperand Value;
      if (BytesLeft >= 2) {
        Value = DAG.getLoad(MVT::i16, Chain,
                            DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                        DAG.getConstant(Offset, SrcVT)),
                            DAG.getSrcValue(NULL));
        Chain = Value.getValue(1);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                        DAG.getConstant(Offset, DstVT)),
                            DAG.getSrcValue(NULL));
        BytesLeft -= 2;
        Offset += 2;
      }

      if (BytesLeft == 1) {
        Value = DAG.getLoad(MVT::i8, Chain,
                            DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                        DAG.getConstant(Offset, SrcVT)),
                            DAG.getSrcValue(NULL));
        Chain = Value.getValue(1);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                        DAG.getConstant(Offset, DstVT)),
                            DAG.getSrcValue(NULL));
      }
    }

    return Chain;
  }

  // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their
  // target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
  // one of the above-mentioned nodes. It has to be wrapped because otherwise
  // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
  // be used to form addressing modes. These wrapped nodes will be selected
  // into MOV32ri.
  case ISD::ConstantPool: {
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                         DAG.getTargetConstantPool(CP->get(), getPointerTy(),
                                                   CP->getAlignment()));
    if (Subtarget->isTargetDarwin()) {
      // With PIC, the address is actually $g + Offset.
      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
        Result = DAG.getNode(ISD::ADD, getPointerTy(),
                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
    }

    return Result;
  }
  case ISD::GlobalAddress: {
    GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                         DAG.getTargetGlobalAddress(GV, getPointerTy()));
    if (Subtarget->isTargetDarwin()) {
      // With PIC, the address is actually $g + Offset.
      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
        Result = DAG.getNode(ISD::ADD, getPointerTy(),
                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);

      // For Darwin, external and weak symbols are indirect, so we want to
      // load the value at address GV, not the value of GV itself. This means
      // that the GlobalAddress must be in the base or index register of the
      // address, not the GV offset field.
      if (getTargetMachine().getRelocationModel() != Reloc::Static &&
          DarwinGVRequiresExtraLoad(GV))
        Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
                             Result, DAG.getSrcValue(NULL));
    }

    return Result;
  }
  case ISD::ExternalSymbol: {
    const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                         DAG.getTargetExternalSymbol(Sym, getPointerTy()));
    if (Subtarget->isTargetDarwin()) {
      // With PIC, the address is actually $g + Offset.
      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
        Result = DAG.getNode(ISD::ADD, getPointerTy(),
                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
    }

    return Result;
  }
  case ISD::VASTART: {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    // FIXME: Replace MVT::i32 with PointerTy
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
    return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                       Op.getOperand(1), Op.getOperand(2));
  }
  case ISD::RET: {
    SDOperand Copy;

    switch(Op.getNumOperands()) {
    default:
      assert(0 && "Do not know how to return this many arguments!");
      abort();
    case 1:
      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                         DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
    case 2: {
      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
      if (MVT::isInteger(ArgVT))
        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
                                SDOperand());
      else if (!X86ScalarSSE) {
        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        std::vector<SDOperand> Ops;
        Ops.push_back(Op.getOperand(0));
        Ops.push_back(Op.getOperand(1));
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
      } else {
        SDOperand MemLoc;
        SDOperand Chain = Op.getOperand(0);
        SDOperand Value = Op.getOperand(1);

        if (Value.getOpcode() == ISD::LOAD &&
            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
          Chain  = Value.getOperand(0);
          MemLoc = Value.getOperand(1);
        } else {
          // Spill the value to memory and reload it into the top of the
          // stack.
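          // The 32-bit C calling convention returns floating point values
          // in ST(0), so a value living in an SSE register has to go
          // through memory to reach the x87 stack; when it was just loaded
          // (the case above), the FLD reuses the original memory operand
          // instead of a fresh spill slot.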
          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
          MachineFunction &MF = DAG.getMachineFunction();
          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
          Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
                              Value, MemLoc, DAG.getSrcValue(0));
        }
        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::f64);
        Tys.push_back(MVT::Other);
        std::vector<SDOperand> Ops;
        Ops.push_back(Chain);
        Ops.push_back(MemLoc);
        Ops.push_back(DAG.getValueType(ArgVT));
        Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
        Tys.clear();
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        Ops.clear();
        Ops.push_back(Copy.getValue(1));
        Ops.push_back(Copy);
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
      }
      break;
    }
    case 3:
      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2),
                              SDOperand());
      Copy = DAG.getCopyToReg(Copy, X86::EAX, Op.getOperand(1),
                              Copy.getValue(1));
      break;
    }
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                       Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                       Copy.getValue(1));
  }
  case ISD::SCALAR_TO_VECTOR: {
    SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
    return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
  }
  case ISD::VECTOR_SHUFFLE: {
    SDOperand V1 = Op.getOperand(0);
    SDOperand V2 = Op.getOperand(1);
    SDOperand PermMask = Op.getOperand(2);
    MVT::ValueType VT = Op.getValueType();
    unsigned NumElems = PermMask.getNumOperands();

    if (X86::isUNPCKLMask(PermMask.Val) ||
        X86::isUNPCKHMask(PermMask.Val))
      // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
      return SDOperand();

    // PSHUFD's 2nd vector must be undef.
    if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return SDOperand();
    }

    if (NumElems == 2 ||
        X86::isSplatMask(PermMask.Val) ||
        X86::isSHUFPMask(PermMask.Val)) {
      return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
    }

    assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
    abort();
  }
  case ISD::BUILD_VECTOR: {
    // All-ones vectors are handled with pcmpeqd.
    if (ISD::isBuildVectorAllOnes(Op.Val))
      return Op;

    std::set<SDOperand> Values;
    SDOperand Elt0 = Op.getOperand(0);
    Values.insert(Elt0);
    bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
                       cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
                      (isa<ConstantFPSDNode>(Elt0) &&
                       cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0));
    bool RestAreZero = true;
    unsigned NumElems = Op.getNumOperands();
    for (unsigned i = 1; i < NumElems; ++i) {
      SDOperand Elt = Op.getOperand(i);
      if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
        if (!FPC->isExactlyValue(+0.0))
          RestAreZero = false;
      } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
        if (!C->isNullValue())
          RestAreZero = false;
      } else
        RestAreZero = false;
      Values.insert(Elt);
    }

    if (RestAreZero) {
      if (Elt0IsZero) return Op;

      // Zero extend a scalar to a vector.
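      // E.g. (illustrative) (v4i32 build_vector X, 0, 0, 0) becomes a
      // zero-extending scalar-to-vector move of X (such as movd, which
      // clears the upper elements).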
      return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
    }

    if (Values.size() > 2) {
      // Expand into a number of unpckl*.
      // e.g. for v4f32
      //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
      //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
      //   Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
      MVT::ValueType VT = Op.getValueType();
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
        MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
      }
      SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
      std::vector<SDOperand> V(NumElems);
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
      NumElems >>= 1;
      while (NumElems != 0) {
        for (unsigned i = 0; i < NumElems; ++i)
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                             PermMask);
        NumElems >>= 1;
      }
      return V[0];
    }

    return SDOperand();
  }
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::TEST:               return "X86ISD::TEST";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::ZEXT_S2VEC:         return "X86ISD::ZEXT_S2VEC";
  }
}

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  KnownZero = KnownOne = 0;   // Don't know anything.
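  // X86ISD::SETCC materializes its boolean result as 0 or 1, so every bit
  // of the result except bit 0 is known to be zero; it is the only target
  // node handled below so far.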
  switch (Opc) {
  default:
    assert(Opc >= ISD::BUILTIN_OP_END && "Expected a target specific node");
    break;
  case X86ISD::SETCC:
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    // FIXME: not handling MMX registers yet ('y' constraint).
    switch (Constraint[0]) {     // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
                                   X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
    case 'l':   // INDEX_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
                                   X86::ESI, X86::EDI, X86::EBP, 0);
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0);
    case 'x':   // SSE_REGS if SSE1 allowed
      if (Subtarget->hasSSE1())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (Subtarget->hasSSE2())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    }
  }

  return std::vector<unsigned>();
}

/// isLegalAddressImmediate - Return true if the integer value or
/// GlobalValue can be used as the offset of the target addressing mode.
bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
  // X86 allows a sign-extended 32-bit immediate field.
  return (V >= -(1LL << 31) && V <= (1LL << 31)-1);
}

bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
  if (Subtarget->isTargetDarwin()) {
    Reloc::Model RModel = getTargetMachine().getRelocationModel();
    if (RModel == Reloc::Static)
      return true;
    else if (RModel == Reloc::DynamicNoPIC)
      return !DarwinGVRequiresExtraLoad(GV);
    else
      return false;
  } else
    return true;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() == 2 ||
          X86::isSplatMask(Mask.Val) ||
          X86::isPSHUFDMask(Mask.Val) ||
          X86::isSHUFPMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}