X86ISelLowering.cpp revision f99898453dc287627f8a0373e4ebbdb523683611
//===-- X86ISelLowering.cpp - X86 DAG Lowering Interface --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
// Hidden command line switch; the fastcc lowering paths below are only taken
// when this is set.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

/// X86TargetLowering - Configure the lowering tables for X86: cache the
/// subtarget, decide whether scalar FP lives in SSE registers (taken from
/// hasSSE2()), register the legal register classes and address scales, and
/// record a legalize action for each (operation, value type) pair that needs
/// one.  The vector section first marks a set of operations Expand for every
/// vector type and then selectively changes individual entries, so the
/// relative order of the calls matters.
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  // NOTE(review): the guard below turns the underscore variants on for every
  // target *except* Darwin, which reads as the opposite of the sentence in the
  // comment that follows -- confirm which of the two is intended.
  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
  setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);

  setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
  setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);

  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand);
  // NOTE(review): FREM/f64 is set to Expand again inside the X86ScalarSSE
  // branch further down; the second call looks redundant.
  setOperationAction(ISD::FREM , MVT::f64 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  setOperationAction(ISD::SELECT , MVT::i8 , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops
    setOperationAction(ISD::EXTLOAD, MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    // Only f64 gets a register class on this path; no f32 class is added.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // FSIN/FCOS are only left at their default action when UnsafeFPMath is on.
    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND, MVT::v4f32, Legal);
    setOperationAction(ISD::OR, MVT::v4f32, Legal);
    setOperationAction(ISD::XOR, MVT::v4f32, Legal);
    setOperationAction(ISD::ADD, MVT::v4f32, Legal);
    setOperationAction(ISD::SUB, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v2f64, Legal);
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2f64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    // The loop walks the value type enum from v16i8 up to, but not including,
    // v2i64; v2f64 and v2i64 are handled explicitly right after it.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // v2f64 / v2i64 loads are legal; custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

/// LowerArguments - Produce the DAG values for the formal arguments of F.
/// Functions using CallingConv::Fast go through the fastcc lowering, but only
/// when the -enable-x86-fastcc option is set; everything else uses the C
/// calling convention lowering.
std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    return LowerFastCCArguments(F, DAG);
  return LowerCCCArguments(F, DAG);
}

/// LowerCallTo - Lower an outgoing call.  The callee is first rewritten to its
/// Target* node form when it is a global address or external symbol, then the
/// call is handed to the fastcc or C lowering using the same test as
/// LowerArguments.  The result is whatever pair the selected routine returns;
/// LowerCCCCallTo returns (return value, chain).
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // isVarArg is not forwarded on the fastcc path.
  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// LowerCCCArguments - Build one DAG value per formal argument of F for the C
/// calling convention.  Each argument gets a fixed stack object at the running
/// offset; used arguments become loads from it, unused ones become a zero
/// constant.  Also records VarArgsFrameIndex (varargs functions only),
/// ReturnAddrIndex, BytesToPopOnReturn and BytesCallerReserves, and marks the
/// registers used for the return value as live-out.
std::vector<SDOperand>
X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    // ObjSize is the size of the stack object created for the argument;
    // ArgIncrement is how far the offset advances (4 unless the type is
    // 8 bytes wide).
    unsigned ArgIncrement = 4;
    unsigned ObjSize;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:  ObjSize = 1;                break;
    case MVT::i16: ObjSize = 2;                break;
    case MVT::i32: ObjSize = 4;                break;
    case MVT::i64: ObjSize = ArgIncrement = 8; break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }
    // Create the frame index object for this incoming parameter...
    int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

    // Create the SelectionDAG nodes corresponding to a load from this parameter
    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    SDOperand ArgValue;
    if (!I->use_empty())
      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    else {
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    ArgOffset += ArgIncrement;   // Move on to the next argument...
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

/// LowerCCCCallTo - Lower a call that uses the C calling convention.  Emits
/// CALLSEQ_START, one store per argument at increasing offsets from ESP
/// (i1/i8/i16 are first extended to i32), the X86ISD::CALL node, CALLSEQ_END,
/// and finally the copies that fetch the return value.  Returns the pair
/// (return value, chain); the return value operand is left default-constructed
/// for a void return type.
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    // First pass: size the outgoing argument area (4 or 8 bytes per argument).
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Arguments go on the stack in reverse order, as specified by the ABI.
    unsigned ArgOffset = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;

    // Second pass: store each argument at ESP + ArgOffset.  Every store hangs
    // off the same incoming Chain; they are joined by the TokenFactor below.
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
    }
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  // NOTE(review): RetVals is filled in below but is not read again anywhere in
  // this function; the CALL node is built from NodeTys instead.
  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  SDOperand InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  // Presumably the number of bytes popped by the callee (none for the C
  // convention) -- TODO confirm against the CALLSEQ_END operand definition.
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      // Low half from EAX, high half from EDX; the second copy is threaded
      // through the chain and flag results of the first.
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      // The result is always fetched as an f64 via FP_GET_RESULT, whatever the
      // declared return type is.
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      // Note: this Ops shadows the function-level Ops declared above.
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        // The value is stored to a fresh 8-byte stack slot with FST and then
        // reloaded as RetTyVT.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  // Create a fresh virtual register of class RC, pair it with PReg in the
  // function's live-in list, and hand the virtual register back.
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has a problem with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coalesced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;


/// LowerFastCCArguments - Build one DAG value per formal argument of F for
/// the 'fast' calling convention.  Integer arguments take a register (EAX
/// first, then EDX) while fewer than FASTCC_NUM_INT_ARGS_INREGS are in use;
/// all other arguments are loaded from fixed stack objects.  Arguments with
/// no uses become a zero constant.  Also sets VarArgsFrameIndex,
/// ReturnAddrIndex, BytesToPopOnReturn and BytesCallerReserves, and marks the
/// registers used for the return value as live-out.
std::vector<SDOperand>
X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    // ObjSize stays 0 when the argument was assigned to a register; a non-zero
    // value means it lives on the stack and is handled after the switch.
    unsigned ObjSize = 0;
    SDOperand ArgValue;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                                    X86::R8RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
          DAG.setRoot(ArgValue.getValue(1));
          if (ObjectVT == MVT::i1)
            // FIXME: Should insert an assertzext here.
            ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
        }
        // The register is consumed even when the argument is unused.
        ++NumIntRegs;
        break;
      }

      ObjSize = 1;
      break;
    case MVT::i16:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                                    X86::R16RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 2;
      break;
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                    X86::R32RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Both registers are free: low half in EAX, high half in EDX.
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
          unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);

          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
          DAG.setRoot(Hi.getValue(1));

          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Only one register is left: the low half is copied from EDX and the
        // high half is loaded from a 4-byte stack object.
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          DAG.setRoot(Low.getValue(1));

          // Load the high part from memory.
          // Create the frame index object for this incoming parameter...
          int FI = MFI->CreateFixedObject(4, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        // The stack half is accounted for here because ObjSize stays 0 and the
        // common offset bump at the bottom of the loop is skipped.
        ArgOffset += 4;
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        break;
      }
      ObjSize = ArgIncrement = 8;
      break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    if (ObjSize && !I->use_empty()) {
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else if (ArgValue.Val == 0) {
      // No value was produced above (the argument is unused): substitute a
      // zero constant of the right kind.
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    if (ObjSize)
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
945 case MVT::i8: 946 case MVT::i16: 947 case MVT::i32: 948 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 949 RegValuesToPass.push_back(Args[i].first); 950 ++NumIntRegs; 951 break; 952 } 953 // Fall through 954 case MVT::f32: { 955 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 956 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 957 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 958 Args[i].first, PtrOff, 959 DAG.getSrcValue(NULL))); 960 ArgOffset += 4; 961 break; 962 } 963 case MVT::i64: 964 // Can pass (at least) part of it in regs? 965 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 966 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 967 Args[i].first, DAG.getConstant(1, MVT::i32)); 968 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 969 Args[i].first, DAG.getConstant(0, MVT::i32)); 970 RegValuesToPass.push_back(Lo); 971 ++NumIntRegs; 972 973 // Pass both parts in regs? 974 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 975 RegValuesToPass.push_back(Hi); 976 ++NumIntRegs; 977 } else { 978 // Pass the high part in memory. 979 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 980 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 981 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 982 Hi, PtrOff, DAG.getSrcValue(NULL))); 983 ArgOffset += 4; 984 } 985 break; 986 } 987 // Fall through 988 case MVT::f64: 989 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 990 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 991 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 992 Args[i].first, PtrOff, 993 DAG.getSrcValue(NULL))); 994 ArgOffset += 8; 995 break; 996 } 997 } 998 if (!Stores.empty()) 999 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 1000 1001 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1002 // arguments and the arguments after the retaddr has been pushed are aligned. 
1003 if ((ArgOffset & 7) == 0) 1004 ArgOffset += 4; 1005 1006 std::vector<MVT::ValueType> RetVals; 1007 MVT::ValueType RetTyVT = getValueType(RetTy); 1008 1009 RetVals.push_back(MVT::Other); 1010 1011 // The result values produced have to be legal. Promote the result. 1012 switch (RetTyVT) { 1013 case MVT::isVoid: break; 1014 default: 1015 RetVals.push_back(RetTyVT); 1016 break; 1017 case MVT::i1: 1018 case MVT::i8: 1019 case MVT::i16: 1020 RetVals.push_back(MVT::i32); 1021 break; 1022 case MVT::f32: 1023 if (X86ScalarSSE) 1024 RetVals.push_back(MVT::f32); 1025 else 1026 RetVals.push_back(MVT::f64); 1027 break; 1028 case MVT::i64: 1029 RetVals.push_back(MVT::i32); 1030 RetVals.push_back(MVT::i32); 1031 break; 1032 } 1033 1034 // Build a sequence of copy-to-reg nodes chained together with token chain 1035 // and flag operands which copy the outgoing args into registers. 1036 SDOperand InFlag; 1037 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 1038 unsigned CCReg; 1039 SDOperand RegToPass = RegValuesToPass[i]; 1040 switch (RegToPass.getValueType()) { 1041 default: assert(0 && "Bad thing to pass in regs"); 1042 case MVT::i8: 1043 CCReg = (i == 0) ? X86::AL : X86::DL; 1044 break; 1045 case MVT::i16: 1046 CCReg = (i == 0) ? X86::AX : X86::DX; 1047 break; 1048 case MVT::i32: 1049 CCReg = (i == 0) ? X86::EAX : X86::EDX; 1050 break; 1051 } 1052 1053 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 1054 InFlag = Chain.getValue(1); 1055 } 1056 1057 std::vector<MVT::ValueType> NodeTys; 1058 NodeTys.push_back(MVT::Other); // Returns a chain 1059 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1060 std::vector<SDOperand> Ops; 1061 Ops.push_back(Chain); 1062 Ops.push_back(Callee); 1063 if (InFlag.Val) 1064 Ops.push_back(InFlag); 1065 1066 // FIXME: Do not generate X86ISD::TAILCALL for now. 
1067 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 1068 InFlag = Chain.getValue(1); 1069 1070 NodeTys.clear(); 1071 NodeTys.push_back(MVT::Other); // Returns a chain 1072 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1073 Ops.clear(); 1074 Ops.push_back(Chain); 1075 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1076 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1077 Ops.push_back(InFlag); 1078 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 1079 InFlag = Chain.getValue(1); 1080 1081 SDOperand RetVal; 1082 if (RetTyVT != MVT::isVoid) { 1083 switch (RetTyVT) { 1084 default: assert(0 && "Unknown value type to return!"); 1085 case MVT::i1: 1086 case MVT::i8: 1087 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 1088 Chain = RetVal.getValue(1); 1089 if (RetTyVT == MVT::i1) 1090 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 1091 break; 1092 case MVT::i16: 1093 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 1094 Chain = RetVal.getValue(1); 1095 break; 1096 case MVT::i32: 1097 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1098 Chain = RetVal.getValue(1); 1099 break; 1100 case MVT::i64: { 1101 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1102 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 1103 Lo.getValue(2)); 1104 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 1105 Chain = Hi.getValue(1); 1106 break; 1107 } 1108 case MVT::f32: 1109 case MVT::f64: { 1110 std::vector<MVT::ValueType> Tys; 1111 Tys.push_back(MVT::f64); 1112 Tys.push_back(MVT::Other); 1113 Tys.push_back(MVT::Flag); 1114 std::vector<SDOperand> Ops; 1115 Ops.push_back(Chain); 1116 Ops.push_back(InFlag); 1117 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 1118 Chain = RetVal.getValue(1); 1119 InFlag = RetVal.getValue(2); 1120 if (X86ScalarSSE) { 1121 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. 
This 1122 // shouldn't be necessary except that RFP cannot be live across 1123 // multiple blocks. When stackifier is fixed, they can be uncoupled. 1124 MachineFunction &MF = DAG.getMachineFunction(); 1125 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1126 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1127 Tys.clear(); 1128 Tys.push_back(MVT::Other); 1129 Ops.clear(); 1130 Ops.push_back(Chain); 1131 Ops.push_back(RetVal); 1132 Ops.push_back(StackSlot); 1133 Ops.push_back(DAG.getValueType(RetTyVT)); 1134 Ops.push_back(InFlag); 1135 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 1136 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 1137 DAG.getSrcValue(NULL)); 1138 Chain = RetVal.getValue(1); 1139 } 1140 1141 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 1142 // FIXME: we would really like to remember that this FP_ROUND 1143 // operation is okay to eliminate if we allow excess FP precision. 1144 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1145 break; 1146 } 1147 } 1148 } 1149 1150 return std::make_pair(RetVal, Chain); 1151} 1152 1153SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1154 if (ReturnAddrIndex == 0) { 1155 // Set up a frame object for the return address. 1156 MachineFunction &MF = DAG.getMachineFunction(); 1157 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1158 } 1159 1160 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 1161} 1162 1163 1164 1165std::pair<SDOperand, SDOperand> X86TargetLowering:: 1166LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1167 SelectionDAG &DAG) { 1168 SDOperand Result; 1169 if (Depth) // Depths > 0 not supported yet! 
1170 Result = DAG.getConstant(0, getPointerTy()); 1171 else { 1172 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1173 if (!isFrameAddress) 1174 // Just load the return address 1175 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 1176 DAG.getSrcValue(NULL)); 1177 else 1178 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 1179 DAG.getConstant(4, MVT::i32)); 1180 } 1181 return std::make_pair(Result, Chain); 1182} 1183 1184/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1185/// which corresponds to the condition code. 1186static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1187 switch (X86CC) { 1188 default: assert(0 && "Unknown X86 conditional code!"); 1189 case X86ISD::COND_A: return X86::JA; 1190 case X86ISD::COND_AE: return X86::JAE; 1191 case X86ISD::COND_B: return X86::JB; 1192 case X86ISD::COND_BE: return X86::JBE; 1193 case X86ISD::COND_E: return X86::JE; 1194 case X86ISD::COND_G: return X86::JG; 1195 case X86ISD::COND_GE: return X86::JGE; 1196 case X86ISD::COND_L: return X86::JL; 1197 case X86ISD::COND_LE: return X86::JLE; 1198 case X86ISD::COND_NE: return X86::JNE; 1199 case X86ISD::COND_NO: return X86::JNO; 1200 case X86ISD::COND_NP: return X86::JNP; 1201 case X86ISD::COND_NS: return X86::JNS; 1202 case X86ISD::COND_O: return X86::JO; 1203 case X86ISD::COND_P: return X86::JP; 1204 case X86ISD::COND_S: return X86::JS; 1205 } 1206} 1207 1208/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1209/// specific condition code. It returns a false if it cannot do a direct 1210/// translation. X86CC is the translated CondCode. Flip is set to true if the 1211/// the order of comparison operands should be flipped. 
1212static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1213 unsigned &X86CC, bool &Flip) { 1214 Flip = false; 1215 X86CC = X86ISD::COND_INVALID; 1216 if (!isFP) { 1217 switch (SetCCOpcode) { 1218 default: break; 1219 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1220 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 1221 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 1222 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 1223 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 1224 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1225 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 1226 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 1227 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 1228 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 1229 } 1230 } else { 1231 // On a floating point condition, the flags are set as follows: 1232 // ZF PF CF op 1233 // 0 | 0 | 0 | X > Y 1234 // 0 | 0 | 1 | X < Y 1235 // 1 | 0 | 0 | X == Y 1236 // 1 | 1 | 1 | unordered 1237 switch (SetCCOpcode) { 1238 default: break; 1239 case ISD::SETUEQ: 1240 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1241 case ISD::SETOLE: Flip = true; // Fallthrough 1242 case ISD::SETOGT: 1243 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 1244 case ISD::SETOLT: Flip = true; // Fallthrough 1245 case ISD::SETOGE: 1246 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 1247 case ISD::SETUGE: Flip = true; // Fallthrough 1248 case ISD::SETULT: 1249 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 1250 case ISD::SETUGT: Flip = true; // Fallthrough 1251 case ISD::SETULE: 1252 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 1253 case ISD::SETONE: 1254 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1255 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 1256 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 1257 } 1258 } 1259 1260 return X86CC != X86ISD::COND_INVALID; 1261} 1262 1263static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 1264 bool &Flip) { 1265 return 
translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 1266} 1267 1268/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1269/// code. Current x86 isa includes the following FP cmov instructions: 1270/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1271static bool hasFPCMov(unsigned X86CC) { 1272 switch (X86CC) { 1273 default: 1274 return false; 1275 case X86ISD::COND_B: 1276 case X86ISD::COND_BE: 1277 case X86ISD::COND_E: 1278 case X86ISD::COND_P: 1279 case X86ISD::COND_A: 1280 case X86ISD::COND_AE: 1281 case X86ISD::COND_NE: 1282 case X86ISD::COND_NP: 1283 return true; 1284 } 1285} 1286 1287MachineBasicBlock * 1288X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 1289 MachineBasicBlock *BB) { 1290 switch (MI->getOpcode()) { 1291 default: assert(false && "Unexpected instr type to insert"); 1292 case X86::CMOV_FR32: 1293 case X86::CMOV_FR64: 1294 case X86::CMOV_V4F32: 1295 case X86::CMOV_V2F64: 1296 case X86::CMOV_V2I64: { 1297 // To "insert" a SELECT_CC instruction, we actually have to insert the 1298 // diamond control-flow pattern. The incoming instruction knows the 1299 // destination vreg to set, the condition code register to branch on, the 1300 // true/false values to select between, and a branch opcode to use. 1301 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 1302 ilist<MachineBasicBlock>::iterator It = BB; 1303 ++It; 1304 1305 // thisMBB: 1306 // ... 1307 // TrueVal = ... 
1308 // cmpTY ccX, r1, r2 1309 // bCC copy1MBB 1310 // fallthrough --> copy0MBB 1311 MachineBasicBlock *thisMBB = BB; 1312 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 1313 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 1314 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 1315 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 1316 MachineFunction *F = BB->getParent(); 1317 F->getBasicBlockList().insert(It, copy0MBB); 1318 F->getBasicBlockList().insert(It, sinkMBB); 1319 // Update machine-CFG edges by first adding all successors of the current 1320 // block to the new block which will contain the Phi node for the select. 1321 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 1322 e = BB->succ_end(); i != e; ++i) 1323 sinkMBB->addSuccessor(*i); 1324 // Next, remove all successors of the current block, and add the true 1325 // and fallthrough blocks as its successors. 1326 while(!BB->succ_empty()) 1327 BB->removeSuccessor(BB->succ_begin()); 1328 BB->addSuccessor(copy0MBB); 1329 BB->addSuccessor(sinkMBB); 1330 1331 // copy0MBB: 1332 // %FalseValue = ... 1333 // # fallthrough to sinkMBB 1334 BB = copy0MBB; 1335 1336 // Update machine-CFG edges 1337 BB->addSuccessor(sinkMBB); 1338 1339 // sinkMBB: 1340 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 1341 // ... 1342 BB = sinkMBB; 1343 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 1344 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 1345 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 1346 1347 delete MI; // The pseudo instruction is gone now. 1348 return BB; 1349 } 1350 1351 case X86::FP_TO_INT16_IN_MEM: 1352 case X86::FP_TO_INT32_IN_MEM: 1353 case X86::FP_TO_INT64_IN_MEM: { 1354 // Change the floating point control register to use "round towards zero" 1355 // mode when truncating to an integer value. 
1356 MachineFunction *F = BB->getParent(); 1357 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 1358 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 1359 1360 // Load the old value of the high byte of the control word... 1361 unsigned OldCW = 1362 F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass); 1363 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx); 1364 1365 // Set the high part to be round to zero... 1366 addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F); 1367 1368 // Reload the modified control word now... 1369 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1370 1371 // Restore the memory image of control word to original value 1372 addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW); 1373 1374 // Get the X86 opcode to use. 1375 unsigned Opc; 1376 switch (MI->getOpcode()) { 1377 default: assert(0 && "illegal opcode!"); 1378 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 1379 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 1380 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 1381 } 1382 1383 X86AddressMode AM; 1384 MachineOperand &Op = MI->getOperand(0); 1385 if (Op.isRegister()) { 1386 AM.BaseType = X86AddressMode::RegBase; 1387 AM.Base.Reg = Op.getReg(); 1388 } else { 1389 AM.BaseType = X86AddressMode::FrameIndexBase; 1390 AM.Base.FrameIndex = Op.getFrameIndex(); 1391 } 1392 Op = MI->getOperand(1); 1393 if (Op.isImmediate()) 1394 AM.Scale = Op.getImmedValue(); 1395 Op = MI->getOperand(2); 1396 if (Op.isImmediate()) 1397 AM.IndexReg = Op.getImmedValue(); 1398 Op = MI->getOperand(3); 1399 if (Op.isGlobalAddress()) { 1400 AM.GV = Op.getGlobal(); 1401 } else { 1402 AM.Disp = Op.getImmedValue(); 1403 } 1404 addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg()); 1405 1406 // Reload the original control word now. 
1407 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1408 1409 delete MI; // The pseudo instruction is gone now. 1410 return BB; 1411 } 1412 } 1413} 1414 1415 1416//===----------------------------------------------------------------------===// 1417// X86 Custom Lowering Hooks 1418//===----------------------------------------------------------------------===// 1419 1420/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 1421/// load. For Darwin, external and weak symbols are indirect, loading the value 1422/// at address GV rather then the value of GV itself. This means that the 1423/// GlobalAddress must be in the base or index register of the address, not the 1424/// GV offset field. 1425static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 1426 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 1427 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 1428} 1429 1430/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1431/// true if Op is undef or if its value falls within the specified range (L, H]. 1432static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1433 if (Op.getOpcode() == ISD::UNDEF) 1434 return true; 1435 1436 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1437 return (Val >= Low && Val < Hi); 1438} 1439 1440/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1441/// true if Op is undef or if its value equal to the specified value. 1442static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1443 if (Op.getOpcode() == ISD::UNDEF) 1444 return true; 1445 return cast<ConstantSDNode>(Op)->getValue() == Val; 1446} 1447 1448/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1449/// specifies a shuffle of elements that is suitable for input to PSHUFD. 
1450bool X86::isPSHUFDMask(SDNode *N) { 1451 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1452 1453 if (N->getNumOperands() != 4) 1454 return false; 1455 1456 // Check if the value doesn't reference the second vector. 1457 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1458 SDOperand Arg = N->getOperand(i); 1459 if (Arg.getOpcode() == ISD::UNDEF) continue; 1460 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1461 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1462 return false; 1463 } 1464 1465 return true; 1466} 1467 1468/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1469/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1470bool X86::isPSHUFHWMask(SDNode *N) { 1471 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1472 1473 if (N->getNumOperands() != 8) 1474 return false; 1475 1476 // Lower quadword copied in order. 1477 for (unsigned i = 0; i != 4; ++i) { 1478 SDOperand Arg = N->getOperand(i); 1479 if (Arg.getOpcode() == ISD::UNDEF) continue; 1480 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1481 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1482 return false; 1483 } 1484 1485 // Upper quadword shuffled. 1486 for (unsigned i = 4; i != 8; ++i) { 1487 SDOperand Arg = N->getOperand(i); 1488 if (Arg.getOpcode() == ISD::UNDEF) continue; 1489 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1490 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1491 if (Val < 4 || Val > 7) 1492 return false; 1493 } 1494 1495 return true; 1496} 1497 1498/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1499/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1500bool X86::isPSHUFLWMask(SDNode *N) { 1501 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1502 1503 if (N->getNumOperands() != 8) 1504 return false; 1505 1506 // Upper quadword copied in order. 
1507 for (unsigned i = 4; i != 8; ++i) 1508 if (!isUndefOrEqual(N->getOperand(i), i)) 1509 return false; 1510 1511 // Lower quadword shuffled. 1512 for (unsigned i = 0; i != 4; ++i) 1513 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1514 return false; 1515 1516 return true; 1517} 1518 1519/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1520/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1521bool X86::isSHUFPMask(SDNode *N) { 1522 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1523 1524 unsigned NumElems = N->getNumOperands(); 1525 if (NumElems == 2) { 1526 // The only cases that ought be handled by SHUFPD is 1527 // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1528 // Dest { 3, 0 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1529 // Expect bit 0 == 1, bit1 == 2 1530 SDOperand Bit0 = N->getOperand(0); 1531 SDOperand Bit1 = N->getOperand(1); 1532 if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3)) 1533 return true; 1534 if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2)) 1535 return true; 1536 return false; 1537 } 1538 1539 if (NumElems != 4) return false; 1540 1541 // Each half must refer to only one of the vector. 1542 for (unsigned i = 0; i < 2; ++i) { 1543 SDOperand Arg = N->getOperand(i); 1544 if (Arg.getOpcode() == ISD::UNDEF) continue; 1545 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1546 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1547 if (Val >= 4) return false; 1548 } 1549 for (unsigned i = 2; i < 4; ++i) { 1550 SDOperand Arg = N->getOperand(i); 1551 if (Arg.getOpcode() == ISD::UNDEF) continue; 1552 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1553 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1554 if (Val < 4) return false; 1555 } 1556 1557 return true; 1558} 1559 1560/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1561/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 
1562bool X86::isMOVHLPSMask(SDNode *N) { 1563 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1564 1565 if (N->getNumOperands() != 4) 1566 return false; 1567 1568 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1569 return isUndefOrEqual(N->getOperand(0), 6) && 1570 isUndefOrEqual(N->getOperand(1), 7) && 1571 isUndefOrEqual(N->getOperand(2), 2) && 1572 isUndefOrEqual(N->getOperand(3), 3); 1573} 1574 1575/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand 1576/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1577bool X86::isMOVLHPSMask(SDNode *N) { 1578 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1579 1580 if (N->getNumOperands() != 4) 1581 return false; 1582 1583 // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5 1584 return isUndefOrEqual(N->getOperand(0), 0) && 1585 isUndefOrEqual(N->getOperand(1), 1) && 1586 isUndefOrEqual(N->getOperand(2), 4) && 1587 isUndefOrEqual(N->getOperand(3), 5); 1588} 1589 1590/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1591/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1592bool X86::isMOVLPMask(SDNode *N) { 1593 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1594 1595 unsigned NumElems = N->getNumOperands(); 1596 if (NumElems != 2 && NumElems != 4) 1597 return false; 1598 1599 for (unsigned i = 0; i < NumElems/2; ++i) 1600 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1601 return false; 1602 1603 for (unsigned i = NumElems/2; i < NumElems; ++i) 1604 if (!isUndefOrEqual(N->getOperand(i), i)) 1605 return false; 1606 1607 return true; 1608} 1609 1610/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1611/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}. 
1612bool X86::isMOVHPMask(SDNode *N) { 1613 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1614 1615 unsigned NumElems = N->getNumOperands(); 1616 if (NumElems != 2 && NumElems != 4) 1617 return false; 1618 1619 for (unsigned i = 0; i < NumElems/2; ++i) 1620 if (!isUndefOrEqual(N->getOperand(i), i)) 1621 return false; 1622 1623 for (unsigned i = 0; i < NumElems/2; ++i) { 1624 SDOperand Arg = N->getOperand(i + NumElems/2); 1625 if (!isUndefOrEqual(Arg, i + NumElems)) 1626 return false; 1627 } 1628 1629 return true; 1630} 1631 1632/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1633/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1634bool X86::isUNPCKLMask(SDNode *N) { 1635 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1636 1637 unsigned NumElems = N->getNumOperands(); 1638 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1639 return false; 1640 1641 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1642 SDOperand BitI = N->getOperand(i); 1643 SDOperand BitI1 = N->getOperand(i+1); 1644 if (!isUndefOrEqual(BitI, j)) 1645 return false; 1646 if (!isUndefOrEqual(BitI1, j + NumElems)) 1647 return false; 1648 } 1649 1650 return true; 1651} 1652 1653/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1654/// specifies a shuffle of elements that is suitable for input to UNPCKH. 
1655bool X86::isUNPCKHMask(SDNode *N) { 1656 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1657 1658 unsigned NumElems = N->getNumOperands(); 1659 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1660 return false; 1661 1662 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1663 SDOperand BitI = N->getOperand(i); 1664 SDOperand BitI1 = N->getOperand(i+1); 1665 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1666 return false; 1667 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1668 return false; 1669 } 1670 1671 return true; 1672} 1673 1674/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1675/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1676/// <0, 0, 1, 1> 1677bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1678 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1679 1680 unsigned NumElems = N->getNumOperands(); 1681 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1682 return false; 1683 1684 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1685 SDOperand BitI = N->getOperand(i); 1686 SDOperand BitI1 = N->getOperand(i+1); 1687 1688 if (!isUndefOrEqual(BitI, j)) 1689 return false; 1690 if (!isUndefOrEqual(BitI1, j)) 1691 return false; 1692 } 1693 1694 return true; 1695} 1696 1697/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand 1698/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}. 
1699bool X86::isMOVSMask(SDNode *N) { 1700 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1701 1702 unsigned NumElems = N->getNumOperands(); 1703 if (NumElems != 2 && NumElems != 4) 1704 return false; 1705 1706 if (!isUndefOrEqual(N->getOperand(0), NumElems)) 1707 return false; 1708 1709 for (unsigned i = 1; i < NumElems; ++i) { 1710 SDOperand Arg = N->getOperand(i); 1711 if (!isUndefOrEqual(Arg, i)) 1712 return false; 1713 } 1714 1715 return true; 1716} 1717 1718/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1719/// a splat of a single element. 1720bool X86::isSplatMask(SDNode *N) { 1721 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1722 1723 // We can only splat 64-bit, and 32-bit quantities. 1724 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 1725 return false; 1726 1727 // This is a splat operation if each element of the permute is the same, and 1728 // if the value doesn't reference the second vector. 1729 SDOperand Elt = N->getOperand(0); 1730 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 1731 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) { 1732 SDOperand Arg = N->getOperand(i); 1733 if (Arg.getOpcode() == ISD::UNDEF) continue; 1734 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1735 if (Arg != Elt) return false; 1736 } 1737 1738 // Make sure it is a splat of the first vector operand. 1739 return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands(); 1740} 1741 1742/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 1743/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 1744/// instructions. 1745unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 1746 unsigned NumOperands = N->getNumOperands(); 1747 unsigned Shift = (NumOperands == 4) ? 
2 : 1; 1748 unsigned Mask = 0; 1749 for (unsigned i = 0; i < NumOperands; ++i) { 1750 unsigned Val = 0; 1751 SDOperand Arg = N->getOperand(NumOperands-i-1); 1752 if (Arg.getOpcode() != ISD::UNDEF) 1753 Val = cast<ConstantSDNode>(Arg)->getValue(); 1754 if (Val >= NumOperands) Val -= NumOperands; 1755 Mask |= Val; 1756 if (i != NumOperands - 1) 1757 Mask <<= Shift; 1758 } 1759 1760 return Mask; 1761} 1762 1763/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 1764/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 1765/// instructions. 1766unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 1767 unsigned Mask = 0; 1768 // 8 nodes, but we only care about the last 4. 1769 for (unsigned i = 7; i >= 4; --i) { 1770 unsigned Val = 0; 1771 SDOperand Arg = N->getOperand(i); 1772 if (Arg.getOpcode() != ISD::UNDEF) 1773 Val = cast<ConstantSDNode>(Arg)->getValue(); 1774 Mask |= (Val - 4); 1775 if (i != 4) 1776 Mask <<= 2; 1777 } 1778 1779 return Mask; 1780} 1781 1782/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 1783/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 1784/// instructions. 1785unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 1786 unsigned Mask = 0; 1787 // 8 nodes, but we only care about the first 4. 1788 for (int i = 3; i >= 0; --i) { 1789 unsigned Val = 0; 1790 SDOperand Arg = N->getOperand(i); 1791 if (Arg.getOpcode() != ISD::UNDEF) 1792 Val = cast<ConstantSDNode>(Arg)->getValue(); 1793 Mask |= Val; 1794 if (i != 0) 1795 Mask <<= 2; 1796 } 1797 1798 return Mask; 1799} 1800 1801/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 1802/// specifies a 8 element shuffle that can be broken into a pair of 1803/// PSHUFHW and PSHUFLW. 1804static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 1805 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1806 1807 if (N->getNumOperands() != 8) 1808 return false; 1809 1810 // Lower quadword shuffled. 
1811 for (unsigned i = 0; i != 4; ++i) { 1812 SDOperand Arg = N->getOperand(i); 1813 if (Arg.getOpcode() == ISD::UNDEF) continue; 1814 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1815 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1816 if (Val > 4) 1817 return false; 1818 } 1819 1820 // Upper quadword shuffled. 1821 for (unsigned i = 4; i != 8; ++i) { 1822 SDOperand Arg = N->getOperand(i); 1823 if (Arg.getOpcode() == ISD::UNDEF) continue; 1824 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1825 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1826 if (Val < 4 || Val > 7) 1827 return false; 1828 } 1829 1830 return true; 1831} 1832 1833/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 1834/// values in ther permute mask. 1835static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 1836 SDOperand V1 = Op.getOperand(0); 1837 SDOperand V2 = Op.getOperand(1); 1838 SDOperand Mask = Op.getOperand(2); 1839 MVT::ValueType VT = Op.getValueType(); 1840 MVT::ValueType MaskVT = Mask.getValueType(); 1841 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 1842 unsigned NumElems = Mask.getNumOperands(); 1843 std::vector<SDOperand> MaskVec; 1844 1845 for (unsigned i = 0; i != NumElems; ++i) { 1846 SDOperand Arg = Mask.getOperand(i); 1847 if (Arg.getOpcode() == ISD::UNDEF) continue; 1848 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1849 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1850 if (Val < NumElems) 1851 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 1852 else 1853 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 1854 } 1855 1856 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1857 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 1858} 1859 1860/// isScalarLoadToVector - Returns true if the node is a scalar load that 1861/// is promoted to a vector. 
1862static inline bool isScalarLoadToVector(SDOperand Op) { 1863 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1864 Op = Op.getOperand(0); 1865 return (Op.getOpcode() == ISD::LOAD); 1866 } 1867 return false; 1868} 1869 1870/// ShouldXformedToMOVLP - Return true if the node should be transformed to 1871/// match movlp{d|s}. The lower half elements should come from V1 (and in 1872/// order), and the upper half elements should come from the upper half of 1873/// V2 (not necessarily in order). And since V1 will become the source of 1874/// the MOVLP, it must be a scalar load. 1875static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) { 1876 if (isScalarLoadToVector(V1)) { 1877 unsigned NumElems = Mask.getNumOperands(); 1878 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1879 if (!isUndefOrEqual(Mask.getOperand(i), i)) 1880 return false; 1881 for (unsigned i = NumElems/2; i != NumElems; ++i) 1882 if (!isUndefOrInRange(Mask.getOperand(i), 1883 NumElems+NumElems/2, NumElems*2)) 1884 return false; 1885 return true; 1886 } 1887 1888 return false; 1889} 1890 1891/// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except 1892/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1893/// half elements to come from vector 1 (which would equal the dest.) and 1894/// the upper half to come from vector 2. 1895static bool isLowerFromV2UpperFromV1(SDOperand Op) { 1896 assert(Op.getOpcode() == ISD::BUILD_VECTOR); 1897 1898 unsigned NumElems = Op.getNumOperands(); 1899 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1900 if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2)) 1901 return false; 1902 for (unsigned i = NumElems/2; i != NumElems; ++i) 1903 if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems)) 1904 return false; 1905 return true; 1906} 1907 1908/// LowerOperation - Provide custom lowering hooks for some operations. 
1909/// 1910SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 1911 switch (Op.getOpcode()) { 1912 default: assert(0 && "Should not custom lower this!"); 1913 case ISD::SHL_PARTS: 1914 case ISD::SRA_PARTS: 1915 case ISD::SRL_PARTS: { 1916 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 1917 "Not an i64 shift!"); 1918 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 1919 SDOperand ShOpLo = Op.getOperand(0); 1920 SDOperand ShOpHi = Op.getOperand(1); 1921 SDOperand ShAmt = Op.getOperand(2); 1922 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 1923 DAG.getConstant(31, MVT::i8)) 1924 : DAG.getConstant(0, MVT::i32); 1925 1926 SDOperand Tmp2, Tmp3; 1927 if (Op.getOpcode() == ISD::SHL_PARTS) { 1928 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 1929 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 1930 } else { 1931 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 1932 Tmp3 = DAG.getNode(isSRA ? 
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 1933 } 1934 1935 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 1936 ShAmt, DAG.getConstant(32, MVT::i8)); 1937 1938 SDOperand Hi, Lo; 1939 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 1940 1941 std::vector<MVT::ValueType> Tys; 1942 Tys.push_back(MVT::i32); 1943 Tys.push_back(MVT::Flag); 1944 std::vector<SDOperand> Ops; 1945 if (Op.getOpcode() == ISD::SHL_PARTS) { 1946 Ops.push_back(Tmp2); 1947 Ops.push_back(Tmp3); 1948 Ops.push_back(CC); 1949 Ops.push_back(InFlag); 1950 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1951 InFlag = Hi.getValue(1); 1952 1953 Ops.clear(); 1954 Ops.push_back(Tmp3); 1955 Ops.push_back(Tmp1); 1956 Ops.push_back(CC); 1957 Ops.push_back(InFlag); 1958 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1959 } else { 1960 Ops.push_back(Tmp2); 1961 Ops.push_back(Tmp3); 1962 Ops.push_back(CC); 1963 Ops.push_back(InFlag); 1964 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1965 InFlag = Lo.getValue(1); 1966 1967 Ops.clear(); 1968 Ops.push_back(Tmp3); 1969 Ops.push_back(Tmp1); 1970 Ops.push_back(CC); 1971 Ops.push_back(InFlag); 1972 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 1973 } 1974 1975 Tys.clear(); 1976 Tys.push_back(MVT::i32); 1977 Tys.push_back(MVT::i32); 1978 Ops.clear(); 1979 Ops.push_back(Lo); 1980 Ops.push_back(Hi); 1981 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 1982 } 1983 case ISD::SINT_TO_FP: { 1984 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 1985 Op.getOperand(0).getValueType() >= MVT::i16 && 1986 "Unknown SINT_TO_FP to lower!"); 1987 1988 SDOperand Result; 1989 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 1990 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 1991 MachineFunction &MF = DAG.getMachineFunction(); 1992 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 1993 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1994 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 1995 DAG.getEntryNode(), Op.getOperand(0), 1996 StackSlot, 
DAG.getSrcValue(NULL)); 1997 1998 // Build the FILD 1999 std::vector<MVT::ValueType> Tys; 2000 Tys.push_back(MVT::f64); 2001 Tys.push_back(MVT::Other); 2002 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 2003 std::vector<SDOperand> Ops; 2004 Ops.push_back(Chain); 2005 Ops.push_back(StackSlot); 2006 Ops.push_back(DAG.getValueType(SrcVT)); 2007 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 2008 Tys, Ops); 2009 2010 if (X86ScalarSSE) { 2011 Chain = Result.getValue(1); 2012 SDOperand InFlag = Result.getValue(2); 2013 2014 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 2015 // shouldn't be necessary except that RFP cannot be live across 2016 // multiple blocks. When stackifier is fixed, they can be uncoupled. 2017 MachineFunction &MF = DAG.getMachineFunction(); 2018 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2019 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2020 std::vector<MVT::ValueType> Tys; 2021 Tys.push_back(MVT::Other); 2022 std::vector<SDOperand> Ops; 2023 Ops.push_back(Chain); 2024 Ops.push_back(Result); 2025 Ops.push_back(StackSlot); 2026 Ops.push_back(DAG.getValueType(Op.getValueType())); 2027 Ops.push_back(InFlag); 2028 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 2029 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 2030 DAG.getSrcValue(NULL)); 2031 } 2032 2033 return Result; 2034 } 2035 case ISD::FP_TO_SINT: { 2036 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 2037 "Unknown FP_TO_SINT to lower!"); 2038 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 2039 // stack slot. 
2040 MachineFunction &MF = DAG.getMachineFunction(); 2041 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 2042 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2043 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2044 2045 unsigned Opc; 2046 switch (Op.getValueType()) { 2047 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 2048 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 2049 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 2050 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 2051 } 2052 2053 SDOperand Chain = DAG.getEntryNode(); 2054 SDOperand Value = Op.getOperand(0); 2055 if (X86ScalarSSE) { 2056 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 2057 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 2058 DAG.getSrcValue(0)); 2059 std::vector<MVT::ValueType> Tys; 2060 Tys.push_back(MVT::f64); 2061 Tys.push_back(MVT::Other); 2062 std::vector<SDOperand> Ops; 2063 Ops.push_back(Chain); 2064 Ops.push_back(StackSlot); 2065 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 2066 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 2067 Chain = Value.getValue(1); 2068 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2069 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2070 } 2071 2072 // Build the FP_TO_INT*_IN_MEM 2073 std::vector<SDOperand> Ops; 2074 Ops.push_back(Chain); 2075 Ops.push_back(Value); 2076 Ops.push_back(StackSlot); 2077 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 2078 2079 // Load the result. 
2080 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2081 DAG.getSrcValue(NULL)); 2082 } 2083 case ISD::READCYCLECOUNTER: { 2084 std::vector<MVT::ValueType> Tys; 2085 Tys.push_back(MVT::Other); 2086 Tys.push_back(MVT::Flag); 2087 std::vector<SDOperand> Ops; 2088 Ops.push_back(Op.getOperand(0)); 2089 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 2090 Ops.clear(); 2091 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 2092 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 2093 MVT::i32, Ops[0].getValue(2))); 2094 Ops.push_back(Ops[1].getValue(1)); 2095 Tys[0] = Tys[1] = MVT::i32; 2096 Tys.push_back(MVT::Other); 2097 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2098 } 2099 case ISD::FABS: { 2100 MVT::ValueType VT = Op.getValueType(); 2101 const Type *OpNTy = MVT::getTypeForValueType(VT); 2102 std::vector<Constant*> CV; 2103 if (VT == MVT::f64) { 2104 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2105 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2106 } else { 2107 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2108 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2109 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2110 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2111 } 2112 Constant *CS = ConstantStruct::get(CV); 2113 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2114 SDOperand Mask 2115 = DAG.getNode(X86ISD::LOAD_PACK, 2116 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2117 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 2118 } 2119 case ISD::FNEG: { 2120 MVT::ValueType VT = Op.getValueType(); 2121 const Type *OpNTy = MVT::getTypeForValueType(VT); 2122 std::vector<Constant*> CV; 2123 if (VT == MVT::f64) { 2124 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 2125 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2126 } else { 2127 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 2128 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 
2129 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2130 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2131 } 2132 Constant *CS = ConstantStruct::get(CV); 2133 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2134 SDOperand Mask 2135 = DAG.getNode(X86ISD::LOAD_PACK, 2136 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2137 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 2138 } 2139 case ISD::SETCC: { 2140 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 2141 SDOperand Cond; 2142 SDOperand CC = Op.getOperand(2); 2143 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2144 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 2145 bool Flip; 2146 unsigned X86CC; 2147 if (translateX86CC(CC, isFP, X86CC, Flip)) { 2148 if (Flip) 2149 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2150 Op.getOperand(1), Op.getOperand(0)); 2151 else 2152 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2153 Op.getOperand(0), Op.getOperand(1)); 2154 return DAG.getNode(X86ISD::SETCC, MVT::i8, 2155 DAG.getConstant(X86CC, MVT::i8), Cond); 2156 } else { 2157 assert(isFP && "Illegal integer SetCC!"); 2158 2159 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2160 Op.getOperand(0), Op.getOperand(1)); 2161 std::vector<MVT::ValueType> Tys; 2162 std::vector<SDOperand> Ops; 2163 switch (SetCCOpcode) { 2164 default: assert(false && "Illegal floating point SetCC!"); 2165 case ISD::SETOEQ: { // !PF & ZF 2166 Tys.push_back(MVT::i8); 2167 Tys.push_back(MVT::Flag); 2168 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 2169 Ops.push_back(Cond); 2170 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2171 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2172 DAG.getConstant(X86ISD::COND_E, MVT::i8), 2173 Tmp1.getValue(1)); 2174 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 2175 } 2176 case ISD::SETUNE: { // PF | !ZF 2177 Tys.push_back(MVT::i8); 2178 Tys.push_back(MVT::Flag); 2179 Ops.push_back(DAG.getConstant(X86ISD::COND_P, 
MVT::i8)); 2180 Ops.push_back(Cond); 2181 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2182 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2183 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 2184 Tmp1.getValue(1)); 2185 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 2186 } 2187 } 2188 } 2189 } 2190 case ISD::SELECT: { 2191 MVT::ValueType VT = Op.getValueType(); 2192 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 2193 bool addTest = false; 2194 SDOperand Op0 = Op.getOperand(0); 2195 SDOperand Cond, CC; 2196 if (Op0.getOpcode() == ISD::SETCC) 2197 Op0 = LowerOperation(Op0, DAG); 2198 2199 if (Op0.getOpcode() == X86ISD::SETCC) { 2200 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2201 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2202 // have another use it will be eliminated. 2203 // If the X86ISD::SETCC has more than one use, then it's probably better 2204 // to use a test instead of duplicating the X86ISD::CMP (for register 2205 // pressure reason). 2206 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 2207 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2208 CmpOpc == X86ISD::UCOMI) { 2209 if (!Op0.hasOneUse()) { 2210 std::vector<MVT::ValueType> Tys; 2211 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 2212 Tys.push_back(Op0.Val->getValueType(i)); 2213 std::vector<SDOperand> Ops; 2214 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 2215 Ops.push_back(Op0.getOperand(i)); 2216 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2217 } 2218 2219 CC = Op0.getOperand(0); 2220 Cond = Op0.getOperand(1); 2221 // Make a copy as flag result cannot be used by more than one. 
2222 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2223 Cond.getOperand(0), Cond.getOperand(1)); 2224 addTest = 2225 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 2226 } else 2227 addTest = true; 2228 } else 2229 addTest = true; 2230 2231 if (addTest) { 2232 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2233 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 2234 } 2235 2236 std::vector<MVT::ValueType> Tys; 2237 Tys.push_back(Op.getValueType()); 2238 Tys.push_back(MVT::Flag); 2239 std::vector<SDOperand> Ops; 2240 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 2241 // condition is true. 2242 Ops.push_back(Op.getOperand(2)); 2243 Ops.push_back(Op.getOperand(1)); 2244 Ops.push_back(CC); 2245 Ops.push_back(Cond); 2246 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 2247 } 2248 case ISD::BRCOND: { 2249 bool addTest = false; 2250 SDOperand Cond = Op.getOperand(1); 2251 SDOperand Dest = Op.getOperand(2); 2252 SDOperand CC; 2253 if (Cond.getOpcode() == ISD::SETCC) 2254 Cond = LowerOperation(Cond, DAG); 2255 2256 if (Cond.getOpcode() == X86ISD::SETCC) { 2257 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2258 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2259 // have another use it will be eliminated. 2260 // If the X86ISD::SETCC has more than one use, then it's probably better 2261 // to use a test instead of duplicating the X86ISD::CMP (for register 2262 // pressure reason). 
2263 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 2264 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2265 CmpOpc == X86ISD::UCOMI) { 2266 if (!Cond.hasOneUse()) { 2267 std::vector<MVT::ValueType> Tys; 2268 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 2269 Tys.push_back(Cond.Val->getValueType(i)); 2270 std::vector<SDOperand> Ops; 2271 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 2272 Ops.push_back(Cond.getOperand(i)); 2273 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2274 } 2275 2276 CC = Cond.getOperand(0); 2277 Cond = Cond.getOperand(1); 2278 // Make a copy as flag result cannot be used by more than one. 2279 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2280 Cond.getOperand(0), Cond.getOperand(1)); 2281 } else 2282 addTest = true; 2283 } else 2284 addTest = true; 2285 2286 if (addTest) { 2287 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2288 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 2289 } 2290 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 2291 Op.getOperand(0), Op.getOperand(2), CC, Cond); 2292 } 2293 case ISD::MEMSET: { 2294 SDOperand InFlag(0, 0); 2295 SDOperand Chain = Op.getOperand(0); 2296 unsigned Align = 2297 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2298 if (Align == 0) Align = 1; 2299 2300 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2301 // If not DWORD aligned, call memset if size is less than the threshold. 2302 // It knows how to align to the right boundary first. 2303 if ((Align & 3) != 0 || 2304 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2305 MVT::ValueType IntPtr = getPointerTy(); 2306 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2307 std::vector<std::pair<SDOperand, const Type*> > Args; 2308 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2309 // Extend the ubyte argument to be an int value for the call. 
2310 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 2311 Args.push_back(std::make_pair(Val, IntPtrTy)); 2312 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2313 std::pair<SDOperand,SDOperand> CallResult = 2314 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2315 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 2316 return CallResult.second; 2317 } 2318 2319 MVT::ValueType AVT; 2320 SDOperand Count; 2321 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2322 unsigned BytesLeft = 0; 2323 bool TwoRepStos = false; 2324 if (ValC) { 2325 unsigned ValReg; 2326 unsigned Val = ValC->getValue() & 255; 2327 2328 // If the value is a constant, then we can potentially use larger sets. 2329 switch (Align & 3) { 2330 case 2: // WORD aligned 2331 AVT = MVT::i16; 2332 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2333 BytesLeft = I->getValue() % 2; 2334 Val = (Val << 8) | Val; 2335 ValReg = X86::AX; 2336 break; 2337 case 0: // DWORD aligned 2338 AVT = MVT::i32; 2339 if (I) { 2340 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2341 BytesLeft = I->getValue() % 4; 2342 } else { 2343 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2344 DAG.getConstant(2, MVT::i8)); 2345 TwoRepStos = true; 2346 } 2347 Val = (Val << 8) | Val; 2348 Val = (Val << 16) | Val; 2349 ValReg = X86::EAX; 2350 break; 2351 default: // Byte aligned 2352 AVT = MVT::i8; 2353 Count = Op.getOperand(3); 2354 ValReg = X86::AL; 2355 break; 2356 } 2357 2358 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 2359 InFlag); 2360 InFlag = Chain.getValue(1); 2361 } else { 2362 AVT = MVT::i8; 2363 Count = Op.getOperand(3); 2364 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 2365 InFlag = Chain.getValue(1); 2366 } 2367 2368 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2369 InFlag = Chain.getValue(1); 2370 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2371 
InFlag = Chain.getValue(1); 2372 2373 std::vector<MVT::ValueType> Tys; 2374 Tys.push_back(MVT::Other); 2375 Tys.push_back(MVT::Flag); 2376 std::vector<SDOperand> Ops; 2377 Ops.push_back(Chain); 2378 Ops.push_back(DAG.getValueType(AVT)); 2379 Ops.push_back(InFlag); 2380 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2381 2382 if (TwoRepStos) { 2383 InFlag = Chain.getValue(1); 2384 Count = Op.getOperand(3); 2385 MVT::ValueType CVT = Count.getValueType(); 2386 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2387 DAG.getConstant(3, CVT)); 2388 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2389 InFlag = Chain.getValue(1); 2390 Tys.clear(); 2391 Tys.push_back(MVT::Other); 2392 Tys.push_back(MVT::Flag); 2393 Ops.clear(); 2394 Ops.push_back(Chain); 2395 Ops.push_back(DAG.getValueType(MVT::i8)); 2396 Ops.push_back(InFlag); 2397 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2398 } else if (BytesLeft) { 2399 // Issue stores for the last 1 - 3 bytes. 2400 SDOperand Value; 2401 unsigned Val = ValC->getValue() & 255; 2402 unsigned Offset = I->getValue() - BytesLeft; 2403 SDOperand DstAddr = Op.getOperand(1); 2404 MVT::ValueType AddrVT = DstAddr.getValueType(); 2405 if (BytesLeft >= 2) { 2406 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 2407 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2408 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2409 DAG.getConstant(Offset, AddrVT)), 2410 DAG.getSrcValue(NULL)); 2411 BytesLeft -= 2; 2412 Offset += 2; 2413 } 2414 2415 if (BytesLeft == 1) { 2416 Value = DAG.getConstant(Val, MVT::i8); 2417 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2418 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2419 DAG.getConstant(Offset, AddrVT)), 2420 DAG.getSrcValue(NULL)); 2421 } 2422 } 2423 2424 return Chain; 2425 } 2426 case ISD::MEMCPY: { 2427 SDOperand Chain = Op.getOperand(0); 2428 unsigned Align = 2429 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2430 if (Align == 0) Align = 1; 2431 2432 ConstantSDNode 
*I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2433 // If not DWORD aligned, call memcpy if size is less than the threshold. 2434 // It knows how to align to the right boundary first. 2435 if ((Align & 3) != 0 || 2436 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2437 MVT::ValueType IntPtr = getPointerTy(); 2438 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2439 std::vector<std::pair<SDOperand, const Type*> > Args; 2440 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2441 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 2442 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2443 std::pair<SDOperand,SDOperand> CallResult = 2444 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2445 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 2446 return CallResult.second; 2447 } 2448 2449 MVT::ValueType AVT; 2450 SDOperand Count; 2451 unsigned BytesLeft = 0; 2452 bool TwoRepMovs = false; 2453 switch (Align & 3) { 2454 case 2: // WORD aligned 2455 AVT = MVT::i16; 2456 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2457 BytesLeft = I->getValue() % 2; 2458 break; 2459 case 0: // DWORD aligned 2460 AVT = MVT::i32; 2461 if (I) { 2462 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2463 BytesLeft = I->getValue() % 4; 2464 } else { 2465 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2466 DAG.getConstant(2, MVT::i8)); 2467 TwoRepMovs = true; 2468 } 2469 break; 2470 default: // Byte aligned 2471 AVT = MVT::i8; 2472 Count = Op.getOperand(3); 2473 break; 2474 } 2475 2476 SDOperand InFlag(0, 0); 2477 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2478 InFlag = Chain.getValue(1); 2479 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2480 InFlag = Chain.getValue(1); 2481 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 2482 InFlag = Chain.getValue(1); 2483 2484 std::vector<MVT::ValueType> Tys; 2485 Tys.push_back(MVT::Other); 2486 
Tys.push_back(MVT::Flag); 2487 std::vector<SDOperand> Ops; 2488 Ops.push_back(Chain); 2489 Ops.push_back(DAG.getValueType(AVT)); 2490 Ops.push_back(InFlag); 2491 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2492 2493 if (TwoRepMovs) { 2494 InFlag = Chain.getValue(1); 2495 Count = Op.getOperand(3); 2496 MVT::ValueType CVT = Count.getValueType(); 2497 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2498 DAG.getConstant(3, CVT)); 2499 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2500 InFlag = Chain.getValue(1); 2501 Tys.clear(); 2502 Tys.push_back(MVT::Other); 2503 Tys.push_back(MVT::Flag); 2504 Ops.clear(); 2505 Ops.push_back(Chain); 2506 Ops.push_back(DAG.getValueType(MVT::i8)); 2507 Ops.push_back(InFlag); 2508 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2509 } else if (BytesLeft) { 2510 // Issue loads and stores for the last 1 - 3 bytes. 2511 unsigned Offset = I->getValue() - BytesLeft; 2512 SDOperand DstAddr = Op.getOperand(1); 2513 MVT::ValueType DstVT = DstAddr.getValueType(); 2514 SDOperand SrcAddr = Op.getOperand(2); 2515 MVT::ValueType SrcVT = SrcAddr.getValueType(); 2516 SDOperand Value; 2517 if (BytesLeft >= 2) { 2518 Value = DAG.getLoad(MVT::i16, Chain, 2519 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2520 DAG.getConstant(Offset, SrcVT)), 2521 DAG.getSrcValue(NULL)); 2522 Chain = Value.getValue(1); 2523 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2524 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2525 DAG.getConstant(Offset, DstVT)), 2526 DAG.getSrcValue(NULL)); 2527 BytesLeft -= 2; 2528 Offset += 2; 2529 } 2530 2531 if (BytesLeft == 1) { 2532 Value = DAG.getLoad(MVT::i8, Chain, 2533 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2534 DAG.getConstant(Offset, SrcVT)), 2535 DAG.getSrcValue(NULL)); 2536 Chain = Value.getValue(1); 2537 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2538 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2539 DAG.getConstant(Offset, DstVT)), 2540 DAG.getSrcValue(NULL)); 2541 } 2542 } 2543 2544 return Chain; 2545 } 
2546 2547 // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their 2548 // target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 2549 // one of the above mentioned nodes. It has to be wrapped because otherwise 2550 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2551 // be used to form addressing mode. These wrapped nodes will be selected 2552 // into MOV32ri. 2553 case ISD::ConstantPool: { 2554 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2555 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2556 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2557 CP->getAlignment())); 2558 if (Subtarget->isTargetDarwin()) { 2559 // With PIC, the address is actually $g + Offset. 2560 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2561 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2562 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2563 } 2564 2565 return Result; 2566 } 2567 case ISD::GlobalAddress: { 2568 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2569 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2570 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2571 if (Subtarget->isTargetDarwin()) { 2572 // With PIC, the address is actually $g + Offset. 2573 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2574 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2575 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2576 2577 // For Darwin, external and weak symbols are indirect, so we want to load 2578 // the value at address GV, not the value of GV itself. This means that 2579 // the GlobalAddress must be in the base or index register of the address, 2580 // not the GV offset field. 
2581 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2582 DarwinGVRequiresExtraLoad(GV)) 2583 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2584 Result, DAG.getSrcValue(NULL)); 2585 } 2586 2587 return Result; 2588 } 2589 case ISD::ExternalSymbol: { 2590 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2591 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2592 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2593 if (Subtarget->isTargetDarwin()) { 2594 // With PIC, the address is actually $g + Offset. 2595 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2596 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2597 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2598 } 2599 2600 return Result; 2601 } 2602 case ISD::VASTART: { 2603 // vastart just stores the address of the VarArgsFrameIndex slot into the 2604 // memory location argument. 2605 // FIXME: Replace MVT::i32 with PointerTy 2606 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 2607 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 2608 Op.getOperand(1), Op.getOperand(2)); 2609 } 2610 case ISD::RET: { 2611 SDOperand Copy; 2612 2613 switch(Op.getNumOperands()) { 2614 default: 2615 assert(0 && "Do not know how to return this many arguments!"); 2616 abort(); 2617 case 1: 2618 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 2619 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 2620 case 2: { 2621 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 2622 if (MVT::isInteger(ArgVT)) 2623 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 2624 SDOperand()); 2625 else if (!X86ScalarSSE) { 2626 std::vector<MVT::ValueType> Tys; 2627 Tys.push_back(MVT::Other); 2628 Tys.push_back(MVT::Flag); 2629 std::vector<SDOperand> Ops; 2630 Ops.push_back(Op.getOperand(0)); 2631 Ops.push_back(Op.getOperand(1)); 2632 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2633 } else { 2634 
SDOperand MemLoc; 2635 SDOperand Chain = Op.getOperand(0); 2636 SDOperand Value = Op.getOperand(1); 2637 2638 if (Value.getOpcode() == ISD::LOAD && 2639 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 2640 Chain = Value.getOperand(0); 2641 MemLoc = Value.getOperand(1); 2642 } else { 2643 // Spill the value to memory and reload it into top of stack. 2644 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 2645 MachineFunction &MF = DAG.getMachineFunction(); 2646 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2647 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 2648 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 2649 Value, MemLoc, DAG.getSrcValue(0)); 2650 } 2651 std::vector<MVT::ValueType> Tys; 2652 Tys.push_back(MVT::f64); 2653 Tys.push_back(MVT::Other); 2654 std::vector<SDOperand> Ops; 2655 Ops.push_back(Chain); 2656 Ops.push_back(MemLoc); 2657 Ops.push_back(DAG.getValueType(ArgVT)); 2658 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 2659 Tys.clear(); 2660 Tys.push_back(MVT::Other); 2661 Tys.push_back(MVT::Flag); 2662 Ops.clear(); 2663 Ops.push_back(Copy.getValue(1)); 2664 Ops.push_back(Copy); 2665 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2666 } 2667 break; 2668 } 2669 case 3: 2670 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 2671 SDOperand()); 2672 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 2673 break; 2674 } 2675 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 2676 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 2677 Copy.getValue(1)); 2678 } 2679 case ISD::SCALAR_TO_VECTOR: { 2680 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 2681 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 2682 } 2683 case ISD::VECTOR_SHUFFLE: { 2684 SDOperand V1 = Op.getOperand(0); 2685 SDOperand V2 = Op.getOperand(1); 2686 SDOperand PermMask = Op.getOperand(2); 2687 MVT::ValueType VT = Op.getValueType(); 2688 unsigned NumElems 
= PermMask.getNumOperands(); 2689 2690 if (X86::isSplatMask(PermMask.Val)) 2691 return Op; 2692 2693 // Normalize the node to match x86 shuffle ops if needed 2694 if (V2.getOpcode() != ISD::UNDEF) { 2695 bool DoSwap = false; 2696 2697 if (ShouldXformedToMOVLP(V1, V2, PermMask)) 2698 DoSwap = true; 2699 else if (isLowerFromV2UpperFromV1(PermMask)) 2700 DoSwap = true; 2701 2702 if (DoSwap) { 2703 Op = CommuteVectorShuffle(Op, DAG); 2704 V1 = Op.getOperand(0); 2705 V2 = Op.getOperand(1); 2706 PermMask = Op.getOperand(2); 2707 } 2708 } 2709 2710 if (NumElems == 2) 2711 return Op; 2712 2713 if (X86::isMOVSMask(PermMask.Val)) 2714 // Leave the VECTOR_SHUFFLE alone. It matches MOVS{S|D}. 2715 return Op; 2716 2717 if (X86::isUNPCKLMask(PermMask.Val) || 2718 X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2719 X86::isUNPCKHMask(PermMask.Val)) 2720 // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. 2721 return Op; 2722 2723 // If VT is integer, try PSHUF* first, then SHUFP*. 2724 if (MVT::isInteger(VT)) { 2725 if (X86::isPSHUFDMask(PermMask.Val) || 2726 X86::isPSHUFHWMask(PermMask.Val) || 2727 X86::isPSHUFLWMask(PermMask.Val)) { 2728 if (V2.getOpcode() != ISD::UNDEF) 2729 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2730 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2731 return Op; 2732 } 2733 2734 if (X86::isSHUFPMask(PermMask.Val)) 2735 return Op; 2736 2737 // Handle v8i16 shuffle high / low shuffle node pair. 
2738 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2739 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2740 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2741 std::vector<SDOperand> MaskVec; 2742 for (unsigned i = 0; i != 4; ++i) 2743 MaskVec.push_back(PermMask.getOperand(i)); 2744 for (unsigned i = 4; i != 8; ++i) 2745 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2746 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2747 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2748 MaskVec.clear(); 2749 for (unsigned i = 0; i != 4; ++i) 2750 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2751 for (unsigned i = 4; i != 8; ++i) 2752 MaskVec.push_back(PermMask.getOperand(i)); 2753 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2754 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2755 } 2756 } else { 2757 // Floating point cases in the other order. 2758 if (X86::isSHUFPMask(PermMask.Val)) 2759 return Op; 2760 if (X86::isPSHUFDMask(PermMask.Val) || 2761 X86::isPSHUFHWMask(PermMask.Val) || 2762 X86::isPSHUFLWMask(PermMask.Val)) { 2763 if (V2.getOpcode() != ISD::UNDEF) 2764 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2765 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2766 return Op; 2767 } 2768 } 2769 2770 return SDOperand(); 2771 } 2772 case ISD::BUILD_VECTOR: { 2773 // All one's are handled with pcmpeqd. 
2774 if (ISD::isBuildVectorAllOnes(Op.Val)) 2775 return Op; 2776 2777 std::set<SDOperand> Values; 2778 SDOperand Elt0 = Op.getOperand(0); 2779 Values.insert(Elt0); 2780 bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && 2781 cast<ConstantSDNode>(Elt0)->getValue() == 0) || 2782 (isa<ConstantFPSDNode>(Elt0) && 2783 cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0)); 2784 bool RestAreZero = true; 2785 unsigned NumElems = Op.getNumOperands(); 2786 for (unsigned i = 1; i < NumElems; ++i) { 2787 SDOperand Elt = Op.getOperand(i); 2788 if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) { 2789 if (!FPC->isExactlyValue(+0.0)) 2790 RestAreZero = false; 2791 } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2792 if (!C->isNullValue()) 2793 RestAreZero = false; 2794 } else 2795 RestAreZero = false; 2796 Values.insert(Elt); 2797 } 2798 2799 if (RestAreZero) { 2800 if (Elt0IsZero) return Op; 2801 2802 // Zero extend a scalar to a vector. 2803 return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); 2804 } 2805 2806 if (Values.size() > 2) { 2807 // Expand into a number of unpckl*. 2808 // e.g. 
for v4f32 2809 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2810 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2811 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2812 MVT::ValueType VT = Op.getValueType(); 2813 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2814 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2815 std::vector<SDOperand> MaskVec; 2816 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2817 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2818 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2819 } 2820 SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2821 std::vector<SDOperand> V(NumElems); 2822 for (unsigned i = 0; i < NumElems; ++i) 2823 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2824 NumElems >>= 1; 2825 while (NumElems != 0) { 2826 for (unsigned i = 0; i < NumElems; ++i) 2827 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2828 PermMask); 2829 NumElems >>= 1; 2830 } 2831 return V[0]; 2832 } 2833 2834 return SDOperand(); 2835 } 2836 case ISD::EXTRACT_VECTOR_ELT: { 2837 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2838 return SDOperand(); 2839 2840 MVT::ValueType VT = Op.getValueType(); 2841 // TODO: handle v16i8. 2842 if (MVT::getSizeInBits(VT) == 16) { 2843 // Transform it so it match pextrw which produces a 32-bit result. 2844 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2845 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2846 Op.getOperand(0), Op.getOperand(1)); 2847 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2848 DAG.getValueType(VT)); 2849 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2850 } else if (MVT::getSizeInBits(VT) == 32) { 2851 SDOperand Vec = Op.getOperand(0); 2852 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2853 if (Idx == 0) 2854 return Op; 2855 2856 // TODO: if Idex == 2, we can use unpckhps 2857 // SHUFPS the element to the lowest double word, then movss. 
2858 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2859 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 2860 MVT::getVectorBaseType(MaskVT)); 2861 std::vector<SDOperand> IdxVec; 2862 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 2863 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2864 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2865 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2866 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2867 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2868 Vec, Vec, Mask); 2869 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2870 DAG.getConstant(0, MVT::i32)); 2871 } else if (MVT::getSizeInBits(VT) == 64) { 2872 SDOperand Vec = Op.getOperand(0); 2873 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2874 if (Idx == 0) 2875 return Op; 2876 2877 // UNPCKHPD the element to the lowest double word, then movsd. 2878 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 2879 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 2880 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2881 std::vector<SDOperand> IdxVec; 2882 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 2883 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2884 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2885 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2886 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2887 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2888 DAG.getConstant(0, MVT::i32)); 2889 } 2890 2891 return SDOperand(); 2892 } 2893 case ISD::INSERT_VECTOR_ELT: { 2894 // Transform it so it match pinsrw which expects a 16-bit value in a R32 2895 // as its second argument. 
2896 MVT::ValueType VT = Op.getValueType(); 2897 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 2898 if (MVT::getSizeInBits(BaseVT) == 16) { 2899 SDOperand N1 = Op.getOperand(1); 2900 SDOperand N2 = Op.getOperand(2); 2901 if (N1.getValueType() != MVT::i32) 2902 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 2903 if (N2.getValueType() != MVT::i32) 2904 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 2905 return DAG.getNode(X86ISD::PINSRW, VT, Op.getOperand(0), N1, N2); 2906 } 2907 2908 return SDOperand(); 2909 } 2910 case ISD::INTRINSIC_WO_CHAIN: { 2911 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 2912 switch (IntNo) { 2913 default: return SDOperand(); // Don't custom lower most intrinsics. 2914 // Comparison intrinsics. 2915 case Intrinsic::x86_sse_comieq_ss: 2916 case Intrinsic::x86_sse_comilt_ss: 2917 case Intrinsic::x86_sse_comile_ss: 2918 case Intrinsic::x86_sse_comigt_ss: 2919 case Intrinsic::x86_sse_comige_ss: 2920 case Intrinsic::x86_sse_comineq_ss: 2921 case Intrinsic::x86_sse_ucomieq_ss: 2922 case Intrinsic::x86_sse_ucomilt_ss: 2923 case Intrinsic::x86_sse_ucomile_ss: 2924 case Intrinsic::x86_sse_ucomigt_ss: 2925 case Intrinsic::x86_sse_ucomige_ss: 2926 case Intrinsic::x86_sse_ucomineq_ss: 2927 case Intrinsic::x86_sse2_comieq_sd: 2928 case Intrinsic::x86_sse2_comilt_sd: 2929 case Intrinsic::x86_sse2_comile_sd: 2930 case Intrinsic::x86_sse2_comigt_sd: 2931 case Intrinsic::x86_sse2_comige_sd: 2932 case Intrinsic::x86_sse2_comineq_sd: 2933 case Intrinsic::x86_sse2_ucomieq_sd: 2934 case Intrinsic::x86_sse2_ucomilt_sd: 2935 case Intrinsic::x86_sse2_ucomile_sd: 2936 case Intrinsic::x86_sse2_ucomigt_sd: 2937 case Intrinsic::x86_sse2_ucomige_sd: 2938 case Intrinsic::x86_sse2_ucomineq_sd: { 2939 unsigned Opc = 0; 2940 ISD::CondCode CC = ISD::SETCC_INVALID; 2941 switch (IntNo) { 2942 default: break; 2943 case Intrinsic::x86_sse_comieq_ss: 2944 case Intrinsic::x86_sse2_comieq_sd: 2945 Opc = X86ISD::COMI; 2946 CC = 
ISD::SETEQ; 2947 break; 2948 case Intrinsic::x86_sse_comilt_ss: 2949 case Intrinsic::x86_sse2_comilt_sd: 2950 Opc = X86ISD::COMI; 2951 CC = ISD::SETLT; 2952 break; 2953 case Intrinsic::x86_sse_comile_ss: 2954 case Intrinsic::x86_sse2_comile_sd: 2955 Opc = X86ISD::COMI; 2956 CC = ISD::SETLE; 2957 break; 2958 case Intrinsic::x86_sse_comigt_ss: 2959 case Intrinsic::x86_sse2_comigt_sd: 2960 Opc = X86ISD::COMI; 2961 CC = ISD::SETGT; 2962 break; 2963 case Intrinsic::x86_sse_comige_ss: 2964 case Intrinsic::x86_sse2_comige_sd: 2965 Opc = X86ISD::COMI; 2966 CC = ISD::SETGE; 2967 break; 2968 case Intrinsic::x86_sse_comineq_ss: 2969 case Intrinsic::x86_sse2_comineq_sd: 2970 Opc = X86ISD::COMI; 2971 CC = ISD::SETNE; 2972 break; 2973 case Intrinsic::x86_sse_ucomieq_ss: 2974 case Intrinsic::x86_sse2_ucomieq_sd: 2975 Opc = X86ISD::UCOMI; 2976 CC = ISD::SETEQ; 2977 break; 2978 case Intrinsic::x86_sse_ucomilt_ss: 2979 case Intrinsic::x86_sse2_ucomilt_sd: 2980 Opc = X86ISD::UCOMI; 2981 CC = ISD::SETLT; 2982 break; 2983 case Intrinsic::x86_sse_ucomile_ss: 2984 case Intrinsic::x86_sse2_ucomile_sd: 2985 Opc = X86ISD::UCOMI; 2986 CC = ISD::SETLE; 2987 break; 2988 case Intrinsic::x86_sse_ucomigt_ss: 2989 case Intrinsic::x86_sse2_ucomigt_sd: 2990 Opc = X86ISD::UCOMI; 2991 CC = ISD::SETGT; 2992 break; 2993 case Intrinsic::x86_sse_ucomige_ss: 2994 case Intrinsic::x86_sse2_ucomige_sd: 2995 Opc = X86ISD::UCOMI; 2996 CC = ISD::SETGE; 2997 break; 2998 case Intrinsic::x86_sse_ucomineq_ss: 2999 case Intrinsic::x86_sse2_ucomineq_sd: 3000 Opc = X86ISD::UCOMI; 3001 CC = ISD::SETNE; 3002 break; 3003 } 3004 bool Flip; 3005 unsigned X86CC; 3006 translateX86CC(CC, true, X86CC, Flip); 3007 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 3008 Op.getOperand(Flip?1:2)); 3009 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 3010 DAG.getConstant(X86CC, MVT::i8), Cond); 3011 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 3012 } 3013 } 3014 } 3015 } 3016} 3017 3018const char 
*X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3019 switch (Opcode) { 3020 default: return NULL; 3021 case X86ISD::SHLD: return "X86ISD::SHLD"; 3022 case X86ISD::SHRD: return "X86ISD::SHRD"; 3023 case X86ISD::FAND: return "X86ISD::FAND"; 3024 case X86ISD::FXOR: return "X86ISD::FXOR"; 3025 case X86ISD::FILD: return "X86ISD::FILD"; 3026 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3027 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3028 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3029 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3030 case X86ISD::FLD: return "X86ISD::FLD"; 3031 case X86ISD::FST: return "X86ISD::FST"; 3032 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 3033 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3034 case X86ISD::CALL: return "X86ISD::CALL"; 3035 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3036 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3037 case X86ISD::CMP: return "X86ISD::CMP"; 3038 case X86ISD::TEST: return "X86ISD::TEST"; 3039 case X86ISD::COMI: return "X86ISD::COMI"; 3040 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3041 case X86ISD::SETCC: return "X86ISD::SETCC"; 3042 case X86ISD::CMOV: return "X86ISD::CMOV"; 3043 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3044 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3045 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3046 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3047 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3048 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3049 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3050 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3051 case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; 3052 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3053 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3054 } 3055} 3056 3057void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3058 
uint64_t Mask, 3059 uint64_t &KnownZero, 3060 uint64_t &KnownOne, 3061 unsigned Depth) const { 3062 unsigned Opc = Op.getOpcode(); 3063 assert((Opc >= ISD::BUILTIN_OP_END || 3064 Opc == ISD::INTRINSIC_WO_CHAIN || 3065 Opc == ISD::INTRINSIC_W_CHAIN || 3066 Opc == ISD::INTRINSIC_VOID) && 3067 "Should use MaskedValueIsZero if you don't know whether Op" 3068 " is a target node!"); 3069 3070 KnownZero = KnownOne = 0; // Don't know anything. 3071 switch (Opc) { 3072 default: break; 3073 case X86ISD::SETCC: 3074 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3075 break; 3076 } 3077} 3078 3079std::vector<unsigned> X86TargetLowering:: 3080getRegClassForInlineAsmConstraint(const std::string &Constraint, 3081 MVT::ValueType VT) const { 3082 if (Constraint.size() == 1) { 3083 // FIXME: not handling fp-stack yet! 3084 // FIXME: not handling MMX registers yet ('y' constraint). 3085 switch (Constraint[0]) { // GCC X86 Constraint Letters 3086 default: break; // Unknown constriant letter 3087 case 'r': // GENERAL_REGS 3088 case 'R': // LEGACY_REGS 3089 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3090 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3091 case 'l': // INDEX_REGS 3092 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3093 X86::ESI, X86::EDI, X86::EBP, 0); 3094 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3095 case 'Q': // Q_REGS 3096 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3097 case 'x': // SSE_REGS if SSE1 allowed 3098 if (Subtarget->hasSSE1()) 3099 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3100 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3101 0); 3102 return std::vector<unsigned>(); 3103 case 'Y': // SSE_REGS if SSE2 allowed 3104 if (Subtarget->hasSSE2()) 3105 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3106 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3107 0); 3108 return std::vector<unsigned>(); 3109 } 3110 } 3111 
3112 return std::vector<unsigned>(); 3113} 3114 3115/// isLegalAddressImmediate - Return true if the integer value or 3116/// GlobalValue can be used as the offset of the target addressing mode. 3117bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3118 // X86 allows a sign-extended 32-bit immediate field. 3119 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3120} 3121 3122bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3123 if (Subtarget->isTargetDarwin()) { 3124 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3125 if (RModel == Reloc::Static) 3126 return true; 3127 else if (RModel == Reloc::DynamicNoPIC) 3128 return !DarwinGVRequiresExtraLoad(GV); 3129 else 3130 return false; 3131 } else 3132 return true; 3133} 3134 3135/// isShuffleMaskLegal - Targets can use this to indicate that they only 3136/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3137/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3138/// are assumed to be legal. 3139bool 3140X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3141 // Only do shuffles on 128-bit vector types for now. 3142 if (MVT::getSizeInBits(VT) == 64) return false; 3143 return (Mask.Val->getNumOperands() == 2 || 3144 X86::isSplatMask(Mask.Val) || 3145 X86::isMOVSMask(Mask.Val) || 3146 X86::isPSHUFDMask(Mask.Val) || 3147 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3148 X86::isSHUFPMask(Mask.Val) || 3149 X86::isUNPCKLMask(Mask.Val) || 3150 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3151 X86::isUNPCKHMask(Mask.Val)); 3152} 3153