X86ISelLowering.cpp revision 5001ea1078e300fab8d26d183249522b4c4edd98
1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that X86 uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86.h" 16#include "X86InstrBuilder.h" 17#include "X86ISelLowering.h" 18#include "X86TargetMachine.h" 19#include "llvm/CallingConv.h" 20#include "llvm/Constants.h" 21#include "llvm/Function.h" 22#include "llvm/Intrinsics.h" 23#include "llvm/ADT/VectorExtras.h" 24#include "llvm/Analysis/ScalarEvolutionExpressions.h" 25#include "llvm/CodeGen/MachineFrameInfo.h" 26#include "llvm/CodeGen/MachineFunction.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/SelectionDAG.h" 29#include "llvm/CodeGen/SSARegMap.h" 30#include "llvm/Support/MathExtras.h" 31#include "llvm/Target/TargetOptions.h" 32using namespace llvm; 33 34// FIXME: temporary. 35#include "llvm/Support/CommandLine.h" 36static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden, 37 cl::desc("Enable fastcc on X86")); 38 39X86TargetLowering::X86TargetLowering(TargetMachine &TM) 40 : TargetLowering(TM) { 41 Subtarget = &TM.getSubtarget<X86Subtarget>(); 42 X86ScalarSSE = Subtarget->hasSSE2(); 43 44 // Set up the TargetLowering object. 45 46 // X86 is weird, it always uses i8 for shift amounts and setcc results. 47 setShiftAmountType(MVT::i8); 48 setSetCCResultType(MVT::i8); 49 setSetCCResultContents(ZeroOrOneSetCCResult); 50 setSchedulingPreference(SchedulingForRegPressure); 51 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 52 setStackPointerRegisterToSaveRestore(X86::ESP); 53 54 if (!Subtarget->isTargetDarwin()) 55 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 56 setUseUnderscoreSetJmpLongJmp(true); 57 58 // Add legal addressing mode scale values. 59 addLegalAddressScale(8); 60 addLegalAddressScale(4); 61 addLegalAddressScale(2); 62 // Enter the ones which require both scale + index last. These are more 63 // expensive. 64 addLegalAddressScale(9); 65 addLegalAddressScale(5); 66 addLegalAddressScale(3); 67 68 // Set up the register classes. 69 addRegisterClass(MVT::i8, X86::R8RegisterClass); 70 addRegisterClass(MVT::i16, X86::R16RegisterClass); 71 addRegisterClass(MVT::i32, X86::R32RegisterClass); 72 73 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 74 // operation. 75 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 76 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 77 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 78 79 if (X86ScalarSSE) 80 // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead. 81 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 82 else 83 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 84 85 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 86 // this operation. 87 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 88 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 89 // SSE has no i16 to fp conversion, only i32 90 if (X86ScalarSSE) 91 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 92 else { 93 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 94 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 95 } 96 97 // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64 98 // isn't legal. 99 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 100 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 101 102 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 103 // this operation. 104 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 105 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 106 107 if (X86ScalarSSE) { 108 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 109 } else { 110 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 111 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 112 } 113 114 // Handle FP_TO_UINT by promoting the destination to a larger signed 115 // conversion. 116 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 117 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 118 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 119 120 if (X86ScalarSSE && !Subtarget->hasSSE3()) 121 // Expand FP_TO_UINT into a select. 122 // FIXME: We would like to use a Custom expander here eventually to do 123 // the optimal thing for SSE vs. the default expansion in the legalizer. 124 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 125 else 126 // With SSE3 we can use fisttpll to convert to a signed i64. 127 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 128 129 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 130 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 131 132 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 133 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 134 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 135 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand); 137 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); 138 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 139 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 140 setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); 141 setOperationAction(ISD::FREM , MVT::f64 , Expand); 142 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 143 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 144 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 145 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 146 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 147 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 148 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 149 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 150 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 151 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 152 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 153 154 // These should be promoted to a larger select which is supported. 155 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 156 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 157 158 // X86 wants to expand cmov itself. 159 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 160 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 161 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 162 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 163 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 164 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 165 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 166 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 167 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 168 // X86 ret instruction may pop stack. 169 setOperationAction(ISD::RET , MVT::Other, Custom); 170 // Darwin ABI issue. 171 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 172 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 173 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 174 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 175 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 176 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 177 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 178 // X86 wants to expand memset / memcpy itself. 179 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 180 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 181 182 // We don't have line number support yet. 183 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 184 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 185 // FIXME - use subtarget debug flags 186 if (!Subtarget->isTargetDarwin()) 187 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); 188 189 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 190 setOperationAction(ISD::VASTART , MVT::Other, Custom); 191 192 // Use the default implementation. 193 setOperationAction(ISD::VAARG , MVT::Other, Expand); 194 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 195 setOperationAction(ISD::VAEND , MVT::Other, Expand); 196 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 197 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 198 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 199 200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 202 203 if (X86ScalarSSE) { 204 // Set up the FP register classes. 205 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 206 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 207 208 // SSE has no load+extend ops 209 setOperationAction(ISD::EXTLOAD, MVT::f32, Expand); 210 setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand); 211 212 // Use ANDPD to simulate FABS. 213 setOperationAction(ISD::FABS , MVT::f64, Custom); 214 setOperationAction(ISD::FABS , MVT::f32, Custom); 215 216 // Use XORP to simulate FNEG. 217 setOperationAction(ISD::FNEG , MVT::f64, Custom); 218 setOperationAction(ISD::FNEG , MVT::f32, Custom); 219 220 // We don't support sin/cos/fmod 221 setOperationAction(ISD::FSIN , MVT::f64, Expand); 222 setOperationAction(ISD::FCOS , MVT::f64, Expand); 223 setOperationAction(ISD::FREM , MVT::f64, Expand); 224 setOperationAction(ISD::FSIN , MVT::f32, Expand); 225 setOperationAction(ISD::FCOS , MVT::f32, Expand); 226 setOperationAction(ISD::FREM , MVT::f32, Expand); 227 228 // Expand FP immediates into loads from the stack, except for the special 229 // cases we handle. 230 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 231 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 232 addLegalFPImmediate(+0.0); // xorps / xorpd 233 } else { 234 // Set up the FP register classes. 235 addRegisterClass(MVT::f64, X86::RFPRegisterClass); 236 237 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 238 239 if (!UnsafeFPMath) { 240 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 241 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 242 } 243 244 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 245 addLegalFPImmediate(+0.0); // FLD0 246 addLegalFPImmediate(+1.0); // FLD1 247 addLegalFPImmediate(-0.0); // FLD0/FCHS 248 addLegalFPImmediate(-1.0); // FLD1/FCHS 249 } 250 251 // First set operation action for all vector types to expand. Then we 252 // will selectively turn on ones that can be effectively codegen'd. 253 for (unsigned VT = (unsigned)MVT::Vector + 1; 254 VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { 255 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 256 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 257 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 258 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 259 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 260 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 261 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 262 } 263 264 if (Subtarget->hasMMX()) { 265 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 266 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 267 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 268 269 // FIXME: add MMX packed arithmetics 270 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); 271 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); 272 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); 273 } 274 275 if (Subtarget->hasSSE1()) { 276 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 277 278 setOperationAction(ISD::AND, MVT::v4f32, Legal); 279 setOperationAction(ISD::OR, MVT::v4f32, Legal); 280 setOperationAction(ISD::XOR, MVT::v4f32, Legal); 281 setOperationAction(ISD::ADD, MVT::v4f32, Legal); 282 setOperationAction(ISD::SUB, MVT::v4f32, Legal); 283 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 284 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 285 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 286 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 287 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 288 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 289 } 290 291 if (Subtarget->hasSSE2()) { 292 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 293 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 294 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 295 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 296 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 297 298 setOperationAction(ISD::ADD, MVT::v2f64, Legal); 299 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 300 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 301 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 302 setOperationAction(ISD::SUB, MVT::v2f64, Legal); 303 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 304 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 305 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 306 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 307 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 308 309 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 310 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 311 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 312 313 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 314 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 315 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); 316 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); 317 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); 318 } 319 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 320 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 321 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 322 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 323 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 324 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 325 326 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 327 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 328 setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); 329 AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); 330 setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); 331 AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); 332 setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); 333 AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); 334 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 335 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 336 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 337 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 338 } 339 340 // Custom lower v2i64 and v2f64 selects. 341 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 342 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 343 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 344 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 345 } 346 347 // We want to custom lower some of our intrinsics. 348 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 349 350 computeRegisterProperties(); 351 352 // FIXME: These should be based on subtarget info. Plus, the values should 353 // be smaller when we are in optimizing for size mode. 354 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 355 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 356 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 357 allowUnalignedMemoryAccesses = true; // x86 supports it! 358} 359 360std::vector<SDOperand> 361X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 362 if (F.getCallingConv() == CallingConv::Fast && EnableFastCC) 363 return LowerFastCCArguments(F, DAG); 364 return LowerCCCArguments(F, DAG); 365} 366 367std::pair<SDOperand, SDOperand> 368X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, 369 bool isVarArg, unsigned CallingConv, 370 bool isTailCall, 371 SDOperand Callee, ArgListTy &Args, 372 SelectionDAG &DAG) { 373 assert((!isVarArg || CallingConv == CallingConv::C) && 374 "Only C takes varargs!"); 375 376 // If the callee is a GlobalAddress node (quite common, every direct call is) 377 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 378 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 379 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 380 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 381 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 382 383 if (CallingConv == CallingConv::Fast && EnableFastCC) 384 return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG); 385 return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG); 386} 387 388//===----------------------------------------------------------------------===// 389// C Calling Convention implementation 390//===----------------------------------------------------------------------===// 391 392std::vector<SDOperand> 393X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { 394 std::vector<SDOperand> ArgValues; 395 396 MachineFunction &MF = DAG.getMachineFunction(); 397 MachineFrameInfo *MFI = MF.getFrameInfo(); 398 399 // Add DAG nodes to load the arguments... On entry to a function on the X86, 400 // the stack frame looks like this: 401 // 402 // [ESP] -- return address 403 // [ESP + 4] -- first argument (leftmost lexically) 404 // [ESP + 8] -- second argument, if first argument is four bytes in size 405 // ... 406 // 407 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 408 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 409 MVT::ValueType ObjectVT = getValueType(I->getType()); 410 unsigned ArgIncrement = 4; 411 unsigned ObjSize; 412 switch (ObjectVT) { 413 default: assert(0 && "Unhandled argument type!"); 414 case MVT::i1: 415 case MVT::i8: ObjSize = 1; break; 416 case MVT::i16: ObjSize = 2; break; 417 case MVT::i32: ObjSize = 4; break; 418 case MVT::i64: ObjSize = ArgIncrement = 8; break; 419 case MVT::f32: ObjSize = 4; break; 420 case MVT::f64: ObjSize = ArgIncrement = 8; break; 421 } 422 // Create the frame index object for this incoming parameter... 423 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 424 425 // Create the SelectionDAG nodes corresponding to a load from this parameter 426 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 427 428 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 429 // dead loads. 430 SDOperand ArgValue; 431 if (!I->use_empty()) 432 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 433 DAG.getSrcValue(NULL)); 434 else { 435 if (MVT::isInteger(ObjectVT)) 436 ArgValue = DAG.getConstant(0, ObjectVT); 437 else 438 ArgValue = DAG.getConstantFP(0, ObjectVT); 439 } 440 ArgValues.push_back(ArgValue); 441 442 ArgOffset += ArgIncrement; // Move on to the next argument... 443 } 444 445 // If the function takes variable number of arguments, make a frame index for 446 // the start of the first vararg value... for expansion of llvm.va_start. 447 if (F.isVarArg()) 448 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 449 ReturnAddrIndex = 0; // No return address slot generated yet. 450 BytesToPopOnReturn = 0; // Callee pops nothing. 451 BytesCallerReserves = ArgOffset; 452 453 // Finally, inform the code generator which regs we return values in. 454 switch (getValueType(F.getReturnType())) { 455 default: assert(0 && "Unknown type!"); 456 case MVT::isVoid: break; 457 case MVT::i1: 458 case MVT::i8: 459 case MVT::i16: 460 case MVT::i32: 461 MF.addLiveOut(X86::EAX); 462 break; 463 case MVT::i64: 464 MF.addLiveOut(X86::EAX); 465 MF.addLiveOut(X86::EDX); 466 break; 467 case MVT::f32: 468 case MVT::f64: 469 MF.addLiveOut(X86::ST0); 470 break; 471 } 472 return ArgValues; 473} 474 475std::pair<SDOperand, SDOperand> 476X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, 477 bool isVarArg, bool isTailCall, 478 SDOperand Callee, ArgListTy &Args, 479 SelectionDAG &DAG) { 480 // Count how many bytes are to be pushed on the stack. 481 unsigned NumBytes = 0; 482 483 if (Args.empty()) { 484 // Save zero bytes. 485 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy())); 486 } else { 487 for (unsigned i = 0, e = Args.size(); i != e; ++i) 488 switch (getValueType(Args[i].second)) { 489 default: assert(0 && "Unknown value type!"); 490 case MVT::i1: 491 case MVT::i8: 492 case MVT::i16: 493 case MVT::i32: 494 case MVT::f32: 495 NumBytes += 4; 496 break; 497 case MVT::i64: 498 case MVT::f64: 499 NumBytes += 8; 500 break; 501 } 502 503 Chain = DAG.getCALLSEQ_START(Chain, 504 DAG.getConstant(NumBytes, getPointerTy())); 505 506 // Arguments go on the stack in reverse order, as specified by the ABI. 507 unsigned ArgOffset = 0; 508 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 509 std::vector<SDOperand> Stores; 510 511 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 512 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 513 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 514 515 switch (getValueType(Args[i].second)) { 516 default: assert(0 && "Unexpected ValueType for argument!"); 517 case MVT::i1: 518 case MVT::i8: 519 case MVT::i16: 520 // Promote the integer to 32 bits. If the input type is signed use a 521 // sign extend, otherwise use a zero extend. 522 if (Args[i].second->isSigned()) 523 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 524 else 525 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 526 527 // FALL THROUGH 528 case MVT::i32: 529 case MVT::f32: 530 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 531 Args[i].first, PtrOff, 532 DAG.getSrcValue(NULL))); 533 ArgOffset += 4; 534 break; 535 case MVT::i64: 536 case MVT::f64: 537 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 538 Args[i].first, PtrOff, 539 DAG.getSrcValue(NULL))); 540 ArgOffset += 8; 541 break; 542 } 543 } 544 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 545 } 546 547 std::vector<MVT::ValueType> RetVals; 548 MVT::ValueType RetTyVT = getValueType(RetTy); 549 RetVals.push_back(MVT::Other); 550 551 // The result values produced have to be legal. Promote the result. 552 switch (RetTyVT) { 553 case MVT::isVoid: break; 554 default: 555 RetVals.push_back(RetTyVT); 556 break; 557 case MVT::i1: 558 case MVT::i8: 559 case MVT::i16: 560 RetVals.push_back(MVT::i32); 561 break; 562 case MVT::f32: 563 if (X86ScalarSSE) 564 RetVals.push_back(MVT::f32); 565 else 566 RetVals.push_back(MVT::f64); 567 break; 568 case MVT::i64: 569 RetVals.push_back(MVT::i32); 570 RetVals.push_back(MVT::i32); 571 break; 572 } 573 574 std::vector<MVT::ValueType> NodeTys; 575 NodeTys.push_back(MVT::Other); // Returns a chain 576 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 577 std::vector<SDOperand> Ops; 578 Ops.push_back(Chain); 579 Ops.push_back(Callee); 580 581 // FIXME: Do not generate X86ISD::TAILCALL for now. 582 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 583 SDOperand InFlag = Chain.getValue(1); 584 585 NodeTys.clear(); 586 NodeTys.push_back(MVT::Other); // Returns a chain 587 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 588 Ops.clear(); 589 Ops.push_back(Chain); 590 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 591 Ops.push_back(DAG.getConstant(0, getPointerTy())); 592 Ops.push_back(InFlag); 593 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 594 InFlag = Chain.getValue(1); 595 596 SDOperand RetVal; 597 if (RetTyVT != MVT::isVoid) { 598 switch (RetTyVT) { 599 default: assert(0 && "Unknown value type to return!"); 600 case MVT::i1: 601 case MVT::i8: 602 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 603 Chain = RetVal.getValue(1); 604 if (RetTyVT == MVT::i1) 605 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 606 break; 607 case MVT::i16: 608 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 609 Chain = RetVal.getValue(1); 610 break; 611 case MVT::i32: 612 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 613 Chain = RetVal.getValue(1); 614 break; 615 case MVT::i64: { 616 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 617 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 618 Lo.getValue(2)); 619 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 620 Chain = Hi.getValue(1); 621 break; 622 } 623 case MVT::f32: 624 case MVT::f64: { 625 std::vector<MVT::ValueType> Tys; 626 Tys.push_back(MVT::f64); 627 Tys.push_back(MVT::Other); 628 Tys.push_back(MVT::Flag); 629 std::vector<SDOperand> Ops; 630 Ops.push_back(Chain); 631 Ops.push_back(InFlag); 632 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 633 Chain = RetVal.getValue(1); 634 InFlag = RetVal.getValue(2); 635 if (X86ScalarSSE) { 636 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 637 // shouldn't be necessary except that RFP cannot be live across 638 // multiple blocks. When stackifier is fixed, they can be uncoupled. 639 MachineFunction &MF = DAG.getMachineFunction(); 640 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 641 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 642 Tys.clear(); 643 Tys.push_back(MVT::Other); 644 Ops.clear(); 645 Ops.push_back(Chain); 646 Ops.push_back(RetVal); 647 Ops.push_back(StackSlot); 648 Ops.push_back(DAG.getValueType(RetTyVT)); 649 Ops.push_back(InFlag); 650 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 651 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 652 DAG.getSrcValue(NULL)); 653 Chain = RetVal.getValue(1); 654 } 655 656 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 657 // FIXME: we would really like to remember that this FP_ROUND 658 // operation is okay to eliminate if we allow excess FP precision. 659 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 660 break; 661 } 662 } 663 } 664 665 return std::make_pair(RetVal, Chain); 666} 667 668//===----------------------------------------------------------------------===// 669// Fast Calling Convention implementation 670//===----------------------------------------------------------------------===// 671// 672// The X86 'fast' calling convention passes up to two integer arguments in 673// registers (an appropriate portion of EAX/EDX), passes arguments in C order, 674// and requires that the callee pop its arguments off the stack (allowing proper 675// tail calls), and has the same return value conventions as C calling convs. 676// 677// This calling convention always arranges for the callee pop value to be 8n+4 678// bytes, which is needed for tail recursion elimination and stack alignment 679// reasons. 680// 681// Note that this can be enhanced in the future to pass fp vals in registers 682// (when we have a global fp allocator) and do other tricks. 683// 684 685/// AddLiveIn - This helper function adds the specified physical register to the 686/// MachineFunction as a live in value. It also creates a corresponding virtual 687/// register for it. 688static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 689 TargetRegisterClass *RC) { 690 assert(RC->contains(PReg) && "Not the correct regclass!"); 691 unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC); 692 MF.addLiveIn(PReg, VReg); 693 return VReg; 694} 695 696// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments 697// to pass in registers. 0 is none, 1 is is "use EAX", 2 is "use EAX and 698// EDX". Anything more is illegal. 699// 700// FIXME: The linscan register allocator currently has problem with 701// coalescing. At the time of this writing, whenever it decides to coalesce 702// a physreg with a virtreg, this increases the size of the physreg's live 703// range, and the live range cannot ever be reduced. This causes problems if 704// too many physregs are coaleced with virtregs, which can cause the register 705// allocator to wedge itself. 706// 707// This code triggers this problem more often if we pass args in registers, 708// so disable it until this is fixed. 709// 710// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings 711// about code being dead. 712// 713static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0; 714 715 716std::vector<SDOperand> 717X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) { 718 std::vector<SDOperand> ArgValues; 719 720 MachineFunction &MF = DAG.getMachineFunction(); 721 MachineFrameInfo *MFI = MF.getFrameInfo(); 722 723 // Add DAG nodes to load the arguments... On entry to a function the stack 724 // frame looks like this: 725 // 726 // [ESP] -- return address 727 // [ESP + 4] -- first nonreg argument (leftmost lexically) 728 // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size 729 // ... 730 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 731 732 // Keep track of the number of integer regs passed so far. This can be either 733 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 734 // used). 735 unsigned NumIntRegs = 0; 736 737 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 738 MVT::ValueType ObjectVT = getValueType(I->getType()); 739 unsigned ArgIncrement = 4; 740 unsigned ObjSize = 0; 741 SDOperand ArgValue; 742 743 switch (ObjectVT) { 744 default: assert(0 && "Unhandled argument type!"); 745 case MVT::i1: 746 case MVT::i8: 747 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 748 if (!I->use_empty()) { 749 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, 750 X86::R8RegisterClass); 751 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8); 752 DAG.setRoot(ArgValue.getValue(1)); 753 if (ObjectVT == MVT::i1) 754 // FIXME: Should insert a assertzext here. 755 ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue); 756 } 757 ++NumIntRegs; 758 break; 759 } 760 761 ObjSize = 1; 762 break; 763 case MVT::i16: 764 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 765 if (!I->use_empty()) { 766 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX, 767 X86::R16RegisterClass); 768 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16); 769 DAG.setRoot(ArgValue.getValue(1)); 770 } 771 ++NumIntRegs; 772 break; 773 } 774 ObjSize = 2; 775 break; 776 case MVT::i32: 777 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 778 if (!I->use_empty()) { 779 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 780 X86::R32RegisterClass); 781 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 782 DAG.setRoot(ArgValue.getValue(1)); 783 } 784 ++NumIntRegs; 785 break; 786 } 787 ObjSize = 4; 788 break; 789 case MVT::i64: 790 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 791 if (!I->use_empty()) { 792 unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass); 793 unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 794 795 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 796 SDOperand Hi = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32); 797 DAG.setRoot(Hi.getValue(1)); 798 799 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 800 } 801 NumIntRegs += 2; 802 break; 803 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 804 if (!I->use_empty()) { 805 unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 806 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 807 DAG.setRoot(Low.getValue(1)); 808 809 // Load the high part from memory. 810 // Create the frame index object for this incoming parameter... 811 int FI = MFI->CreateFixedObject(4, ArgOffset); 812 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 813 SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN, 814 DAG.getSrcValue(NULL)); 815 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 816 } 817 ArgOffset += 4; 818 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 819 break; 820 } 821 ObjSize = ArgIncrement = 8; 822 break; 823 case MVT::f32: ObjSize = 4; break; 824 case MVT::f64: ObjSize = ArgIncrement = 8; break; 825 } 826 827 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 828 // dead loads. 829 if (ObjSize && !I->use_empty()) { 830 // Create the frame index object for this incoming parameter... 831 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 832 833 // Create the SelectionDAG nodes corresponding to a load from this 834 // parameter. 835 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 836 837 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 838 DAG.getSrcValue(NULL)); 839 } else if (ArgValue.Val == 0) { 840 if (MVT::isInteger(ObjectVT)) 841 ArgValue = DAG.getConstant(0, ObjectVT); 842 else 843 ArgValue = DAG.getConstantFP(0, ObjectVT); 844 } 845 ArgValues.push_back(ArgValue); 846 847 if (ObjSize) 848 ArgOffset += ArgIncrement; // Move on to the next argument. 849 } 850 851 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 852 // arguments and the arguments after the retaddr has been pushed are aligned. 853 if ((ArgOffset & 7) == 0) 854 ArgOffset += 4; 855 856 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 857 ReturnAddrIndex = 0; // No return address slot generated yet. 858 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 859 BytesCallerReserves = 0; 860 861 // Finally, inform the code generator which regs we return values in. 862 switch (getValueType(F.getReturnType())) { 863 default: assert(0 && "Unknown type!"); 864 case MVT::isVoid: break; 865 case MVT::i1: 866 case MVT::i8: 867 case MVT::i16: 868 case MVT::i32: 869 MF.addLiveOut(X86::EAX); 870 break; 871 case MVT::i64: 872 MF.addLiveOut(X86::EAX); 873 MF.addLiveOut(X86::EDX); 874 break; 875 case MVT::f32: 876 case MVT::f64: 877 MF.addLiveOut(X86::ST0); 878 break; 879 } 880 return ArgValues; 881} 882 883std::pair<SDOperand, SDOperand> 884X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, 885 bool isTailCall, SDOperand Callee, 886 ArgListTy &Args, SelectionDAG &DAG) { 887 // Count how many bytes are to be pushed on the stack. 888 unsigned NumBytes = 0; 889 890 // Keep track of the number of integer regs passed so far. This can be either 891 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 892 // used). 893 unsigned NumIntRegs = 0; 894 895 for (unsigned i = 0, e = Args.size(); i != e; ++i) 896 switch (getValueType(Args[i].second)) { 897 default: assert(0 && "Unknown value type!"); 898 case MVT::i1: 899 case MVT::i8: 900 case MVT::i16: 901 case MVT::i32: 902 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 903 ++NumIntRegs; 904 break; 905 } 906 // fall through 907 case MVT::f32: 908 NumBytes += 4; 909 break; 910 case MVT::i64: 911 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 912 NumIntRegs += 2; 913 break; 914 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 915 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 916 NumBytes += 4; 917 break; 918 } 919 920 // fall through 921 case MVT::f64: 922 NumBytes += 8; 923 break; 924 } 925 926 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 927 // arguments and the arguments after the retaddr has been pushed are aligned. 928 if ((NumBytes & 7) == 0) 929 NumBytes += 4; 930 931 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 932 933 // Arguments go on the stack in reverse order, as specified by the ABI. 934 unsigned ArgOffset = 0; 935 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 936 NumIntRegs = 0; 937 std::vector<SDOperand> Stores; 938 std::vector<SDOperand> RegValuesToPass; 939 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 940 switch (getValueType(Args[i].second)) { 941 default: assert(0 && "Unexpected ValueType for argument!"); 942 case MVT::i1: 943 Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first); 944 // Fall through. 945 case MVT::i8: 946 case MVT::i16: 947 case MVT::i32: 948 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 949 RegValuesToPass.push_back(Args[i].first); 950 ++NumIntRegs; 951 break; 952 } 953 // Fall through 954 case MVT::f32: { 955 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 956 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 957 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 958 Args[i].first, PtrOff, 959 DAG.getSrcValue(NULL))); 960 ArgOffset += 4; 961 break; 962 } 963 case MVT::i64: 964 // Can pass (at least) part of it in regs? 965 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 966 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 967 Args[i].first, DAG.getConstant(1, MVT::i32)); 968 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 969 Args[i].first, DAG.getConstant(0, MVT::i32)); 970 RegValuesToPass.push_back(Lo); 971 ++NumIntRegs; 972 973 // Pass both parts in regs? 974 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 975 RegValuesToPass.push_back(Hi); 976 ++NumIntRegs; 977 } else { 978 // Pass the high part in memory. 979 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 980 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 981 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 982 Hi, PtrOff, DAG.getSrcValue(NULL))); 983 ArgOffset += 4; 984 } 985 break; 986 } 987 // Fall through 988 case MVT::f64: 989 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 990 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 991 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 992 Args[i].first, PtrOff, 993 DAG.getSrcValue(NULL))); 994 ArgOffset += 8; 995 break; 996 } 997 } 998 if (!Stores.empty()) 999 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 1000 1001 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1002 // arguments and the arguments after the retaddr has been pushed are aligned. 1003 if ((ArgOffset & 7) == 0) 1004 ArgOffset += 4; 1005 1006 std::vector<MVT::ValueType> RetVals; 1007 MVT::ValueType RetTyVT = getValueType(RetTy); 1008 1009 RetVals.push_back(MVT::Other); 1010 1011 // The result values produced have to be legal. Promote the result. 1012 switch (RetTyVT) { 1013 case MVT::isVoid: break; 1014 default: 1015 RetVals.push_back(RetTyVT); 1016 break; 1017 case MVT::i1: 1018 case MVT::i8: 1019 case MVT::i16: 1020 RetVals.push_back(MVT::i32); 1021 break; 1022 case MVT::f32: 1023 if (X86ScalarSSE) 1024 RetVals.push_back(MVT::f32); 1025 else 1026 RetVals.push_back(MVT::f64); 1027 break; 1028 case MVT::i64: 1029 RetVals.push_back(MVT::i32); 1030 RetVals.push_back(MVT::i32); 1031 break; 1032 } 1033 1034 // Build a sequence of copy-to-reg nodes chained together with token chain 1035 // and flag operands which copy the outgoing args into registers. 1036 SDOperand InFlag; 1037 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 1038 unsigned CCReg; 1039 SDOperand RegToPass = RegValuesToPass[i]; 1040 switch (RegToPass.getValueType()) { 1041 default: assert(0 && "Bad thing to pass in regs"); 1042 case MVT::i8: 1043 CCReg = (i == 0) ? X86::AL : X86::DL; 1044 break; 1045 case MVT::i16: 1046 CCReg = (i == 0) ? X86::AX : X86::DX; 1047 break; 1048 case MVT::i32: 1049 CCReg = (i == 0) ? X86::EAX : X86::EDX; 1050 break; 1051 } 1052 1053 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 1054 InFlag = Chain.getValue(1); 1055 } 1056 1057 std::vector<MVT::ValueType> NodeTys; 1058 NodeTys.push_back(MVT::Other); // Returns a chain 1059 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1060 std::vector<SDOperand> Ops; 1061 Ops.push_back(Chain); 1062 Ops.push_back(Callee); 1063 if (InFlag.Val) 1064 Ops.push_back(InFlag); 1065 1066 // FIXME: Do not generate X86ISD::TAILCALL for now. 1067 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 1068 InFlag = Chain.getValue(1); 1069 1070 NodeTys.clear(); 1071 NodeTys.push_back(MVT::Other); // Returns a chain 1072 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1073 Ops.clear(); 1074 Ops.push_back(Chain); 1075 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1076 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1077 Ops.push_back(InFlag); 1078 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 1079 InFlag = Chain.getValue(1); 1080 1081 SDOperand RetVal; 1082 if (RetTyVT != MVT::isVoid) { 1083 switch (RetTyVT) { 1084 default: assert(0 && "Unknown value type to return!"); 1085 case MVT::i1: 1086 case MVT::i8: 1087 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 1088 Chain = RetVal.getValue(1); 1089 if (RetTyVT == MVT::i1) 1090 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 1091 break; 1092 case MVT::i16: 1093 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 1094 Chain = RetVal.getValue(1); 1095 break; 1096 case MVT::i32: 1097 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1098 Chain = RetVal.getValue(1); 1099 break; 1100 case MVT::i64: { 1101 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1102 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 1103 Lo.getValue(2)); 1104 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 1105 Chain = Hi.getValue(1); 1106 break; 1107 } 1108 case MVT::f32: 1109 case MVT::f64: { 1110 std::vector<MVT::ValueType> Tys; 1111 Tys.push_back(MVT::f64); 1112 Tys.push_back(MVT::Other); 1113 Tys.push_back(MVT::Flag); 1114 std::vector<SDOperand> Ops; 1115 Ops.push_back(Chain); 1116 Ops.push_back(InFlag); 1117 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 1118 Chain = RetVal.getValue(1); 1119 InFlag = RetVal.getValue(2); 1120 if (X86ScalarSSE) { 1121 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 1122 // shouldn't be necessary except that RFP cannot be live across 1123 // multiple blocks. When stackifier is fixed, they can be uncoupled. 1124 MachineFunction &MF = DAG.getMachineFunction(); 1125 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1126 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1127 Tys.clear(); 1128 Tys.push_back(MVT::Other); 1129 Ops.clear(); 1130 Ops.push_back(Chain); 1131 Ops.push_back(RetVal); 1132 Ops.push_back(StackSlot); 1133 Ops.push_back(DAG.getValueType(RetTyVT)); 1134 Ops.push_back(InFlag); 1135 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 1136 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 1137 DAG.getSrcValue(NULL)); 1138 Chain = RetVal.getValue(1); 1139 } 1140 1141 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 1142 // FIXME: we would really like to remember that this FP_ROUND 1143 // operation is okay to eliminate if we allow excess FP precision. 1144 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1145 break; 1146 } 1147 } 1148 } 1149 1150 return std::make_pair(RetVal, Chain); 1151} 1152 1153SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1154 if (ReturnAddrIndex == 0) { 1155 // Set up a frame object for the return address. 1156 MachineFunction &MF = DAG.getMachineFunction(); 1157 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1158 } 1159 1160 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 1161} 1162 1163 1164 1165std::pair<SDOperand, SDOperand> X86TargetLowering:: 1166LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1167 SelectionDAG &DAG) { 1168 SDOperand Result; 1169 if (Depth) // Depths > 0 not supported yet! 1170 Result = DAG.getConstant(0, getPointerTy()); 1171 else { 1172 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1173 if (!isFrameAddress) 1174 // Just load the return address 1175 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 1176 DAG.getSrcValue(NULL)); 1177 else 1178 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 1179 DAG.getConstant(4, MVT::i32)); 1180 } 1181 return std::make_pair(Result, Chain); 1182} 1183 1184/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1185/// which corresponds to the condition code. 1186static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1187 switch (X86CC) { 1188 default: assert(0 && "Unknown X86 conditional code!"); 1189 case X86ISD::COND_A: return X86::JA; 1190 case X86ISD::COND_AE: return X86::JAE; 1191 case X86ISD::COND_B: return X86::JB; 1192 case X86ISD::COND_BE: return X86::JBE; 1193 case X86ISD::COND_E: return X86::JE; 1194 case X86ISD::COND_G: return X86::JG; 1195 case X86ISD::COND_GE: return X86::JGE; 1196 case X86ISD::COND_L: return X86::JL; 1197 case X86ISD::COND_LE: return X86::JLE; 1198 case X86ISD::COND_NE: return X86::JNE; 1199 case X86ISD::COND_NO: return X86::JNO; 1200 case X86ISD::COND_NP: return X86::JNP; 1201 case X86ISD::COND_NS: return X86::JNS; 1202 case X86ISD::COND_O: return X86::JO; 1203 case X86ISD::COND_P: return X86::JP; 1204 case X86ISD::COND_S: return X86::JS; 1205 } 1206} 1207 1208/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1209/// specific condition code. It returns a false if it cannot do a direct 1210/// translation. X86CC is the translated CondCode. Flip is set to true if the 1211/// the order of comparison operands should be flipped. 1212static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1213 unsigned &X86CC, bool &Flip) { 1214 Flip = false; 1215 X86CC = X86ISD::COND_INVALID; 1216 if (!isFP) { 1217 switch (SetCCOpcode) { 1218 default: break; 1219 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1220 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 1221 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 1222 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 1223 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 1224 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1225 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 1226 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 1227 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 1228 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 1229 } 1230 } else { 1231 // On a floating point condition, the flags are set as follows: 1232 // ZF PF CF op 1233 // 0 | 0 | 0 | X > Y 1234 // 0 | 0 | 1 | X < Y 1235 // 1 | 0 | 0 | X == Y 1236 // 1 | 1 | 1 | unordered 1237 switch (SetCCOpcode) { 1238 default: break; 1239 case ISD::SETUEQ: 1240 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1241 case ISD::SETOLT: Flip = true; // Fallthrough 1242 case ISD::SETOGT: 1243 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 1244 case ISD::SETOLE: Flip = true; // Fallthrough 1245 case ISD::SETOGE: 1246 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 1247 case ISD::SETUGT: Flip = true; // Fallthrough 1248 case ISD::SETULT: 1249 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 1250 case ISD::SETUGE: Flip = true; // Fallthrough 1251 case ISD::SETULE: 1252 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 1253 case ISD::SETONE: 1254 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1255 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 1256 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 1257 } 1258 } 1259 1260 return X86CC != X86ISD::COND_INVALID; 1261} 1262 1263static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 1264 bool &Flip) { 1265 return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 1266} 1267 1268/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1269/// code. Current x86 isa includes the following FP cmov instructions: 1270/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1271static bool hasFPCMov(unsigned X86CC) { 1272 switch (X86CC) { 1273 default: 1274 return false; 1275 case X86ISD::COND_B: 1276 case X86ISD::COND_BE: 1277 case X86ISD::COND_E: 1278 case X86ISD::COND_P: 1279 case X86ISD::COND_A: 1280 case X86ISD::COND_AE: 1281 case X86ISD::COND_NE: 1282 case X86ISD::COND_NP: 1283 return true; 1284 } 1285} 1286 1287MachineBasicBlock * 1288X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 1289 MachineBasicBlock *BB) { 1290 switch (MI->getOpcode()) { 1291 default: assert(false && "Unexpected instr type to insert"); 1292 case X86::CMOV_FR32: 1293 case X86::CMOV_FR64: 1294 case X86::CMOV_V4F32: 1295 case X86::CMOV_V2F64: 1296 case X86::CMOV_V2I64: { 1297 // To "insert" a SELECT_CC instruction, we actually have to insert the 1298 // diamond control-flow pattern. The incoming instruction knows the 1299 // destination vreg to set, the condition code register to branch on, the 1300 // true/false values to select between, and a branch opcode to use. 1301 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 1302 ilist<MachineBasicBlock>::iterator It = BB; 1303 ++It; 1304 1305 // thisMBB: 1306 // ... 1307 // TrueVal = ... 1308 // cmpTY ccX, r1, r2 1309 // bCC copy1MBB 1310 // fallthrough --> copy0MBB 1311 MachineBasicBlock *thisMBB = BB; 1312 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 1313 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 1314 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 1315 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 1316 MachineFunction *F = BB->getParent(); 1317 F->getBasicBlockList().insert(It, copy0MBB); 1318 F->getBasicBlockList().insert(It, sinkMBB); 1319 // Update machine-CFG edges by first adding all successors of the current 1320 // block to the new block which will contain the Phi node for the select. 1321 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 1322 e = BB->succ_end(); i != e; ++i) 1323 sinkMBB->addSuccessor(*i); 1324 // Next, remove all successors of the current block, and add the true 1325 // and fallthrough blocks as its successors. 1326 while(!BB->succ_empty()) 1327 BB->removeSuccessor(BB->succ_begin()); 1328 BB->addSuccessor(copy0MBB); 1329 BB->addSuccessor(sinkMBB); 1330 1331 // copy0MBB: 1332 // %FalseValue = ... 1333 // # fallthrough to sinkMBB 1334 BB = copy0MBB; 1335 1336 // Update machine-CFG edges 1337 BB->addSuccessor(sinkMBB); 1338 1339 // sinkMBB: 1340 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 1341 // ... 1342 BB = sinkMBB; 1343 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 1344 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 1345 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 1346 1347 delete MI; // The pseudo instruction is gone now. 1348 return BB; 1349 } 1350 1351 case X86::FP_TO_INT16_IN_MEM: 1352 case X86::FP_TO_INT32_IN_MEM: 1353 case X86::FP_TO_INT64_IN_MEM: { 1354 // Change the floating point control register to use "round towards zero" 1355 // mode when truncating to an integer value. 1356 MachineFunction *F = BB->getParent(); 1357 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 1358 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 1359 1360 // Load the old value of the high byte of the control word... 1361 unsigned OldCW = 1362 F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass); 1363 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx); 1364 1365 // Set the high part to be round to zero... 1366 addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F); 1367 1368 // Reload the modified control word now... 1369 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1370 1371 // Restore the memory image of control word to original value 1372 addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW); 1373 1374 // Get the X86 opcode to use. 1375 unsigned Opc; 1376 switch (MI->getOpcode()) { 1377 default: assert(0 && "illegal opcode!"); 1378 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 1379 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 1380 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 1381 } 1382 1383 X86AddressMode AM; 1384 MachineOperand &Op = MI->getOperand(0); 1385 if (Op.isRegister()) { 1386 AM.BaseType = X86AddressMode::RegBase; 1387 AM.Base.Reg = Op.getReg(); 1388 } else { 1389 AM.BaseType = X86AddressMode::FrameIndexBase; 1390 AM.Base.FrameIndex = Op.getFrameIndex(); 1391 } 1392 Op = MI->getOperand(1); 1393 if (Op.isImmediate()) 1394 AM.Scale = Op.getImmedValue(); 1395 Op = MI->getOperand(2); 1396 if (Op.isImmediate()) 1397 AM.IndexReg = Op.getImmedValue(); 1398 Op = MI->getOperand(3); 1399 if (Op.isGlobalAddress()) { 1400 AM.GV = Op.getGlobal(); 1401 } else { 1402 AM.Disp = Op.getImmedValue(); 1403 } 1404 addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg()); 1405 1406 // Reload the original control word now. 1407 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1408 1409 delete MI; // The pseudo instruction is gone now. 1410 return BB; 1411 } 1412 } 1413} 1414 1415 1416//===----------------------------------------------------------------------===// 1417// X86 Custom Lowering Hooks 1418//===----------------------------------------------------------------------===// 1419 1420/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 1421/// load. For Darwin, external and weak symbols are indirect, loading the value 1422/// at address GV rather then the value of GV itself. This means that the 1423/// GlobalAddress must be in the base or index register of the address, not the 1424/// GV offset field. 1425static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 1426 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 1427 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 1428} 1429 1430/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1431/// true if Op is undef or if its value falls within the specified range (L, H]. 1432static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1433 if (Op.getOpcode() == ISD::UNDEF) 1434 return true; 1435 1436 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1437 return (Val >= Low && Val < Hi); 1438} 1439 1440/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1441/// true if Op is undef or if its value equal to the specified value. 1442static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1443 if (Op.getOpcode() == ISD::UNDEF) 1444 return true; 1445 return cast<ConstantSDNode>(Op)->getValue() == Val; 1446} 1447 1448/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1449/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1450bool X86::isPSHUFDMask(SDNode *N) { 1451 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1452 1453 if (N->getNumOperands() != 4) 1454 return false; 1455 1456 // Check if the value doesn't reference the second vector. 1457 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1458 SDOperand Arg = N->getOperand(i); 1459 if (Arg.getOpcode() == ISD::UNDEF) continue; 1460 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1461 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1462 return false; 1463 } 1464 1465 return true; 1466} 1467 1468/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1469/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1470bool X86::isPSHUFHWMask(SDNode *N) { 1471 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1472 1473 if (N->getNumOperands() != 8) 1474 return false; 1475 1476 // Lower quadword copied in order. 1477 for (unsigned i = 0; i != 4; ++i) { 1478 SDOperand Arg = N->getOperand(i); 1479 if (Arg.getOpcode() == ISD::UNDEF) continue; 1480 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1481 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1482 return false; 1483 } 1484 1485 // Upper quadword shuffled. 1486 for (unsigned i = 4; i != 8; ++i) { 1487 SDOperand Arg = N->getOperand(i); 1488 if (Arg.getOpcode() == ISD::UNDEF) continue; 1489 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1490 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1491 if (Val < 4 || Val > 7) 1492 return false; 1493 } 1494 1495 return true; 1496} 1497 1498/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1499/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1500bool X86::isPSHUFLWMask(SDNode *N) { 1501 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1502 1503 if (N->getNumOperands() != 8) 1504 return false; 1505 1506 // Upper quadword copied in order. 1507 for (unsigned i = 4; i != 8; ++i) 1508 if (!isUndefOrEqual(N->getOperand(i), i)) 1509 return false; 1510 1511 // Lower quadword shuffled. 1512 for (unsigned i = 0; i != 4; ++i) 1513 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1514 return false; 1515 1516 return true; 1517} 1518 1519/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1520/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1521bool X86::isSHUFPMask(SDNode *N) { 1522 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1523 1524 unsigned NumElems = N->getNumOperands(); 1525 if (NumElems == 2) { 1526 // The only cases that ought be handled by SHUFPD is 1527 // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1528 // Dest { 3, 0 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1529 // Expect bit 0 == 1, bit1 == 2 1530 SDOperand Bit0 = N->getOperand(0); 1531 SDOperand Bit1 = N->getOperand(1); 1532 if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3)) 1533 return true; 1534 if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2)) 1535 return true; 1536 return false; 1537 } 1538 1539 if (NumElems != 4) return false; 1540 1541 // Each half must refer to only one of the vector. 1542 for (unsigned i = 0; i < 2; ++i) { 1543 SDOperand Arg = N->getOperand(i); 1544 if (Arg.getOpcode() == ISD::UNDEF) continue; 1545 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1546 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1547 if (Val >= 4) return false; 1548 } 1549 for (unsigned i = 2; i < 4; ++i) { 1550 SDOperand Arg = N->getOperand(i); 1551 if (Arg.getOpcode() == ISD::UNDEF) continue; 1552 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1553 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1554 if (Val < 4) return false; 1555 } 1556 1557 return true; 1558} 1559 1560/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1561/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1562bool X86::isMOVHLPSMask(SDNode *N) { 1563 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1564 1565 if (N->getNumOperands() != 4) 1566 return false; 1567 1568 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1569 return isUndefOrEqual(N->getOperand(0), 6) && 1570 isUndefOrEqual(N->getOperand(1), 7) && 1571 isUndefOrEqual(N->getOperand(2), 2) && 1572 isUndefOrEqual(N->getOperand(3), 3); 1573} 1574 1575/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand 1576/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1577bool X86::isMOVLHPSMask(SDNode *N) { 1578 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1579 1580 if (N->getNumOperands() != 4) 1581 return false; 1582 1583 // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5 1584 return isUndefOrEqual(N->getOperand(0), 0) && 1585 isUndefOrEqual(N->getOperand(1), 1) && 1586 isUndefOrEqual(N->getOperand(2), 4) && 1587 isUndefOrEqual(N->getOperand(3), 5); 1588} 1589 1590/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1591/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1592bool X86::isMOVLPMask(SDNode *N) { 1593 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1594 1595 unsigned NumElems = N->getNumOperands(); 1596 if (NumElems != 2 && NumElems != 4) 1597 return false; 1598 1599 for (unsigned i = 0; i < NumElems/2; ++i) 1600 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1601 return false; 1602 1603 for (unsigned i = NumElems/2; i < NumElems; ++i) 1604 if (!isUndefOrEqual(N->getOperand(i), i)) 1605 return false; 1606 1607 return true; 1608} 1609 1610/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1611/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}. 1612bool X86::isMOVHPMask(SDNode *N) { 1613 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1614 1615 unsigned NumElems = N->getNumOperands(); 1616 if (NumElems != 2 && NumElems != 4) 1617 return false; 1618 1619 for (unsigned i = 0; i < NumElems/2; ++i) 1620 if (!isUndefOrEqual(N->getOperand(i), i)) 1621 return false; 1622 1623 for (unsigned i = 0; i < NumElems/2; ++i) { 1624 SDOperand Arg = N->getOperand(i + NumElems/2); 1625 if (!isUndefOrEqual(Arg, i + NumElems)) 1626 return false; 1627 } 1628 1629 return true; 1630} 1631 1632/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1633/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1634bool X86::isUNPCKLMask(SDNode *N) { 1635 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1636 1637 unsigned NumElems = N->getNumOperands(); 1638 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1639 return false; 1640 1641 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1642 SDOperand BitI = N->getOperand(i); 1643 SDOperand BitI1 = N->getOperand(i+1); 1644 if (!isUndefOrEqual(BitI, j)) 1645 return false; 1646 if (!isUndefOrEqual(BitI1, j + NumElems)) 1647 return false; 1648 } 1649 1650 return true; 1651} 1652 1653/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1654/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1655bool X86::isUNPCKHMask(SDNode *N) { 1656 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1657 1658 unsigned NumElems = N->getNumOperands(); 1659 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1660 return false; 1661 1662 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1663 SDOperand BitI = N->getOperand(i); 1664 SDOperand BitI1 = N->getOperand(i+1); 1665 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1666 return false; 1667 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1668 return false; 1669 } 1670 1671 return true; 1672} 1673 1674/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1675/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1676/// <0, 0, 1, 1> 1677bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1678 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1679 1680 unsigned NumElems = N->getNumOperands(); 1681 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1682 return false; 1683 1684 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1685 SDOperand BitI = N->getOperand(i); 1686 SDOperand BitI1 = N->getOperand(i+1); 1687 1688 if (!isUndefOrEqual(BitI, j)) 1689 return false; 1690 if (!isUndefOrEqual(BitI1, j)) 1691 return false; 1692 } 1693 1694 return true; 1695} 1696 1697/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand 1698/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}. 1699bool X86::isMOVSMask(SDNode *N) { 1700 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1701 1702 unsigned NumElems = N->getNumOperands(); 1703 if (NumElems != 2 && NumElems != 4) 1704 return false; 1705 1706 if (!isUndefOrEqual(N->getOperand(0), NumElems)) 1707 return false; 1708 1709 for (unsigned i = 1; i < NumElems; ++i) { 1710 SDOperand Arg = N->getOperand(i); 1711 if (!isUndefOrEqual(Arg, i)) 1712 return false; 1713 } 1714 1715 return true; 1716} 1717 1718/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1719/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1720bool X86::isMOVSHDUPMask(SDNode *N) { 1721 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1722 1723 if (N->getNumOperands() != 4) 1724 return false; 1725 1726 // Expect 1, 1, 3, 3 1727 for (unsigned i = 0; i < 2; ++i) { 1728 SDOperand Arg = N->getOperand(i); 1729 if (Arg.getOpcode() == ISD::UNDEF) continue; 1730 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1731 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1732 if (Val != 1) return false; 1733 } 1734 1735 bool HasHi = false; 1736 for (unsigned i = 2; i < 4; ++i) { 1737 SDOperand Arg = N->getOperand(i); 1738 if (Arg.getOpcode() == ISD::UNDEF) continue; 1739 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1740 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1741 if (Val != 3) return false; 1742 HasHi = true; 1743 } 1744 1745 // Don't use movshdup if it can be done with a shufps. 1746 return HasHi; 1747} 1748 1749/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1750/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 1751bool X86::isMOVSLDUPMask(SDNode *N) { 1752 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1753 1754 if (N->getNumOperands() != 4) 1755 return false; 1756 1757 // Expect 0, 0, 2, 2 1758 for (unsigned i = 0; i < 2; ++i) { 1759 SDOperand Arg = N->getOperand(i); 1760 if (Arg.getOpcode() == ISD::UNDEF) continue; 1761 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1762 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1763 if (Val != 0) return false; 1764 } 1765 1766 bool HasHi = false; 1767 for (unsigned i = 2; i < 4; ++i) { 1768 SDOperand Arg = N->getOperand(i); 1769 if (Arg.getOpcode() == ISD::UNDEF) continue; 1770 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1771 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1772 if (Val != 2) return false; 1773 HasHi = true; 1774 } 1775 1776 // Don't use movshdup if it can be done with a shufps. 1777 return HasHi; 1778} 1779 1780/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1781/// a splat of a single element. 1782bool X86::isSplatMask(SDNode *N) { 1783 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1784 1785 // We can only splat 64-bit, and 32-bit quantities. 1786 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 1787 return false; 1788 1789 // This is a splat operation if each element of the permute is the same, and 1790 // if the value doesn't reference the second vector. 1791 SDOperand Elt = N->getOperand(0); 1792 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 1793 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) { 1794 SDOperand Arg = N->getOperand(i); 1795 if (Arg.getOpcode() == ISD::UNDEF) continue; 1796 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1797 if (Arg != Elt) return false; 1798 } 1799 1800 // Make sure it is a splat of the first vector operand. 1801 return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands(); 1802} 1803 1804/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 1805/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 1806/// instructions. 1807unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 1808 unsigned NumOperands = N->getNumOperands(); 1809 unsigned Shift = (NumOperands == 4) ? 2 : 1; 1810 unsigned Mask = 0; 1811 for (unsigned i = 0; i < NumOperands; ++i) { 1812 unsigned Val = 0; 1813 SDOperand Arg = N->getOperand(NumOperands-i-1); 1814 if (Arg.getOpcode() != ISD::UNDEF) 1815 Val = cast<ConstantSDNode>(Arg)->getValue(); 1816 if (Val >= NumOperands) Val -= NumOperands; 1817 Mask |= Val; 1818 if (i != NumOperands - 1) 1819 Mask <<= Shift; 1820 } 1821 1822 return Mask; 1823} 1824 1825/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 1826/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 1827/// instructions. 1828unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 1829 unsigned Mask = 0; 1830 // 8 nodes, but we only care about the last 4. 1831 for (unsigned i = 7; i >= 4; --i) { 1832 unsigned Val = 0; 1833 SDOperand Arg = N->getOperand(i); 1834 if (Arg.getOpcode() != ISD::UNDEF) 1835 Val = cast<ConstantSDNode>(Arg)->getValue(); 1836 Mask |= (Val - 4); 1837 if (i != 4) 1838 Mask <<= 2; 1839 } 1840 1841 return Mask; 1842} 1843 1844/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 1845/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 1846/// instructions. 1847unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 1848 unsigned Mask = 0; 1849 // 8 nodes, but we only care about the first 4. 1850 for (int i = 3; i >= 0; --i) { 1851 unsigned Val = 0; 1852 SDOperand Arg = N->getOperand(i); 1853 if (Arg.getOpcode() != ISD::UNDEF) 1854 Val = cast<ConstantSDNode>(Arg)->getValue(); 1855 Mask |= Val; 1856 if (i != 0) 1857 Mask <<= 2; 1858 } 1859 1860 return Mask; 1861} 1862 1863/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 1864/// specifies a 8 element shuffle that can be broken into a pair of 1865/// PSHUFHW and PSHUFLW. 1866static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 1867 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1868 1869 if (N->getNumOperands() != 8) 1870 return false; 1871 1872 // Lower quadword shuffled. 1873 for (unsigned i = 0; i != 4; ++i) { 1874 SDOperand Arg = N->getOperand(i); 1875 if (Arg.getOpcode() == ISD::UNDEF) continue; 1876 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1877 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1878 if (Val > 4) 1879 return false; 1880 } 1881 1882 // Upper quadword shuffled. 1883 for (unsigned i = 4; i != 8; ++i) { 1884 SDOperand Arg = N->getOperand(i); 1885 if (Arg.getOpcode() == ISD::UNDEF) continue; 1886 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1887 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1888 if (Val < 4 || Val > 7) 1889 return false; 1890 } 1891 1892 return true; 1893} 1894 1895/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 1896/// values in ther permute mask. 1897static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 1898 SDOperand V1 = Op.getOperand(0); 1899 SDOperand V2 = Op.getOperand(1); 1900 SDOperand Mask = Op.getOperand(2); 1901 MVT::ValueType VT = Op.getValueType(); 1902 MVT::ValueType MaskVT = Mask.getValueType(); 1903 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 1904 unsigned NumElems = Mask.getNumOperands(); 1905 std::vector<SDOperand> MaskVec; 1906 1907 for (unsigned i = 0; i != NumElems; ++i) { 1908 SDOperand Arg = Mask.getOperand(i); 1909 if (Arg.getOpcode() == ISD::UNDEF) continue; 1910 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1911 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1912 if (Val < NumElems) 1913 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 1914 else 1915 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 1916 } 1917 1918 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1919 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 1920} 1921 1922/// isScalarLoadToVector - Returns true if the node is a scalar load that 1923/// is promoted to a vector. 1924static inline bool isScalarLoadToVector(SDOperand Op) { 1925 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1926 Op = Op.getOperand(0); 1927 return (Op.getOpcode() == ISD::LOAD); 1928 } 1929 return false; 1930} 1931 1932/// ShouldXformedToMOVLP - Return true if the node should be transformed to 1933/// match movlp{d|s}. The lower half elements should come from V1 (and in 1934/// order), and the upper half elements should come from the upper half of 1935/// V2 (not necessarily in order). And since V1 will become the source of 1936/// the MOVLP, it must be a scalar load. 1937static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) { 1938 if (isScalarLoadToVector(V1)) { 1939 unsigned NumElems = Mask.getNumOperands(); 1940 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1941 if (!isUndefOrEqual(Mask.getOperand(i), i)) 1942 return false; 1943 for (unsigned i = NumElems/2; i != NumElems; ++i) 1944 if (!isUndefOrInRange(Mask.getOperand(i), 1945 NumElems+NumElems/2, NumElems*2)) 1946 return false; 1947 return true; 1948 } 1949 1950 return false; 1951} 1952 1953/// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except 1954/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1955/// half elements to come from vector 1 (which would equal the dest.) and 1956/// the upper half to come from vector 2. 1957static bool isLowerFromV2UpperFromV1(SDOperand Op) { 1958 assert(Op.getOpcode() == ISD::BUILD_VECTOR); 1959 1960 unsigned NumElems = Op.getNumOperands(); 1961 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1962 if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2)) 1963 return false; 1964 for (unsigned i = NumElems/2; i != NumElems; ++i) 1965 if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems)) 1966 return false; 1967 return true; 1968} 1969 1970/// LowerOperation - Provide custom lowering hooks for some operations. 1971/// 1972SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 1973 switch (Op.getOpcode()) { 1974 default: assert(0 && "Should not custom lower this!"); 1975 case ISD::SHL_PARTS: 1976 case ISD::SRA_PARTS: 1977 case ISD::SRL_PARTS: { 1978 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 1979 "Not an i64 shift!"); 1980 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 1981 SDOperand ShOpLo = Op.getOperand(0); 1982 SDOperand ShOpHi = Op.getOperand(1); 1983 SDOperand ShAmt = Op.getOperand(2); 1984 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 1985 DAG.getConstant(31, MVT::i8)) 1986 : DAG.getConstant(0, MVT::i32); 1987 1988 SDOperand Tmp2, Tmp3; 1989 if (Op.getOpcode() == ISD::SHL_PARTS) { 1990 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 1991 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 1992 } else { 1993 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 1994 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 1995 } 1996 1997 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 1998 ShAmt, DAG.getConstant(32, MVT::i8)); 1999 2000 SDOperand Hi, Lo; 2001 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2002 2003 std::vector<MVT::ValueType> Tys; 2004 Tys.push_back(MVT::i32); 2005 Tys.push_back(MVT::Flag); 2006 std::vector<SDOperand> Ops; 2007 if (Op.getOpcode() == ISD::SHL_PARTS) { 2008 Ops.push_back(Tmp2); 2009 Ops.push_back(Tmp3); 2010 Ops.push_back(CC); 2011 Ops.push_back(InFlag); 2012 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2013 InFlag = Hi.getValue(1); 2014 2015 Ops.clear(); 2016 Ops.push_back(Tmp3); 2017 Ops.push_back(Tmp1); 2018 Ops.push_back(CC); 2019 Ops.push_back(InFlag); 2020 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2021 } else { 2022 Ops.push_back(Tmp2); 2023 Ops.push_back(Tmp3); 2024 Ops.push_back(CC); 2025 Ops.push_back(InFlag); 2026 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2027 InFlag = Lo.getValue(1); 2028 2029 Ops.clear(); 2030 Ops.push_back(Tmp3); 2031 Ops.push_back(Tmp1); 2032 Ops.push_back(CC); 2033 Ops.push_back(InFlag); 2034 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2035 } 2036 2037 Tys.clear(); 2038 Tys.push_back(MVT::i32); 2039 Tys.push_back(MVT::i32); 2040 Ops.clear(); 2041 Ops.push_back(Lo); 2042 Ops.push_back(Hi); 2043 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2044 } 2045 case ISD::SINT_TO_FP: { 2046 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 2047 Op.getOperand(0).getValueType() >= MVT::i16 && 2048 "Unknown SINT_TO_FP to lower!"); 2049 2050 SDOperand Result; 2051 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 2052 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 2053 MachineFunction &MF = DAG.getMachineFunction(); 2054 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2055 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2056 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 2057 DAG.getEntryNode(), Op.getOperand(0), 2058 StackSlot, DAG.getSrcValue(NULL)); 2059 2060 // Build the FILD 2061 std::vector<MVT::ValueType> Tys; 2062 Tys.push_back(MVT::f64); 2063 Tys.push_back(MVT::Other); 2064 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 2065 std::vector<SDOperand> Ops; 2066 Ops.push_back(Chain); 2067 Ops.push_back(StackSlot); 2068 Ops.push_back(DAG.getValueType(SrcVT)); 2069 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 2070 Tys, Ops); 2071 2072 if (X86ScalarSSE) { 2073 Chain = Result.getValue(1); 2074 SDOperand InFlag = Result.getValue(2); 2075 2076 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 2077 // shouldn't be necessary except that RFP cannot be live across 2078 // multiple blocks. When stackifier is fixed, they can be uncoupled. 2079 MachineFunction &MF = DAG.getMachineFunction(); 2080 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2081 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2082 std::vector<MVT::ValueType> Tys; 2083 Tys.push_back(MVT::Other); 2084 std::vector<SDOperand> Ops; 2085 Ops.push_back(Chain); 2086 Ops.push_back(Result); 2087 Ops.push_back(StackSlot); 2088 Ops.push_back(DAG.getValueType(Op.getValueType())); 2089 Ops.push_back(InFlag); 2090 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 2091 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 2092 DAG.getSrcValue(NULL)); 2093 } 2094 2095 return Result; 2096 } 2097 case ISD::FP_TO_SINT: { 2098 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 2099 "Unknown FP_TO_SINT to lower!"); 2100 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 2101 // stack slot. 2102 MachineFunction &MF = DAG.getMachineFunction(); 2103 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 2104 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2105 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2106 2107 unsigned Opc; 2108 switch (Op.getValueType()) { 2109 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 2110 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 2111 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 2112 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 2113 } 2114 2115 SDOperand Chain = DAG.getEntryNode(); 2116 SDOperand Value = Op.getOperand(0); 2117 if (X86ScalarSSE) { 2118 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 2119 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 2120 DAG.getSrcValue(0)); 2121 std::vector<MVT::ValueType> Tys; 2122 Tys.push_back(MVT::f64); 2123 Tys.push_back(MVT::Other); 2124 std::vector<SDOperand> Ops; 2125 Ops.push_back(Chain); 2126 Ops.push_back(StackSlot); 2127 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 2128 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 2129 Chain = Value.getValue(1); 2130 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2131 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2132 } 2133 2134 // Build the FP_TO_INT*_IN_MEM 2135 std::vector<SDOperand> Ops; 2136 Ops.push_back(Chain); 2137 Ops.push_back(Value); 2138 Ops.push_back(StackSlot); 2139 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 2140 2141 // Load the result. 2142 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2143 DAG.getSrcValue(NULL)); 2144 } 2145 case ISD::READCYCLECOUNTER: { 2146 std::vector<MVT::ValueType> Tys; 2147 Tys.push_back(MVT::Other); 2148 Tys.push_back(MVT::Flag); 2149 std::vector<SDOperand> Ops; 2150 Ops.push_back(Op.getOperand(0)); 2151 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 2152 Ops.clear(); 2153 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 2154 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 2155 MVT::i32, Ops[0].getValue(2))); 2156 Ops.push_back(Ops[1].getValue(1)); 2157 Tys[0] = Tys[1] = MVT::i32; 2158 Tys.push_back(MVT::Other); 2159 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2160 } 2161 case ISD::FABS: { 2162 MVT::ValueType VT = Op.getValueType(); 2163 const Type *OpNTy = MVT::getTypeForValueType(VT); 2164 std::vector<Constant*> CV; 2165 if (VT == MVT::f64) { 2166 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2167 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2168 } else { 2169 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2170 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2171 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2172 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2173 } 2174 Constant *CS = ConstantStruct::get(CV); 2175 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2176 SDOperand Mask 2177 = DAG.getNode(X86ISD::LOAD_PACK, 2178 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2179 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 2180 } 2181 case ISD::FNEG: { 2182 MVT::ValueType VT = Op.getValueType(); 2183 const Type *OpNTy = MVT::getTypeForValueType(VT); 2184 std::vector<Constant*> CV; 2185 if (VT == MVT::f64) { 2186 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 2187 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2188 } else { 2189 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 2190 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2191 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2192 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2193 } 2194 Constant *CS = ConstantStruct::get(CV); 2195 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2196 SDOperand Mask 2197 = DAG.getNode(X86ISD::LOAD_PACK, 2198 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2199 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 2200 } 2201 case ISD::SETCC: { 2202 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 2203 SDOperand Cond; 2204 SDOperand CC = Op.getOperand(2); 2205 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2206 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 2207 bool Flip; 2208 unsigned X86CC; 2209 if (translateX86CC(CC, isFP, X86CC, Flip)) { 2210 if (Flip) 2211 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2212 Op.getOperand(1), Op.getOperand(0)); 2213 else 2214 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2215 Op.getOperand(0), Op.getOperand(1)); 2216 return DAG.getNode(X86ISD::SETCC, MVT::i8, 2217 DAG.getConstant(X86CC, MVT::i8), Cond); 2218 } else { 2219 assert(isFP && "Illegal integer SetCC!"); 2220 2221 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2222 Op.getOperand(0), Op.getOperand(1)); 2223 std::vector<MVT::ValueType> Tys; 2224 std::vector<SDOperand> Ops; 2225 switch (SetCCOpcode) { 2226 default: assert(false && "Illegal floating point SetCC!"); 2227 case ISD::SETOEQ: { // !PF & ZF 2228 Tys.push_back(MVT::i8); 2229 Tys.push_back(MVT::Flag); 2230 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 2231 Ops.push_back(Cond); 2232 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2233 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2234 DAG.getConstant(X86ISD::COND_E, MVT::i8), 2235 Tmp1.getValue(1)); 2236 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 2237 } 2238 case ISD::SETUNE: { // PF | !ZF 2239 Tys.push_back(MVT::i8); 2240 Tys.push_back(MVT::Flag); 2241 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 2242 Ops.push_back(Cond); 2243 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2244 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2245 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 2246 Tmp1.getValue(1)); 2247 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 2248 } 2249 } 2250 } 2251 } 2252 case ISD::SELECT: { 2253 MVT::ValueType VT = Op.getValueType(); 2254 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 2255 bool addTest = false; 2256 SDOperand Op0 = Op.getOperand(0); 2257 SDOperand Cond, CC; 2258 if (Op0.getOpcode() == ISD::SETCC) 2259 Op0 = LowerOperation(Op0, DAG); 2260 2261 if (Op0.getOpcode() == X86ISD::SETCC) { 2262 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2263 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2264 // have another use it will be eliminated. 2265 // If the X86ISD::SETCC has more than one use, then it's probably better 2266 // to use a test instead of duplicating the X86ISD::CMP (for register 2267 // pressure reason). 2268 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 2269 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2270 CmpOpc == X86ISD::UCOMI) { 2271 if (!Op0.hasOneUse()) { 2272 std::vector<MVT::ValueType> Tys; 2273 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 2274 Tys.push_back(Op0.Val->getValueType(i)); 2275 std::vector<SDOperand> Ops; 2276 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 2277 Ops.push_back(Op0.getOperand(i)); 2278 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2279 } 2280 2281 CC = Op0.getOperand(0); 2282 Cond = Op0.getOperand(1); 2283 // Make a copy as flag result cannot be used by more than one. 2284 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2285 Cond.getOperand(0), Cond.getOperand(1)); 2286 addTest = 2287 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 2288 } else 2289 addTest = true; 2290 } else 2291 addTest = true; 2292 2293 if (addTest) { 2294 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2295 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 2296 } 2297 2298 std::vector<MVT::ValueType> Tys; 2299 Tys.push_back(Op.getValueType()); 2300 Tys.push_back(MVT::Flag); 2301 std::vector<SDOperand> Ops; 2302 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 2303 // condition is true. 2304 Ops.push_back(Op.getOperand(2)); 2305 Ops.push_back(Op.getOperand(1)); 2306 Ops.push_back(CC); 2307 Ops.push_back(Cond); 2308 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 2309 } 2310 case ISD::BRCOND: { 2311 bool addTest = false; 2312 SDOperand Cond = Op.getOperand(1); 2313 SDOperand Dest = Op.getOperand(2); 2314 SDOperand CC; 2315 if (Cond.getOpcode() == ISD::SETCC) 2316 Cond = LowerOperation(Cond, DAG); 2317 2318 if (Cond.getOpcode() == X86ISD::SETCC) { 2319 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2320 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2321 // have another use it will be eliminated. 2322 // If the X86ISD::SETCC has more than one use, then it's probably better 2323 // to use a test instead of duplicating the X86ISD::CMP (for register 2324 // pressure reason). 2325 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 2326 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2327 CmpOpc == X86ISD::UCOMI) { 2328 if (!Cond.hasOneUse()) { 2329 std::vector<MVT::ValueType> Tys; 2330 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 2331 Tys.push_back(Cond.Val->getValueType(i)); 2332 std::vector<SDOperand> Ops; 2333 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 2334 Ops.push_back(Cond.getOperand(i)); 2335 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2336 } 2337 2338 CC = Cond.getOperand(0); 2339 Cond = Cond.getOperand(1); 2340 // Make a copy as flag result cannot be used by more than one. 2341 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2342 Cond.getOperand(0), Cond.getOperand(1)); 2343 } else 2344 addTest = true; 2345 } else 2346 addTest = true; 2347 2348 if (addTest) { 2349 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2350 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 2351 } 2352 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 2353 Op.getOperand(0), Op.getOperand(2), CC, Cond); 2354 } 2355 case ISD::MEMSET: { 2356 SDOperand InFlag(0, 0); 2357 SDOperand Chain = Op.getOperand(0); 2358 unsigned Align = 2359 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2360 if (Align == 0) Align = 1; 2361 2362 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2363 // If not DWORD aligned, call memset if size is less than the threshold. 2364 // It knows how to align to the right boundary first. 2365 if ((Align & 3) != 0 || 2366 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2367 MVT::ValueType IntPtr = getPointerTy(); 2368 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2369 std::vector<std::pair<SDOperand, const Type*> > Args; 2370 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2371 // Extend the ubyte argument to be an int value for the call. 2372 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 2373 Args.push_back(std::make_pair(Val, IntPtrTy)); 2374 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2375 std::pair<SDOperand,SDOperand> CallResult = 2376 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2377 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 2378 return CallResult.second; 2379 } 2380 2381 MVT::ValueType AVT; 2382 SDOperand Count; 2383 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2384 unsigned BytesLeft = 0; 2385 bool TwoRepStos = false; 2386 if (ValC) { 2387 unsigned ValReg; 2388 unsigned Val = ValC->getValue() & 255; 2389 2390 // If the value is a constant, then we can potentially use larger sets. 2391 switch (Align & 3) { 2392 case 2: // WORD aligned 2393 AVT = MVT::i16; 2394 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2395 BytesLeft = I->getValue() % 2; 2396 Val = (Val << 8) | Val; 2397 ValReg = X86::AX; 2398 break; 2399 case 0: // DWORD aligned 2400 AVT = MVT::i32; 2401 if (I) { 2402 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2403 BytesLeft = I->getValue() % 4; 2404 } else { 2405 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2406 DAG.getConstant(2, MVT::i8)); 2407 TwoRepStos = true; 2408 } 2409 Val = (Val << 8) | Val; 2410 Val = (Val << 16) | Val; 2411 ValReg = X86::EAX; 2412 break; 2413 default: // Byte aligned 2414 AVT = MVT::i8; 2415 Count = Op.getOperand(3); 2416 ValReg = X86::AL; 2417 break; 2418 } 2419 2420 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 2421 InFlag); 2422 InFlag = Chain.getValue(1); 2423 } else { 2424 AVT = MVT::i8; 2425 Count = Op.getOperand(3); 2426 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 2427 InFlag = Chain.getValue(1); 2428 } 2429 2430 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2431 InFlag = Chain.getValue(1); 2432 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2433 InFlag = Chain.getValue(1); 2434 2435 std::vector<MVT::ValueType> Tys; 2436 Tys.push_back(MVT::Other); 2437 Tys.push_back(MVT::Flag); 2438 std::vector<SDOperand> Ops; 2439 Ops.push_back(Chain); 2440 Ops.push_back(DAG.getValueType(AVT)); 2441 Ops.push_back(InFlag); 2442 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2443 2444 if (TwoRepStos) { 2445 InFlag = Chain.getValue(1); 2446 Count = Op.getOperand(3); 2447 MVT::ValueType CVT = Count.getValueType(); 2448 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2449 DAG.getConstant(3, CVT)); 2450 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2451 InFlag = Chain.getValue(1); 2452 Tys.clear(); 2453 Tys.push_back(MVT::Other); 2454 Tys.push_back(MVT::Flag); 2455 Ops.clear(); 2456 Ops.push_back(Chain); 2457 Ops.push_back(DAG.getValueType(MVT::i8)); 2458 Ops.push_back(InFlag); 2459 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2460 } else if (BytesLeft) { 2461 // Issue stores for the last 1 - 3 bytes. 2462 SDOperand Value; 2463 unsigned Val = ValC->getValue() & 255; 2464 unsigned Offset = I->getValue() - BytesLeft; 2465 SDOperand DstAddr = Op.getOperand(1); 2466 MVT::ValueType AddrVT = DstAddr.getValueType(); 2467 if (BytesLeft >= 2) { 2468 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 2469 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2470 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2471 DAG.getConstant(Offset, AddrVT)), 2472 DAG.getSrcValue(NULL)); 2473 BytesLeft -= 2; 2474 Offset += 2; 2475 } 2476 2477 if (BytesLeft == 1) { 2478 Value = DAG.getConstant(Val, MVT::i8); 2479 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2480 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2481 DAG.getConstant(Offset, AddrVT)), 2482 DAG.getSrcValue(NULL)); 2483 } 2484 } 2485 2486 return Chain; 2487 } 2488 case ISD::MEMCPY: { 2489 SDOperand Chain = Op.getOperand(0); 2490 unsigned Align = 2491 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2492 if (Align == 0) Align = 1; 2493 2494 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2495 // If not DWORD aligned, call memcpy if size is less than the threshold. 2496 // It knows how to align to the right boundary first. 2497 if ((Align & 3) != 0 || 2498 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2499 MVT::ValueType IntPtr = getPointerTy(); 2500 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2501 std::vector<std::pair<SDOperand, const Type*> > Args; 2502 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2503 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 2504 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2505 std::pair<SDOperand,SDOperand> CallResult = 2506 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2507 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 2508 return CallResult.second; 2509 } 2510 2511 MVT::ValueType AVT; 2512 SDOperand Count; 2513 unsigned BytesLeft = 0; 2514 bool TwoRepMovs = false; 2515 switch (Align & 3) { 2516 case 2: // WORD aligned 2517 AVT = MVT::i16; 2518 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2519 BytesLeft = I->getValue() % 2; 2520 break; 2521 case 0: // DWORD aligned 2522 AVT = MVT::i32; 2523 if (I) { 2524 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2525 BytesLeft = I->getValue() % 4; 2526 } else { 2527 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2528 DAG.getConstant(2, MVT::i8)); 2529 TwoRepMovs = true; 2530 } 2531 break; 2532 default: // Byte aligned 2533 AVT = MVT::i8; 2534 Count = Op.getOperand(3); 2535 break; 2536 } 2537 2538 SDOperand InFlag(0, 0); 2539 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2540 InFlag = Chain.getValue(1); 2541 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2542 InFlag = Chain.getValue(1); 2543 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 2544 InFlag = Chain.getValue(1); 2545 2546 std::vector<MVT::ValueType> Tys; 2547 Tys.push_back(MVT::Other); 2548 Tys.push_back(MVT::Flag); 2549 std::vector<SDOperand> Ops; 2550 Ops.push_back(Chain); 2551 Ops.push_back(DAG.getValueType(AVT)); 2552 Ops.push_back(InFlag); 2553 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2554 2555 if (TwoRepMovs) { 2556 InFlag = Chain.getValue(1); 2557 Count = Op.getOperand(3); 2558 MVT::ValueType CVT = Count.getValueType(); 2559 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2560 DAG.getConstant(3, CVT)); 2561 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2562 InFlag = Chain.getValue(1); 2563 Tys.clear(); 2564 Tys.push_back(MVT::Other); 2565 Tys.push_back(MVT::Flag); 2566 Ops.clear(); 2567 Ops.push_back(Chain); 2568 Ops.push_back(DAG.getValueType(MVT::i8)); 2569 Ops.push_back(InFlag); 2570 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2571 } else if (BytesLeft) { 2572 // Issue loads and stores for the last 1 - 3 bytes. 2573 unsigned Offset = I->getValue() - BytesLeft; 2574 SDOperand DstAddr = Op.getOperand(1); 2575 MVT::ValueType DstVT = DstAddr.getValueType(); 2576 SDOperand SrcAddr = Op.getOperand(2); 2577 MVT::ValueType SrcVT = SrcAddr.getValueType(); 2578 SDOperand Value; 2579 if (BytesLeft >= 2) { 2580 Value = DAG.getLoad(MVT::i16, Chain, 2581 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2582 DAG.getConstant(Offset, SrcVT)), 2583 DAG.getSrcValue(NULL)); 2584 Chain = Value.getValue(1); 2585 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2586 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2587 DAG.getConstant(Offset, DstVT)), 2588 DAG.getSrcValue(NULL)); 2589 BytesLeft -= 2; 2590 Offset += 2; 2591 } 2592 2593 if (BytesLeft == 1) { 2594 Value = DAG.getLoad(MVT::i8, Chain, 2595 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2596 DAG.getConstant(Offset, SrcVT)), 2597 DAG.getSrcValue(NULL)); 2598 Chain = Value.getValue(1); 2599 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2600 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2601 DAG.getConstant(Offset, DstVT)), 2602 DAG.getSrcValue(NULL)); 2603 } 2604 } 2605 2606 return Chain; 2607 } 2608 2609 // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their 2610 // target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 2611 // one of the above mentioned nodes. It has to be wrapped because otherwise 2612 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2613 // be used to form addressing mode. These wrapped nodes will be selected 2614 // into MOV32ri. 2615 case ISD::ConstantPool: { 2616 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2617 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2618 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2619 CP->getAlignment())); 2620 if (Subtarget->isTargetDarwin()) { 2621 // With PIC, the address is actually $g + Offset. 2622 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2623 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2624 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2625 } 2626 2627 return Result; 2628 } 2629 case ISD::GlobalAddress: { 2630 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2631 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2632 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2633 if (Subtarget->isTargetDarwin()) { 2634 // With PIC, the address is actually $g + Offset. 2635 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2636 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2637 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2638 2639 // For Darwin, external and weak symbols are indirect, so we want to load 2640 // the value at address GV, not the value of GV itself. This means that 2641 // the GlobalAddress must be in the base or index register of the address, 2642 // not the GV offset field. 2643 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2644 DarwinGVRequiresExtraLoad(GV)) 2645 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2646 Result, DAG.getSrcValue(NULL)); 2647 } 2648 2649 return Result; 2650 } 2651 case ISD::ExternalSymbol: { 2652 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2653 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2654 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2655 if (Subtarget->isTargetDarwin()) { 2656 // With PIC, the address is actually $g + Offset. 2657 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2658 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2659 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2660 } 2661 2662 return Result; 2663 } 2664 case ISD::VASTART: { 2665 // vastart just stores the address of the VarArgsFrameIndex slot into the 2666 // memory location argument. 2667 // FIXME: Replace MVT::i32 with PointerTy 2668 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 2669 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 2670 Op.getOperand(1), Op.getOperand(2)); 2671 } 2672 case ISD::RET: { 2673 SDOperand Copy; 2674 2675 switch(Op.getNumOperands()) { 2676 default: 2677 assert(0 && "Do not know how to return this many arguments!"); 2678 abort(); 2679 case 1: 2680 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 2681 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 2682 case 2: { 2683 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 2684 if (MVT::isInteger(ArgVT)) 2685 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 2686 SDOperand()); 2687 else if (!X86ScalarSSE) { 2688 std::vector<MVT::ValueType> Tys; 2689 Tys.push_back(MVT::Other); 2690 Tys.push_back(MVT::Flag); 2691 std::vector<SDOperand> Ops; 2692 Ops.push_back(Op.getOperand(0)); 2693 Ops.push_back(Op.getOperand(1)); 2694 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2695 } else { 2696 SDOperand MemLoc; 2697 SDOperand Chain = Op.getOperand(0); 2698 SDOperand Value = Op.getOperand(1); 2699 2700 if (Value.getOpcode() == ISD::LOAD && 2701 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 2702 Chain = Value.getOperand(0); 2703 MemLoc = Value.getOperand(1); 2704 } else { 2705 // Spill the value to memory and reload it into top of stack. 2706 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 2707 MachineFunction &MF = DAG.getMachineFunction(); 2708 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2709 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 2710 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 2711 Value, MemLoc, DAG.getSrcValue(0)); 2712 } 2713 std::vector<MVT::ValueType> Tys; 2714 Tys.push_back(MVT::f64); 2715 Tys.push_back(MVT::Other); 2716 std::vector<SDOperand> Ops; 2717 Ops.push_back(Chain); 2718 Ops.push_back(MemLoc); 2719 Ops.push_back(DAG.getValueType(ArgVT)); 2720 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 2721 Tys.clear(); 2722 Tys.push_back(MVT::Other); 2723 Tys.push_back(MVT::Flag); 2724 Ops.clear(); 2725 Ops.push_back(Copy.getValue(1)); 2726 Ops.push_back(Copy); 2727 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2728 } 2729 break; 2730 } 2731 case 3: 2732 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 2733 SDOperand()); 2734 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 2735 break; 2736 } 2737 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 2738 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 2739 Copy.getValue(1)); 2740 } 2741 case ISD::SCALAR_TO_VECTOR: { 2742 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 2743 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 2744 } 2745 case ISD::VECTOR_SHUFFLE: { 2746 SDOperand V1 = Op.getOperand(0); 2747 SDOperand V2 = Op.getOperand(1); 2748 SDOperand PermMask = Op.getOperand(2); 2749 MVT::ValueType VT = Op.getValueType(); 2750 unsigned NumElems = PermMask.getNumOperands(); 2751 2752 if (X86::isSplatMask(PermMask.Val)) 2753 return Op; 2754 2755 // Normalize the node to match x86 shuffle ops if needed 2756 if (V2.getOpcode() != ISD::UNDEF) { 2757 bool DoSwap = false; 2758 2759 if (ShouldXformedToMOVLP(V1, V2, PermMask)) 2760 DoSwap = true; 2761 else if (isLowerFromV2UpperFromV1(PermMask)) 2762 DoSwap = true; 2763 2764 if (DoSwap) { 2765 Op = CommuteVectorShuffle(Op, DAG); 2766 V1 = Op.getOperand(0); 2767 V2 = Op.getOperand(1); 2768 PermMask = Op.getOperand(2); 2769 } 2770 } 2771 2772 if (NumElems == 2) 2773 return Op; 2774 2775 if (X86::isMOVSMask(PermMask.Val) || 2776 X86::isMOVSHDUPMask(PermMask.Val) || 2777 X86::isMOVSLDUPMask(PermMask.Val)) 2778 return Op; 2779 2780 if (X86::isUNPCKLMask(PermMask.Val) || 2781 X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2782 X86::isUNPCKHMask(PermMask.Val)) 2783 // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. 2784 return Op; 2785 2786 // If VT is integer, try PSHUF* first, then SHUFP*. 2787 if (MVT::isInteger(VT)) { 2788 if (X86::isPSHUFDMask(PermMask.Val) || 2789 X86::isPSHUFHWMask(PermMask.Val) || 2790 X86::isPSHUFLWMask(PermMask.Val)) { 2791 if (V2.getOpcode() != ISD::UNDEF) 2792 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2793 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2794 return Op; 2795 } 2796 2797 if (X86::isSHUFPMask(PermMask.Val)) 2798 return Op; 2799 2800 // Handle v8i16 shuffle high / low shuffle node pair. 2801 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2802 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2803 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2804 std::vector<SDOperand> MaskVec; 2805 for (unsigned i = 0; i != 4; ++i) 2806 MaskVec.push_back(PermMask.getOperand(i)); 2807 for (unsigned i = 4; i != 8; ++i) 2808 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2809 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2810 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2811 MaskVec.clear(); 2812 for (unsigned i = 0; i != 4; ++i) 2813 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2814 for (unsigned i = 4; i != 8; ++i) 2815 MaskVec.push_back(PermMask.getOperand(i)); 2816 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2817 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2818 } 2819 } else { 2820 // Floating point cases in the other order. 2821 if (X86::isSHUFPMask(PermMask.Val)) 2822 return Op; 2823 if (X86::isPSHUFDMask(PermMask.Val) || 2824 X86::isPSHUFHWMask(PermMask.Val) || 2825 X86::isPSHUFLWMask(PermMask.Val)) { 2826 if (V2.getOpcode() != ISD::UNDEF) 2827 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2828 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2829 return Op; 2830 } 2831 } 2832 2833 return SDOperand(); 2834 } 2835 case ISD::BUILD_VECTOR: { 2836 // All one's are handled with pcmpeqd. 2837 if (ISD::isBuildVectorAllOnes(Op.Val)) 2838 return Op; 2839 2840 std::set<SDOperand> Values; 2841 SDOperand Elt0 = Op.getOperand(0); 2842 Values.insert(Elt0); 2843 bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && 2844 cast<ConstantSDNode>(Elt0)->getValue() == 0) || 2845 (isa<ConstantFPSDNode>(Elt0) && 2846 cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0)); 2847 bool RestAreZero = true; 2848 unsigned NumElems = Op.getNumOperands(); 2849 for (unsigned i = 1; i < NumElems; ++i) { 2850 SDOperand Elt = Op.getOperand(i); 2851 if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) { 2852 if (!FPC->isExactlyValue(+0.0)) 2853 RestAreZero = false; 2854 } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2855 if (!C->isNullValue()) 2856 RestAreZero = false; 2857 } else 2858 RestAreZero = false; 2859 Values.insert(Elt); 2860 } 2861 2862 if (RestAreZero) { 2863 if (Elt0IsZero) return Op; 2864 2865 // Zero extend a scalar to a vector. 2866 return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); 2867 } 2868 2869 if (Values.size() > 2) { 2870 // Expand into a number of unpckl*. 2871 // e.g. for v4f32 2872 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2873 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2874 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2875 MVT::ValueType VT = Op.getValueType(); 2876 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2877 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2878 std::vector<SDOperand> MaskVec; 2879 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2880 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2881 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2882 } 2883 SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2884 std::vector<SDOperand> V(NumElems); 2885 for (unsigned i = 0; i < NumElems; ++i) 2886 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2887 NumElems >>= 1; 2888 while (NumElems != 0) { 2889 for (unsigned i = 0; i < NumElems; ++i) 2890 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2891 PermMask); 2892 NumElems >>= 1; 2893 } 2894 return V[0]; 2895 } 2896 2897 return SDOperand(); 2898 } 2899 case ISD::EXTRACT_VECTOR_ELT: { 2900 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2901 return SDOperand(); 2902 2903 MVT::ValueType VT = Op.getValueType(); 2904 // TODO: handle v16i8. 2905 if (MVT::getSizeInBits(VT) == 16) { 2906 // Transform it so it match pextrw which produces a 32-bit result. 2907 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2908 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2909 Op.getOperand(0), Op.getOperand(1)); 2910 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2911 DAG.getValueType(VT)); 2912 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2913 } else if (MVT::getSizeInBits(VT) == 32) { 2914 SDOperand Vec = Op.getOperand(0); 2915 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2916 if (Idx == 0) 2917 return Op; 2918 2919 // TODO: if Idex == 2, we can use unpckhps 2920 // SHUFPS the element to the lowest double word, then movss. 2921 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2922 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 2923 MVT::getVectorBaseType(MaskVT)); 2924 std::vector<SDOperand> IdxVec; 2925 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 2926 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2927 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2928 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2929 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2930 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2931 Vec, Vec, Mask); 2932 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2933 DAG.getConstant(0, MVT::i32)); 2934 } else if (MVT::getSizeInBits(VT) == 64) { 2935 SDOperand Vec = Op.getOperand(0); 2936 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2937 if (Idx == 0) 2938 return Op; 2939 2940 // UNPCKHPD the element to the lowest double word, then movsd. 2941 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 2942 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 2943 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2944 std::vector<SDOperand> IdxVec; 2945 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 2946 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2947 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2948 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2949 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2950 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2951 DAG.getConstant(0, MVT::i32)); 2952 } 2953 2954 return SDOperand(); 2955 } 2956 case ISD::INSERT_VECTOR_ELT: { 2957 // Transform it so it match pinsrw which expects a 16-bit value in a R32 2958 // as its second argument. 2959 MVT::ValueType VT = Op.getValueType(); 2960 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 2961 if (MVT::getSizeInBits(BaseVT) == 16) { 2962 SDOperand N1 = Op.getOperand(1); 2963 SDOperand N2 = Op.getOperand(2); 2964 if (N1.getValueType() != MVT::i32) 2965 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 2966 if (N2.getValueType() != MVT::i32) 2967 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 2968 return DAG.getNode(X86ISD::PINSRW, VT, Op.getOperand(0), N1, N2); 2969 } 2970 2971 return SDOperand(); 2972 } 2973 case ISD::INTRINSIC_WO_CHAIN: { 2974 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 2975 switch (IntNo) { 2976 default: return SDOperand(); // Don't custom lower most intrinsics. 2977 // Comparison intrinsics. 2978 case Intrinsic::x86_sse_comieq_ss: 2979 case Intrinsic::x86_sse_comilt_ss: 2980 case Intrinsic::x86_sse_comile_ss: 2981 case Intrinsic::x86_sse_comigt_ss: 2982 case Intrinsic::x86_sse_comige_ss: 2983 case Intrinsic::x86_sse_comineq_ss: 2984 case Intrinsic::x86_sse_ucomieq_ss: 2985 case Intrinsic::x86_sse_ucomilt_ss: 2986 case Intrinsic::x86_sse_ucomile_ss: 2987 case Intrinsic::x86_sse_ucomigt_ss: 2988 case Intrinsic::x86_sse_ucomige_ss: 2989 case Intrinsic::x86_sse_ucomineq_ss: 2990 case Intrinsic::x86_sse2_comieq_sd: 2991 case Intrinsic::x86_sse2_comilt_sd: 2992 case Intrinsic::x86_sse2_comile_sd: 2993 case Intrinsic::x86_sse2_comigt_sd: 2994 case Intrinsic::x86_sse2_comige_sd: 2995 case Intrinsic::x86_sse2_comineq_sd: 2996 case Intrinsic::x86_sse2_ucomieq_sd: 2997 case Intrinsic::x86_sse2_ucomilt_sd: 2998 case Intrinsic::x86_sse2_ucomile_sd: 2999 case Intrinsic::x86_sse2_ucomigt_sd: 3000 case Intrinsic::x86_sse2_ucomige_sd: 3001 case Intrinsic::x86_sse2_ucomineq_sd: { 3002 unsigned Opc = 0; 3003 ISD::CondCode CC = ISD::SETCC_INVALID; 3004 switch (IntNo) { 3005 default: break; 3006 case Intrinsic::x86_sse_comieq_ss: 3007 case Intrinsic::x86_sse2_comieq_sd: 3008 Opc = X86ISD::COMI; 3009 CC = ISD::SETEQ; 3010 break; 3011 case Intrinsic::x86_sse_comilt_ss: 3012 case Intrinsic::x86_sse2_comilt_sd: 3013 Opc = X86ISD::COMI; 3014 CC = ISD::SETLT; 3015 break; 3016 case Intrinsic::x86_sse_comile_ss: 3017 case Intrinsic::x86_sse2_comile_sd: 3018 Opc = X86ISD::COMI; 3019 CC = ISD::SETLE; 3020 break; 3021 case Intrinsic::x86_sse_comigt_ss: 3022 case Intrinsic::x86_sse2_comigt_sd: 3023 Opc = X86ISD::COMI; 3024 CC = ISD::SETGT; 3025 break; 3026 case Intrinsic::x86_sse_comige_ss: 3027 case Intrinsic::x86_sse2_comige_sd: 3028 Opc = X86ISD::COMI; 3029 CC = ISD::SETGE; 3030 break; 3031 case Intrinsic::x86_sse_comineq_ss: 3032 case Intrinsic::x86_sse2_comineq_sd: 3033 Opc = X86ISD::COMI; 3034 CC = ISD::SETNE; 3035 break; 3036 case Intrinsic::x86_sse_ucomieq_ss: 3037 case Intrinsic::x86_sse2_ucomieq_sd: 3038 Opc = X86ISD::UCOMI; 3039 CC = ISD::SETEQ; 3040 break; 3041 case Intrinsic::x86_sse_ucomilt_ss: 3042 case Intrinsic::x86_sse2_ucomilt_sd: 3043 Opc = X86ISD::UCOMI; 3044 CC = ISD::SETLT; 3045 break; 3046 case Intrinsic::x86_sse_ucomile_ss: 3047 case Intrinsic::x86_sse2_ucomile_sd: 3048 Opc = X86ISD::UCOMI; 3049 CC = ISD::SETLE; 3050 break; 3051 case Intrinsic::x86_sse_ucomigt_ss: 3052 case Intrinsic::x86_sse2_ucomigt_sd: 3053 Opc = X86ISD::UCOMI; 3054 CC = ISD::SETGT; 3055 break; 3056 case Intrinsic::x86_sse_ucomige_ss: 3057 case Intrinsic::x86_sse2_ucomige_sd: 3058 Opc = X86ISD::UCOMI; 3059 CC = ISD::SETGE; 3060 break; 3061 case Intrinsic::x86_sse_ucomineq_ss: 3062 case Intrinsic::x86_sse2_ucomineq_sd: 3063 Opc = X86ISD::UCOMI; 3064 CC = ISD::SETNE; 3065 break; 3066 } 3067 bool Flip; 3068 unsigned X86CC; 3069 translateX86CC(CC, true, X86CC, Flip); 3070 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 3071 Op.getOperand(Flip?1:2)); 3072 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 3073 DAG.getConstant(X86CC, MVT::i8), Cond); 3074 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 3075 } 3076 } 3077 } 3078 } 3079} 3080 3081const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3082 switch (Opcode) { 3083 default: return NULL; 3084 case X86ISD::SHLD: return "X86ISD::SHLD"; 3085 case X86ISD::SHRD: return "X86ISD::SHRD"; 3086 case X86ISD::FAND: return "X86ISD::FAND"; 3087 case X86ISD::FXOR: return "X86ISD::FXOR"; 3088 case X86ISD::FILD: return "X86ISD::FILD"; 3089 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3090 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3091 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3092 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3093 case X86ISD::FLD: return "X86ISD::FLD"; 3094 case X86ISD::FST: return "X86ISD::FST"; 3095 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 3096 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3097 case X86ISD::CALL: return "X86ISD::CALL"; 3098 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3099 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3100 case X86ISD::CMP: return "X86ISD::CMP"; 3101 case X86ISD::TEST: return "X86ISD::TEST"; 3102 case X86ISD::COMI: return "X86ISD::COMI"; 3103 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3104 case X86ISD::SETCC: return "X86ISD::SETCC"; 3105 case X86ISD::CMOV: return "X86ISD::CMOV"; 3106 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3107 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3108 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3109 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3110 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3111 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3112 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3113 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3114 case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; 3115 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3116 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3117 } 3118} 3119 3120void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3121 uint64_t Mask, 3122 uint64_t &KnownZero, 3123 uint64_t &KnownOne, 3124 unsigned Depth) const { 3125 unsigned Opc = Op.getOpcode(); 3126 assert((Opc >= ISD::BUILTIN_OP_END || 3127 Opc == ISD::INTRINSIC_WO_CHAIN || 3128 Opc == ISD::INTRINSIC_W_CHAIN || 3129 Opc == ISD::INTRINSIC_VOID) && 3130 "Should use MaskedValueIsZero if you don't know whether Op" 3131 " is a target node!"); 3132 3133 KnownZero = KnownOne = 0; // Don't know anything. 3134 switch (Opc) { 3135 default: break; 3136 case X86ISD::SETCC: 3137 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3138 break; 3139 } 3140} 3141 3142std::vector<unsigned> X86TargetLowering:: 3143getRegClassForInlineAsmConstraint(const std::string &Constraint, 3144 MVT::ValueType VT) const { 3145 if (Constraint.size() == 1) { 3146 // FIXME: not handling fp-stack yet! 3147 // FIXME: not handling MMX registers yet ('y' constraint). 3148 switch (Constraint[0]) { // GCC X86 Constraint Letters 3149 default: break; // Unknown constriant letter 3150 case 'r': // GENERAL_REGS 3151 case 'R': // LEGACY_REGS 3152 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3153 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3154 case 'l': // INDEX_REGS 3155 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3156 X86::ESI, X86::EDI, X86::EBP, 0); 3157 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3158 case 'Q': // Q_REGS 3159 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3160 case 'x': // SSE_REGS if SSE1 allowed 3161 if (Subtarget->hasSSE1()) 3162 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3163 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3164 0); 3165 return std::vector<unsigned>(); 3166 case 'Y': // SSE_REGS if SSE2 allowed 3167 if (Subtarget->hasSSE2()) 3168 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3169 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3170 0); 3171 return std::vector<unsigned>(); 3172 } 3173 } 3174 3175 return std::vector<unsigned>(); 3176} 3177 3178/// isLegalAddressImmediate - Return true if the integer value or 3179/// GlobalValue can be used as the offset of the target addressing mode. 3180bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3181 // X86 allows a sign-extended 32-bit immediate field. 3182 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3183} 3184 3185bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3186 if (Subtarget->isTargetDarwin()) { 3187 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3188 if (RModel == Reloc::Static) 3189 return true; 3190 else if (RModel == Reloc::DynamicNoPIC) 3191 return !DarwinGVRequiresExtraLoad(GV); 3192 else 3193 return false; 3194 } else 3195 return true; 3196} 3197 3198/// isShuffleMaskLegal - Targets can use this to indicate that they only 3199/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3200/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3201/// are assumed to be legal. 3202bool 3203X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3204 // Only do shuffles on 128-bit vector types for now. 3205 if (MVT::getSizeInBits(VT) == 64) return false; 3206 return (Mask.Val->getNumOperands() == 2 || 3207 X86::isSplatMask(Mask.Val) || 3208 X86::isMOVSMask(Mask.Val) || 3209 X86::isMOVSHDUPMask(Mask.Val) || 3210 X86::isMOVSLDUPMask(Mask.Val) || 3211 X86::isPSHUFDMask(Mask.Val) || 3212 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3213 X86::isSHUFPMask(Mask.Val) || 3214 X86::isUNPCKLMask(Mask.Val) || 3215 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3216 X86::isUNPCKHMask(Mask.Val)); 3217} 3218