X86ISelLowering.cpp revision c575ca22eaf8a656f3fa2c3f0f75264c4c4fcd21
//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

/// X86TargetLowering - Constructor.  Tells the SelectionDAG legalizer which
/// operations/types the X86 target supports natively, which must be promoted
/// to a wider type, which must be expanded into simpler nodes, and which have
/// custom lowering code in this file.
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  // X86ScalarSSE selects between SSE scalar FP codegen and x87 FP-stack
  // codegen throughout this file.
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);                    // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Scalar FP in XMM registers (SSE/SSE2).
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Scalar FP on the x87 FP stack.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    // These four constants have dedicated load/negate instructions on x87.
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    // NOTE: this loop relies on the integer 128-bit vector types being
    // enumerated consecutively up to (but not including) MVT::v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

/// LowerArguments - Lower the incoming arguments of F, dispatching to the
/// fastcc path only when the function uses the fast calling convention AND
/// the -enable-x86-fastcc flag is set; everything else takes the C path.
std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    return LowerFastCCArguments(F, DAG);
  return LowerCCCArguments(F, DAG);
}

/// LowerCallTo - Lower an outgoing call, dispatching to the fastcc or C
/// calling convention path (mirroring LowerArguments above).  Returns the
/// pair (return value, output chain).
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// LowerCCCArguments - Lower incoming arguments for the C calling convention:
/// all arguments are passed on the stack at fixed offsets above the return
/// address, and the caller both pushes and pops them.
std::vector<SDOperand>
X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;   // Each stack slot is at least 4 bytes.
    unsigned ObjSize;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:  ObjSize = 1;                break;
    case MVT::i16: ObjSize = 2;                break;
    case MVT::i32: ObjSize = 4;                break;
    case MVT::i64: ObjSize = ArgIncrement = 8; break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }
    // Create the frame index object for this incoming parameter...
    int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

    // Create the SelectionDAG nodes corresponding to a load from this parameter
    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    SDOperand ArgValue;
    if (!I->use_empty())
      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    else {
      // Dead argument: substitute a zero of the right type instead of loading.
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    ArgOffset += ArgIncrement;   // Move on to the next argument...
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;
  return ArgValues;
}

/// LowerCCCCallTo - Lower an outgoing C-convention call: push every argument
/// on the stack inside a CALLSEQ_START/CALLSEQ_END bracket, emit the call,
/// and copy the return value out of the convention's return registers
/// (AL/AX/EAX, EAX:EDX for i64, ST0/XMM for FP).
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    // First pass: size the outgoing argument area.
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Arguments go on the stack in reverse order, as specified by the ABI.
    unsigned ArgOffset = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;

    // Second pass: emit one store per argument at ESP + ArgOffset.
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
    }
    // The argument stores are independent of each other; join them with a
    // TokenFactor so they may be scheduled freely.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    // i64 comes back split across two i32 registers.
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  SDOperand InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  // Second CALLSEQ_END operand is 0: in the C convention the callee pops
  // nothing (contrast with fastcc below).
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      // Reassemble the 64-bit result from EAX (low) and EDX (high).
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;   // NOTE: intentionally shadows outer Ops.
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // With SSE, the result comes back on the x87 stack (ST0); round-trip
        // it through a stack slot to get it into an XMM register.
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has problem with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coaleced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;


/// LowerFastCCArguments - Lower incoming arguments for the fast calling
/// convention: the first FASTCC_NUM_INT_ARGS_INREGS integer (sub)arguments
/// arrive in (portions of) EAX/EDX, the rest on the stack.  The callee pops
/// its stack arguments (BytesToPopOnReturn), and the pop amount is padded to
/// 8n+4 bytes for stack alignment / tail-call reasons (see section comment).
std::vector<SDOperand>
X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    // ObjSize == 0 means "this argument was fully passed in registers"; the
    // stack-load path below is keyed off it.
    unsigned ObjSize = 0;
    SDOperand ArgValue;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        // Passed in the low byte of EAX or EDX (AL/DL).
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                                    X86::R8RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
          DAG.setRoot(ArgValue.getValue(1));
          if (ObjectVT == MVT::i1)
            // FIXME: Should insert a assertzext here.
            ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
        }
        ++NumIntRegs;
        break;
      }

      ObjSize = 1;
      break;
    case MVT::i16:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        // Passed in the low word of EAX or EDX (AX/DX).
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                                    X86::R16RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 2;
      break;
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                    X86::R32RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Both halves fit in registers: EAX = low, EDX = high.
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
          unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);

          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
          DAG.setRoot(Hi.getValue(1));

          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Split case: the low half arrives in the remaining register (EDX,
        // since this branch is only reachable with NumIntRegs == 1), the high
        // half on the stack.
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          DAG.setRoot(Low.getValue(1));

          // Load the high part from memory.
          // Create the frame index object for this incoming parameter...
          int FI = MFI->CreateFixedObject(4, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        // ObjSize stays 0, so bump ArgOffset manually for the high half.
        ArgOffset += 4;
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        break;
      }
      ObjSize = ArgIncrement = 8;
      break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    if (ObjSize && !I->use_empty()) {
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else if (ArgValue.Val == 0) {
      // Dead argument not materialized above: substitute a typed zero.
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    if (ObjSize)
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

/// LowerFastCCCallTo - Lower an outgoing fastcc call: mirror image of
/// LowerFastCCArguments.  Same register assignment (EAX then EDX), stack
/// bytes padded to 8n+4, and the callee pops its stack arguments.
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  // First pass: size the outgoing stack area, skipping args that will be
  // passed in registers (must match the second pass below exactly).
  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Low half in the last register, high half on the stack.
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;   // Reset for the second pass.
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
925 case MVT::i8: 926 case MVT::i16: 927 case MVT::i32: 928 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 929 RegValuesToPass.push_back(Args[i].first); 930 ++NumIntRegs; 931 break; 932 } 933 // Fall through 934 case MVT::f32: { 935 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 936 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 937 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 938 Args[i].first, PtrOff, 939 DAG.getSrcValue(NULL))); 940 ArgOffset += 4; 941 break; 942 } 943 case MVT::i64: 944 // Can pass (at least) part of it in regs? 945 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 946 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 947 Args[i].first, DAG.getConstant(1, MVT::i32)); 948 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 949 Args[i].first, DAG.getConstant(0, MVT::i32)); 950 RegValuesToPass.push_back(Lo); 951 ++NumIntRegs; 952 953 // Pass both parts in regs? 954 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 955 RegValuesToPass.push_back(Hi); 956 ++NumIntRegs; 957 } else { 958 // Pass the high part in memory. 959 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 960 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 961 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 962 Hi, PtrOff, DAG.getSrcValue(NULL))); 963 ArgOffset += 4; 964 } 965 break; 966 } 967 // Fall through 968 case MVT::f64: 969 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 970 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 971 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 972 Args[i].first, PtrOff, 973 DAG.getSrcValue(NULL))); 974 ArgOffset += 8; 975 break; 976 } 977 } 978 if (!Stores.empty()) 979 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 980 981 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 982 // arguments and the arguments after the retaddr has been pushed are aligned. 
983 if ((ArgOffset & 7) == 0) 984 ArgOffset += 4; 985 986 std::vector<MVT::ValueType> RetVals; 987 MVT::ValueType RetTyVT = getValueType(RetTy); 988 989 RetVals.push_back(MVT::Other); 990 991 // The result values produced have to be legal. Promote the result. 992 switch (RetTyVT) { 993 case MVT::isVoid: break; 994 default: 995 RetVals.push_back(RetTyVT); 996 break; 997 case MVT::i1: 998 case MVT::i8: 999 case MVT::i16: 1000 RetVals.push_back(MVT::i32); 1001 break; 1002 case MVT::f32: 1003 if (X86ScalarSSE) 1004 RetVals.push_back(MVT::f32); 1005 else 1006 RetVals.push_back(MVT::f64); 1007 break; 1008 case MVT::i64: 1009 RetVals.push_back(MVT::i32); 1010 RetVals.push_back(MVT::i32); 1011 break; 1012 } 1013 1014 // Build a sequence of copy-to-reg nodes chained together with token chain 1015 // and flag operands which copy the outgoing args into registers. 1016 SDOperand InFlag; 1017 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 1018 unsigned CCReg; 1019 SDOperand RegToPass = RegValuesToPass[i]; 1020 switch (RegToPass.getValueType()) { 1021 default: assert(0 && "Bad thing to pass in regs"); 1022 case MVT::i8: 1023 CCReg = (i == 0) ? X86::AL : X86::DL; 1024 break; 1025 case MVT::i16: 1026 CCReg = (i == 0) ? X86::AX : X86::DX; 1027 break; 1028 case MVT::i32: 1029 CCReg = (i == 0) ? X86::EAX : X86::EDX; 1030 break; 1031 } 1032 1033 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 1034 InFlag = Chain.getValue(1); 1035 } 1036 1037 std::vector<MVT::ValueType> NodeTys; 1038 NodeTys.push_back(MVT::Other); // Returns a chain 1039 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1040 std::vector<SDOperand> Ops; 1041 Ops.push_back(Chain); 1042 Ops.push_back(Callee); 1043 if (InFlag.Val) 1044 Ops.push_back(InFlag); 1045 1046 // FIXME: Do not generate X86ISD::TAILCALL for now. 
1047 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 1048 InFlag = Chain.getValue(1); 1049 1050 NodeTys.clear(); 1051 NodeTys.push_back(MVT::Other); // Returns a chain 1052 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1053 Ops.clear(); 1054 Ops.push_back(Chain); 1055 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1056 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1057 Ops.push_back(InFlag); 1058 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 1059 InFlag = Chain.getValue(1); 1060 1061 SDOperand RetVal; 1062 if (RetTyVT != MVT::isVoid) { 1063 switch (RetTyVT) { 1064 default: assert(0 && "Unknown value type to return!"); 1065 case MVT::i1: 1066 case MVT::i8: 1067 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 1068 Chain = RetVal.getValue(1); 1069 if (RetTyVT == MVT::i1) 1070 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 1071 break; 1072 case MVT::i16: 1073 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 1074 Chain = RetVal.getValue(1); 1075 break; 1076 case MVT::i32: 1077 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1078 Chain = RetVal.getValue(1); 1079 break; 1080 case MVT::i64: { 1081 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1082 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 1083 Lo.getValue(2)); 1084 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 1085 Chain = Hi.getValue(1); 1086 break; 1087 } 1088 case MVT::f32: 1089 case MVT::f64: { 1090 std::vector<MVT::ValueType> Tys; 1091 Tys.push_back(MVT::f64); 1092 Tys.push_back(MVT::Other); 1093 Tys.push_back(MVT::Flag); 1094 std::vector<SDOperand> Ops; 1095 Ops.push_back(Chain); 1096 Ops.push_back(InFlag); 1097 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 1098 Chain = RetVal.getValue(1); 1099 InFlag = RetVal.getValue(2); 1100 if (X86ScalarSSE) { 1101 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. 
This 1102 // shouldn't be necessary except that RFP cannot be live across 1103 // multiple blocks. When stackifier is fixed, they can be uncoupled. 1104 MachineFunction &MF = DAG.getMachineFunction(); 1105 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1106 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1107 Tys.clear(); 1108 Tys.push_back(MVT::Other); 1109 Ops.clear(); 1110 Ops.push_back(Chain); 1111 Ops.push_back(RetVal); 1112 Ops.push_back(StackSlot); 1113 Ops.push_back(DAG.getValueType(RetTyVT)); 1114 Ops.push_back(InFlag); 1115 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 1116 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 1117 DAG.getSrcValue(NULL)); 1118 Chain = RetVal.getValue(1); 1119 } 1120 1121 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 1122 // FIXME: we would really like to remember that this FP_ROUND 1123 // operation is okay to eliminate if we allow excess FP precision. 1124 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1125 break; 1126 } 1127 } 1128 } 1129 1130 return std::make_pair(RetVal, Chain); 1131} 1132 1133SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1134 if (ReturnAddrIndex == 0) { 1135 // Set up a frame object for the return address. 1136 MachineFunction &MF = DAG.getMachineFunction(); 1137 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1138 } 1139 1140 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 1141} 1142 1143 1144 1145std::pair<SDOperand, SDOperand> X86TargetLowering:: 1146LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1147 SelectionDAG &DAG) { 1148 SDOperand Result; 1149 if (Depth) // Depths > 0 not supported yet! 
1150 Result = DAG.getConstant(0, getPointerTy()); 1151 else { 1152 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1153 if (!isFrameAddress) 1154 // Just load the return address 1155 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 1156 DAG.getSrcValue(NULL)); 1157 else 1158 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 1159 DAG.getConstant(4, MVT::i32)); 1160 } 1161 return std::make_pair(Result, Chain); 1162} 1163 1164/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1165/// which corresponds to the condition code. 1166static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1167 switch (X86CC) { 1168 default: assert(0 && "Unknown X86 conditional code!"); 1169 case X86ISD::COND_A: return X86::JA; 1170 case X86ISD::COND_AE: return X86::JAE; 1171 case X86ISD::COND_B: return X86::JB; 1172 case X86ISD::COND_BE: return X86::JBE; 1173 case X86ISD::COND_E: return X86::JE; 1174 case X86ISD::COND_G: return X86::JG; 1175 case X86ISD::COND_GE: return X86::JGE; 1176 case X86ISD::COND_L: return X86::JL; 1177 case X86ISD::COND_LE: return X86::JLE; 1178 case X86ISD::COND_NE: return X86::JNE; 1179 case X86ISD::COND_NO: return X86::JNO; 1180 case X86ISD::COND_NP: return X86::JNP; 1181 case X86ISD::COND_NS: return X86::JNS; 1182 case X86ISD::COND_O: return X86::JO; 1183 case X86ISD::COND_P: return X86::JP; 1184 case X86ISD::COND_S: return X86::JS; 1185 } 1186} 1187 1188/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1189/// specific condition code. It returns a false if it cannot do a direct 1190/// translation. X86CC is the translated CondCode. Flip is set to true if the 1191/// the order of comparison operands should be flipped. 
1192static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1193 unsigned &X86CC, bool &Flip) { 1194 Flip = false; 1195 X86CC = X86ISD::COND_INVALID; 1196 if (!isFP) { 1197 switch (SetCCOpcode) { 1198 default: break; 1199 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1200 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 1201 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 1202 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 1203 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 1204 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1205 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 1206 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 1207 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 1208 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 1209 } 1210 } else { 1211 // On a floating point condition, the flags are set as follows: 1212 // ZF PF CF op 1213 // 0 | 0 | 0 | X > Y 1214 // 0 | 0 | 1 | X < Y 1215 // 1 | 0 | 0 | X == Y 1216 // 1 | 1 | 1 | unordered 1217 switch (SetCCOpcode) { 1218 default: break; 1219 case ISD::SETUEQ: 1220 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1221 case ISD::SETOLT: Flip = true; // Fallthrough 1222 case ISD::SETOGT: 1223 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 1224 case ISD::SETOLE: Flip = true; // Fallthrough 1225 case ISD::SETOGE: 1226 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 1227 case ISD::SETUGT: Flip = true; // Fallthrough 1228 case ISD::SETULT: 1229 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 1230 case ISD::SETUGE: Flip = true; // Fallthrough 1231 case ISD::SETULE: 1232 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 1233 case ISD::SETONE: 1234 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1235 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 1236 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 1237 } 1238 } 1239 1240 return X86CC != X86ISD::COND_INVALID; 1241} 1242 1243static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 1244 bool &Flip) { 1245 return 
translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 1246} 1247 1248/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1249/// code. Current x86 isa includes the following FP cmov instructions: 1250/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1251static bool hasFPCMov(unsigned X86CC) { 1252 switch (X86CC) { 1253 default: 1254 return false; 1255 case X86ISD::COND_B: 1256 case X86ISD::COND_BE: 1257 case X86ISD::COND_E: 1258 case X86ISD::COND_P: 1259 case X86ISD::COND_A: 1260 case X86ISD::COND_AE: 1261 case X86ISD::COND_NE: 1262 case X86ISD::COND_NP: 1263 return true; 1264 } 1265} 1266 1267MachineBasicBlock * 1268X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 1269 MachineBasicBlock *BB) { 1270 switch (MI->getOpcode()) { 1271 default: assert(false && "Unexpected instr type to insert"); 1272 case X86::CMOV_FR32: 1273 case X86::CMOV_FR64: 1274 case X86::CMOV_V4F32: 1275 case X86::CMOV_V2F64: 1276 case X86::CMOV_V2I64: { 1277 // To "insert" a SELECT_CC instruction, we actually have to insert the 1278 // diamond control-flow pattern. The incoming instruction knows the 1279 // destination vreg to set, the condition code register to branch on, the 1280 // true/false values to select between, and a branch opcode to use. 1281 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 1282 ilist<MachineBasicBlock>::iterator It = BB; 1283 ++It; 1284 1285 // thisMBB: 1286 // ... 1287 // TrueVal = ... 
1288 // cmpTY ccX, r1, r2 1289 // bCC copy1MBB 1290 // fallthrough --> copy0MBB 1291 MachineBasicBlock *thisMBB = BB; 1292 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 1293 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 1294 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 1295 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 1296 MachineFunction *F = BB->getParent(); 1297 F->getBasicBlockList().insert(It, copy0MBB); 1298 F->getBasicBlockList().insert(It, sinkMBB); 1299 // Update machine-CFG edges by first adding all successors of the current 1300 // block to the new block which will contain the Phi node for the select. 1301 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 1302 e = BB->succ_end(); i != e; ++i) 1303 sinkMBB->addSuccessor(*i); 1304 // Next, remove all successors of the current block, and add the true 1305 // and fallthrough blocks as its successors. 1306 while(!BB->succ_empty()) 1307 BB->removeSuccessor(BB->succ_begin()); 1308 BB->addSuccessor(copy0MBB); 1309 BB->addSuccessor(sinkMBB); 1310 1311 // copy0MBB: 1312 // %FalseValue = ... 1313 // # fallthrough to sinkMBB 1314 BB = copy0MBB; 1315 1316 // Update machine-CFG edges 1317 BB->addSuccessor(sinkMBB); 1318 1319 // sinkMBB: 1320 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 1321 // ... 1322 BB = sinkMBB; 1323 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 1324 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 1325 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 1326 1327 delete MI; // The pseudo instruction is gone now. 1328 return BB; 1329 } 1330 1331 case X86::FP_TO_INT16_IN_MEM: 1332 case X86::FP_TO_INT32_IN_MEM: 1333 case X86::FP_TO_INT64_IN_MEM: { 1334 // Change the floating point control register to use "round towards zero" 1335 // mode when truncating to an integer value. 
1336 MachineFunction *F = BB->getParent(); 1337 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 1338 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 1339 1340 // Load the old value of the high byte of the control word... 1341 unsigned OldCW = 1342 F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass); 1343 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx); 1344 1345 // Set the high part to be round to zero... 1346 addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F); 1347 1348 // Reload the modified control word now... 1349 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1350 1351 // Restore the memory image of control word to original value 1352 addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW); 1353 1354 // Get the X86 opcode to use. 1355 unsigned Opc; 1356 switch (MI->getOpcode()) { 1357 default: assert(0 && "illegal opcode!"); 1358 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 1359 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 1360 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 1361 } 1362 1363 X86AddressMode AM; 1364 MachineOperand &Op = MI->getOperand(0); 1365 if (Op.isRegister()) { 1366 AM.BaseType = X86AddressMode::RegBase; 1367 AM.Base.Reg = Op.getReg(); 1368 } else { 1369 AM.BaseType = X86AddressMode::FrameIndexBase; 1370 AM.Base.FrameIndex = Op.getFrameIndex(); 1371 } 1372 Op = MI->getOperand(1); 1373 if (Op.isImmediate()) 1374 AM.Scale = Op.getImmedValue(); 1375 Op = MI->getOperand(2); 1376 if (Op.isImmediate()) 1377 AM.IndexReg = Op.getImmedValue(); 1378 Op = MI->getOperand(3); 1379 if (Op.isGlobalAddress()) { 1380 AM.GV = Op.getGlobal(); 1381 } else { 1382 AM.Disp = Op.getImmedValue(); 1383 } 1384 addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg()); 1385 1386 // Reload the original control word now. 
1387 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1388 1389 delete MI; // The pseudo instruction is gone now. 1390 return BB; 1391 } 1392 } 1393} 1394 1395 1396//===----------------------------------------------------------------------===// 1397// X86 Custom Lowering Hooks 1398//===----------------------------------------------------------------------===// 1399 1400/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 1401/// load. For Darwin, external and weak symbols are indirect, loading the value 1402/// at address GV rather then the value of GV itself. This means that the 1403/// GlobalAddress must be in the base or index register of the address, not the 1404/// GV offset field. 1405static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 1406 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 1407 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 1408} 1409 1410/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1411/// true if Op is undef or if its value falls within the specified range (L, H]. 1412static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1413 if (Op.getOpcode() == ISD::UNDEF) 1414 return true; 1415 1416 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1417 return (Val >= Low && Val < Hi); 1418} 1419 1420/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1421/// true if Op is undef or if its value equal to the specified value. 1422static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1423 if (Op.getOpcode() == ISD::UNDEF) 1424 return true; 1425 return cast<ConstantSDNode>(Op)->getValue() == Val; 1426} 1427 1428/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1429/// specifies a shuffle of elements that is suitable for input to PSHUFD. 
1430bool X86::isPSHUFDMask(SDNode *N) { 1431 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1432 1433 if (N->getNumOperands() != 4) 1434 return false; 1435 1436 // Check if the value doesn't reference the second vector. 1437 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1438 SDOperand Arg = N->getOperand(i); 1439 if (Arg.getOpcode() == ISD::UNDEF) continue; 1440 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1441 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1442 return false; 1443 } 1444 1445 return true; 1446} 1447 1448/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1449/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1450bool X86::isPSHUFHWMask(SDNode *N) { 1451 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1452 1453 if (N->getNumOperands() != 8) 1454 return false; 1455 1456 // Lower quadword copied in order. 1457 for (unsigned i = 0; i != 4; ++i) { 1458 SDOperand Arg = N->getOperand(i); 1459 if (Arg.getOpcode() == ISD::UNDEF) continue; 1460 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1461 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1462 return false; 1463 } 1464 1465 // Upper quadword shuffled. 1466 for (unsigned i = 4; i != 8; ++i) { 1467 SDOperand Arg = N->getOperand(i); 1468 if (Arg.getOpcode() == ISD::UNDEF) continue; 1469 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1470 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1471 if (Val < 4 || Val > 7) 1472 return false; 1473 } 1474 1475 return true; 1476} 1477 1478/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1479/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1480bool X86::isPSHUFLWMask(SDNode *N) { 1481 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1482 1483 if (N->getNumOperands() != 8) 1484 return false; 1485 1486 // Upper quadword copied in order. 
1487 for (unsigned i = 4; i != 8; ++i) 1488 if (!isUndefOrEqual(N->getOperand(i), i)) 1489 return false; 1490 1491 // Lower quadword shuffled. 1492 for (unsigned i = 0; i != 4; ++i) 1493 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1494 return false; 1495 1496 return true; 1497} 1498 1499/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1500/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1501bool X86::isSHUFPMask(SDNode *N) { 1502 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1503 1504 unsigned NumElems = N->getNumOperands(); 1505 if (NumElems == 2) { 1506 // The only cases that ought be handled by SHUFPD is 1507 // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1508 // Dest { 3, 0 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1509 // Expect bit 0 == 1, bit1 == 2 1510 SDOperand Bit0 = N->getOperand(0); 1511 SDOperand Bit1 = N->getOperand(1); 1512 if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3)) 1513 return true; 1514 if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2)) 1515 return true; 1516 return false; 1517 } 1518 1519 if (NumElems != 4) return false; 1520 1521 // Each half must refer to only one of the vector. 1522 for (unsigned i = 0; i < 2; ++i) { 1523 SDOperand Arg = N->getOperand(i); 1524 if (Arg.getOpcode() == ISD::UNDEF) continue; 1525 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1526 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1527 if (Val >= 4) return false; 1528 } 1529 for (unsigned i = 2; i < 4; ++i) { 1530 SDOperand Arg = N->getOperand(i); 1531 if (Arg.getOpcode() == ISD::UNDEF) continue; 1532 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1533 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1534 if (Val < 4) return false; 1535 } 1536 1537 return true; 1538} 1539 1540/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1541/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 
1542bool X86::isMOVHLPSMask(SDNode *N) { 1543 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1544 1545 if (N->getNumOperands() != 4) 1546 return false; 1547 1548 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1549 return isUndefOrEqual(N->getOperand(0), 6) && 1550 isUndefOrEqual(N->getOperand(1), 7) && 1551 isUndefOrEqual(N->getOperand(2), 2) && 1552 isUndefOrEqual(N->getOperand(3), 3); 1553} 1554 1555/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand 1556/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1557bool X86::isMOVLHPSMask(SDNode *N) { 1558 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1559 1560 if (N->getNumOperands() != 4) 1561 return false; 1562 1563 // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5 1564 return isUndefOrEqual(N->getOperand(0), 0) && 1565 isUndefOrEqual(N->getOperand(1), 1) && 1566 isUndefOrEqual(N->getOperand(2), 4) && 1567 isUndefOrEqual(N->getOperand(3), 5); 1568} 1569 1570/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1571/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1572bool X86::isMOVLPMask(SDNode *N) { 1573 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1574 1575 unsigned NumElems = N->getNumOperands(); 1576 if (NumElems != 2 && NumElems != 4) 1577 return false; 1578 1579 for (unsigned i = 0; i < NumElems/2; ++i) 1580 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1581 return false; 1582 1583 for (unsigned i = NumElems/2; i < NumElems; ++i) 1584 if (!isUndefOrEqual(N->getOperand(i), i)) 1585 return false; 1586 1587 return true; 1588} 1589 1590/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1591/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}. 
1592bool X86::isMOVHPMask(SDNode *N) { 1593 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1594 1595 unsigned NumElems = N->getNumOperands(); 1596 if (NumElems != 2 && NumElems != 4) 1597 return false; 1598 1599 for (unsigned i = 0; i < NumElems/2; ++i) 1600 if (!isUndefOrEqual(N->getOperand(i), i)) 1601 return false; 1602 1603 for (unsigned i = 0; i < NumElems/2; ++i) { 1604 SDOperand Arg = N->getOperand(i + NumElems/2); 1605 if (!isUndefOrEqual(Arg, i + NumElems)) 1606 return false; 1607 } 1608 1609 return true; 1610} 1611 1612/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1613/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1614bool X86::isUNPCKLMask(SDNode *N) { 1615 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1616 1617 unsigned NumElems = N->getNumOperands(); 1618 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1619 return false; 1620 1621 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1622 SDOperand BitI = N->getOperand(i); 1623 SDOperand BitI1 = N->getOperand(i+1); 1624 if (!isUndefOrEqual(BitI, j)) 1625 return false; 1626 if (!isUndefOrEqual(BitI1, j + NumElems)) 1627 return false; 1628 } 1629 1630 return true; 1631} 1632 1633/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1634/// specifies a shuffle of elements that is suitable for input to UNPCKH. 
1635bool X86::isUNPCKHMask(SDNode *N) { 1636 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1637 1638 unsigned NumElems = N->getNumOperands(); 1639 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1640 return false; 1641 1642 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1643 SDOperand BitI = N->getOperand(i); 1644 SDOperand BitI1 = N->getOperand(i+1); 1645 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1646 return false; 1647 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1648 return false; 1649 } 1650 1651 return true; 1652} 1653 1654/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1655/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1656/// <0, 0, 1, 1> 1657bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1658 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1659 1660 unsigned NumElems = N->getNumOperands(); 1661 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1662 return false; 1663 1664 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1665 SDOperand BitI = N->getOperand(i); 1666 SDOperand BitI1 = N->getOperand(i+1); 1667 1668 if (!isUndefOrEqual(BitI, j)) 1669 return false; 1670 if (!isUndefOrEqual(BitI1, j)) 1671 return false; 1672 } 1673 1674 return true; 1675} 1676 1677/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand 1678/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}. 
1679bool X86::isMOVSMask(SDNode *N) { 1680 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1681 1682 unsigned NumElems = N->getNumOperands(); 1683 if (NumElems != 2 && NumElems != 4) 1684 return false; 1685 1686 if (!isUndefOrEqual(N->getOperand(0), NumElems)) 1687 return false; 1688 1689 for (unsigned i = 1; i < NumElems; ++i) { 1690 SDOperand Arg = N->getOperand(i); 1691 if (!isUndefOrEqual(Arg, i)) 1692 return false; 1693 } 1694 1695 return true; 1696} 1697 1698/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1699/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1700bool X86::isMOVSHDUPMask(SDNode *N) { 1701 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1702 1703 if (N->getNumOperands() != 4) 1704 return false; 1705 1706 // Expect 1, 1, 3, 3 1707 for (unsigned i = 0; i < 2; ++i) { 1708 SDOperand Arg = N->getOperand(i); 1709 if (Arg.getOpcode() == ISD::UNDEF) continue; 1710 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1711 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1712 if (Val != 1) return false; 1713 } 1714 1715 bool HasHi = false; 1716 for (unsigned i = 2; i < 4; ++i) { 1717 SDOperand Arg = N->getOperand(i); 1718 if (Arg.getOpcode() == ISD::UNDEF) continue; 1719 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1720 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1721 if (Val != 3) return false; 1722 HasHi = true; 1723 } 1724 1725 // Don't use movshdup if it can be done with a shufps. 1726 return HasHi; 1727} 1728 1729/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1730/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
1731bool X86::isMOVSLDUPMask(SDNode *N) { 1732 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1733 1734 if (N->getNumOperands() != 4) 1735 return false; 1736 1737 // Expect 0, 0, 2, 2 1738 for (unsigned i = 0; i < 2; ++i) { 1739 SDOperand Arg = N->getOperand(i); 1740 if (Arg.getOpcode() == ISD::UNDEF) continue; 1741 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1742 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1743 if (Val != 0) return false; 1744 } 1745 1746 bool HasHi = false; 1747 for (unsigned i = 2; i < 4; ++i) { 1748 SDOperand Arg = N->getOperand(i); 1749 if (Arg.getOpcode() == ISD::UNDEF) continue; 1750 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1751 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1752 if (Val != 2) return false; 1753 HasHi = true; 1754 } 1755 1756 // Don't use movshdup if it can be done with a shufps. 1757 return HasHi; 1758} 1759 1760/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1761/// a splat of a single element. 1762static bool isSplatMask(SDNode *N) { 1763 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1764 1765 // This is a splat operation if each element of the permute is the same, and 1766 // if the value doesn't reference the second vector. 1767 SDOperand Elt = N->getOperand(0); 1768 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 1769 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) { 1770 SDOperand Arg = N->getOperand(i); 1771 if (Arg.getOpcode() == ISD::UNDEF) continue; 1772 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1773 if (Arg != Elt) return false; 1774 } 1775 1776 // Make sure it is a splat of the first vector operand. 1777 return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands(); 1778} 1779 1780/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1781/// a splat of a single element and it's a 2 or 4 element mask. 
1782bool X86::isSplatMask(SDNode *N) { 1783 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1784 1785 // We can only splat 64-bit, and 32-bit quantities. 1786 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 1787 return false; 1788 return ::isSplatMask(N); 1789} 1790 1791/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 1792/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 1793/// instructions. 1794unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 1795 unsigned NumOperands = N->getNumOperands(); 1796 unsigned Shift = (NumOperands == 4) ? 2 : 1; 1797 unsigned Mask = 0; 1798 for (unsigned i = 0; i < NumOperands; ++i) { 1799 unsigned Val = 0; 1800 SDOperand Arg = N->getOperand(NumOperands-i-1); 1801 if (Arg.getOpcode() != ISD::UNDEF) 1802 Val = cast<ConstantSDNode>(Arg)->getValue(); 1803 if (Val >= NumOperands) Val -= NumOperands; 1804 Mask |= Val; 1805 if (i != NumOperands - 1) 1806 Mask <<= Shift; 1807 } 1808 1809 return Mask; 1810} 1811 1812/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 1813/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 1814/// instructions. 1815unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 1816 unsigned Mask = 0; 1817 // 8 nodes, but we only care about the last 4. 1818 for (unsigned i = 7; i >= 4; --i) { 1819 unsigned Val = 0; 1820 SDOperand Arg = N->getOperand(i); 1821 if (Arg.getOpcode() != ISD::UNDEF) 1822 Val = cast<ConstantSDNode>(Arg)->getValue(); 1823 Mask |= (Val - 4); 1824 if (i != 4) 1825 Mask <<= 2; 1826 } 1827 1828 return Mask; 1829} 1830 1831/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 1832/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 1833/// instructions. 1834unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 1835 unsigned Mask = 0; 1836 // 8 nodes, but we only care about the first 4. 
1837 for (int i = 3; i >= 0; --i) { 1838 unsigned Val = 0; 1839 SDOperand Arg = N->getOperand(i); 1840 if (Arg.getOpcode() != ISD::UNDEF) 1841 Val = cast<ConstantSDNode>(Arg)->getValue(); 1842 Mask |= Val; 1843 if (i != 0) 1844 Mask <<= 2; 1845 } 1846 1847 return Mask; 1848} 1849 1850/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 1851/// specifies a 8 element shuffle that can be broken into a pair of 1852/// PSHUFHW and PSHUFLW. 1853static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 1854 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1855 1856 if (N->getNumOperands() != 8) 1857 return false; 1858 1859 // Lower quadword shuffled. 1860 for (unsigned i = 0; i != 4; ++i) { 1861 SDOperand Arg = N->getOperand(i); 1862 if (Arg.getOpcode() == ISD::UNDEF) continue; 1863 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1864 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1865 if (Val > 4) 1866 return false; 1867 } 1868 1869 // Upper quadword shuffled. 1870 for (unsigned i = 4; i != 8; ++i) { 1871 SDOperand Arg = N->getOperand(i); 1872 if (Arg.getOpcode() == ISD::UNDEF) continue; 1873 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1874 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1875 if (Val < 4 || Val > 7) 1876 return false; 1877 } 1878 1879 return true; 1880} 1881 1882/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 1883/// values in ther permute mask. 
1884static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 1885 SDOperand V1 = Op.getOperand(0); 1886 SDOperand V2 = Op.getOperand(1); 1887 SDOperand Mask = Op.getOperand(2); 1888 MVT::ValueType VT = Op.getValueType(); 1889 MVT::ValueType MaskVT = Mask.getValueType(); 1890 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 1891 unsigned NumElems = Mask.getNumOperands(); 1892 std::vector<SDOperand> MaskVec; 1893 1894 for (unsigned i = 0; i != NumElems; ++i) { 1895 SDOperand Arg = Mask.getOperand(i); 1896 if (Arg.getOpcode() == ISD::UNDEF) continue; 1897 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1898 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1899 if (Val < NumElems) 1900 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 1901 else 1902 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 1903 } 1904 1905 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1906 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 1907} 1908 1909/// isScalarLoadToVector - Returns true if the node is a scalar load that 1910/// is promoted to a vector. 1911static inline bool isScalarLoadToVector(SDOperand Op) { 1912 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1913 Op = Op.getOperand(0); 1914 return (Op.getOpcode() == ISD::LOAD); 1915 } 1916 return false; 1917} 1918 1919/// ShouldXformedToMOVLP - Return true if the node should be transformed to 1920/// match movlp{d|s}. The lower half elements should come from V1 (and in 1921/// order), and the upper half elements should come from the upper half of 1922/// V2 (not necessarily in order). And since V1 will become the source of 1923/// the MOVLP, it must be a scalar load. 
1924static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) { 1925 if (isScalarLoadToVector(V1)) { 1926 unsigned NumElems = Mask.getNumOperands(); 1927 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1928 if (!isUndefOrEqual(Mask.getOperand(i), i)) 1929 return false; 1930 for (unsigned i = NumElems/2; i != NumElems; ++i) 1931 if (!isUndefOrInRange(Mask.getOperand(i), 1932 NumElems+NumElems/2, NumElems*2)) 1933 return false; 1934 return true; 1935 } 1936 1937 return false; 1938} 1939 1940/// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except 1941/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1942/// half elements to come from vector 1 (which would equal the dest.) and 1943/// the upper half to come from vector 2. 1944static bool isLowerFromV2UpperFromV1(SDOperand Op) { 1945 assert(Op.getOpcode() == ISD::BUILD_VECTOR); 1946 1947 unsigned NumElems = Op.getNumOperands(); 1948 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1949 if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2)) 1950 return false; 1951 for (unsigned i = NumElems/2; i != NumElems; ++i) 1952 if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems)) 1953 return false; 1954 return true; 1955} 1956 1957/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 1958/// of specified width. 1959static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 1960 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 1961 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 1962 std::vector<SDOperand> MaskVec; 1963 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 1964 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 1965 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 1966 } 1967 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1968} 1969 1970/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
1971/// 1972static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 1973 SDOperand V1 = Op.getOperand(0); 1974 SDOperand PermMask = Op.getOperand(2); 1975 MVT::ValueType VT = Op.getValueType(); 1976 unsigned NumElems = PermMask.getNumOperands(); 1977 PermMask = getUnpacklMask(NumElems, DAG); 1978 while (NumElems != 4) { 1979 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, PermMask); 1980 NumElems >>= 1; 1981 } 1982 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 1983 1984 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 1985 SDOperand Zero = DAG.getConstant(0, MVT::getVectorBaseType(MaskVT)); 1986 std::vector<SDOperand> ZeroVec(4, Zero); 1987 SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, ZeroVec); 1988 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 1989 DAG.getNode(ISD::UNDEF, MVT::v4i32), 1990 SplatMask); 1991 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 1992} 1993 1994/// LowerOperation - Provide custom lowering hooks for some operations. 1995/// 1996SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 1997 switch (Op.getOpcode()) { 1998 default: assert(0 && "Should not custom lower this!"); 1999 case ISD::SHL_PARTS: 2000 case ISD::SRA_PARTS: 2001 case ISD::SRL_PARTS: { 2002 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2003 "Not an i64 shift!"); 2004 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 2005 SDOperand ShOpLo = Op.getOperand(0); 2006 SDOperand ShOpHi = Op.getOperand(1); 2007 SDOperand ShAmt = Op.getOperand(2); 2008 SDOperand Tmp1 = isSRA ? 
DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 2009 DAG.getConstant(31, MVT::i8)) 2010 : DAG.getConstant(0, MVT::i32); 2011 2012 SDOperand Tmp2, Tmp3; 2013 if (Op.getOpcode() == ISD::SHL_PARTS) { 2014 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 2015 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 2016 } else { 2017 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 2018 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 2019 } 2020 2021 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 2022 ShAmt, DAG.getConstant(32, MVT::i8)); 2023 2024 SDOperand Hi, Lo; 2025 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2026 2027 std::vector<MVT::ValueType> Tys; 2028 Tys.push_back(MVT::i32); 2029 Tys.push_back(MVT::Flag); 2030 std::vector<SDOperand> Ops; 2031 if (Op.getOpcode() == ISD::SHL_PARTS) { 2032 Ops.push_back(Tmp2); 2033 Ops.push_back(Tmp3); 2034 Ops.push_back(CC); 2035 Ops.push_back(InFlag); 2036 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2037 InFlag = Hi.getValue(1); 2038 2039 Ops.clear(); 2040 Ops.push_back(Tmp3); 2041 Ops.push_back(Tmp1); 2042 Ops.push_back(CC); 2043 Ops.push_back(InFlag); 2044 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2045 } else { 2046 Ops.push_back(Tmp2); 2047 Ops.push_back(Tmp3); 2048 Ops.push_back(CC); 2049 Ops.push_back(InFlag); 2050 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2051 InFlag = Lo.getValue(1); 2052 2053 Ops.clear(); 2054 Ops.push_back(Tmp3); 2055 Ops.push_back(Tmp1); 2056 Ops.push_back(CC); 2057 Ops.push_back(InFlag); 2058 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2059 } 2060 2061 Tys.clear(); 2062 Tys.push_back(MVT::i32); 2063 Tys.push_back(MVT::i32); 2064 Ops.clear(); 2065 Ops.push_back(Lo); 2066 Ops.push_back(Hi); 2067 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2068 } 2069 case ISD::SINT_TO_FP: { 2070 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 2071 Op.getOperand(0).getValueType() >= MVT::i16 && 2072 "Unknown SINT_TO_FP to lower!"); 2073 
2074 SDOperand Result; 2075 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 2076 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 2077 MachineFunction &MF = DAG.getMachineFunction(); 2078 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2079 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2080 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 2081 DAG.getEntryNode(), Op.getOperand(0), 2082 StackSlot, DAG.getSrcValue(NULL)); 2083 2084 // Build the FILD 2085 std::vector<MVT::ValueType> Tys; 2086 Tys.push_back(MVT::f64); 2087 Tys.push_back(MVT::Other); 2088 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 2089 std::vector<SDOperand> Ops; 2090 Ops.push_back(Chain); 2091 Ops.push_back(StackSlot); 2092 Ops.push_back(DAG.getValueType(SrcVT)); 2093 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 2094 Tys, Ops); 2095 2096 if (X86ScalarSSE) { 2097 Chain = Result.getValue(1); 2098 SDOperand InFlag = Result.getValue(2); 2099 2100 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 2101 // shouldn't be necessary except that RFP cannot be live across 2102 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
2103 MachineFunction &MF = DAG.getMachineFunction(); 2104 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2105 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2106 std::vector<MVT::ValueType> Tys; 2107 Tys.push_back(MVT::Other); 2108 std::vector<SDOperand> Ops; 2109 Ops.push_back(Chain); 2110 Ops.push_back(Result); 2111 Ops.push_back(StackSlot); 2112 Ops.push_back(DAG.getValueType(Op.getValueType())); 2113 Ops.push_back(InFlag); 2114 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 2115 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 2116 DAG.getSrcValue(NULL)); 2117 } 2118 2119 return Result; 2120 } 2121 case ISD::FP_TO_SINT: { 2122 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 2123 "Unknown FP_TO_SINT to lower!"); 2124 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 2125 // stack slot. 2126 MachineFunction &MF = DAG.getMachineFunction(); 2127 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 2128 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2129 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2130 2131 unsigned Opc; 2132 switch (Op.getValueType()) { 2133 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 2134 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 2135 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 2136 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 2137 } 2138 2139 SDOperand Chain = DAG.getEntryNode(); 2140 SDOperand Value = Op.getOperand(0); 2141 if (X86ScalarSSE) { 2142 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 2143 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 2144 DAG.getSrcValue(0)); 2145 std::vector<MVT::ValueType> Tys; 2146 Tys.push_back(MVT::f64); 2147 Tys.push_back(MVT::Other); 2148 std::vector<SDOperand> Ops; 2149 Ops.push_back(Chain); 2150 Ops.push_back(StackSlot); 2151 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 
2152 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 2153 Chain = Value.getValue(1); 2154 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2155 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2156 } 2157 2158 // Build the FP_TO_INT*_IN_MEM 2159 std::vector<SDOperand> Ops; 2160 Ops.push_back(Chain); 2161 Ops.push_back(Value); 2162 Ops.push_back(StackSlot); 2163 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 2164 2165 // Load the result. 2166 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2167 DAG.getSrcValue(NULL)); 2168 } 2169 case ISD::READCYCLECOUNTER: { 2170 std::vector<MVT::ValueType> Tys; 2171 Tys.push_back(MVT::Other); 2172 Tys.push_back(MVT::Flag); 2173 std::vector<SDOperand> Ops; 2174 Ops.push_back(Op.getOperand(0)); 2175 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 2176 Ops.clear(); 2177 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 2178 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 2179 MVT::i32, Ops[0].getValue(2))); 2180 Ops.push_back(Ops[1].getValue(1)); 2181 Tys[0] = Tys[1] = MVT::i32; 2182 Tys.push_back(MVT::Other); 2183 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2184 } 2185 case ISD::FABS: { 2186 MVT::ValueType VT = Op.getValueType(); 2187 const Type *OpNTy = MVT::getTypeForValueType(VT); 2188 std::vector<Constant*> CV; 2189 if (VT == MVT::f64) { 2190 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2191 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2192 } else { 2193 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2194 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2195 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2196 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2197 } 2198 Constant *CS = ConstantStruct::get(CV); 2199 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2200 SDOperand Mask 2201 = DAG.getNode(X86ISD::LOAD_PACK, 2202 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2203 return DAG.getNode(X86ISD::FAND, 
VT, Op.getOperand(0), Mask); 2204 } 2205 case ISD::FNEG: { 2206 MVT::ValueType VT = Op.getValueType(); 2207 const Type *OpNTy = MVT::getTypeForValueType(VT); 2208 std::vector<Constant*> CV; 2209 if (VT == MVT::f64) { 2210 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 2211 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2212 } else { 2213 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 2214 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2215 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2216 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2217 } 2218 Constant *CS = ConstantStruct::get(CV); 2219 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2220 SDOperand Mask 2221 = DAG.getNode(X86ISD::LOAD_PACK, 2222 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2223 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 2224 } 2225 case ISD::SETCC: { 2226 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 2227 SDOperand Cond; 2228 SDOperand CC = Op.getOperand(2); 2229 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2230 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 2231 bool Flip; 2232 unsigned X86CC; 2233 if (translateX86CC(CC, isFP, X86CC, Flip)) { 2234 if (Flip) 2235 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2236 Op.getOperand(1), Op.getOperand(0)); 2237 else 2238 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2239 Op.getOperand(0), Op.getOperand(1)); 2240 return DAG.getNode(X86ISD::SETCC, MVT::i8, 2241 DAG.getConstant(X86CC, MVT::i8), Cond); 2242 } else { 2243 assert(isFP && "Illegal integer SetCC!"); 2244 2245 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2246 Op.getOperand(0), Op.getOperand(1)); 2247 std::vector<MVT::ValueType> Tys; 2248 std::vector<SDOperand> Ops; 2249 switch (SetCCOpcode) { 2250 default: assert(false && "Illegal floating point SetCC!"); 2251 case ISD::SETOEQ: { // !PF & ZF 2252 Tys.push_back(MVT::i8); 2253 Tys.push_back(MVT::Flag); 2254 
Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 2255 Ops.push_back(Cond); 2256 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2257 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2258 DAG.getConstant(X86ISD::COND_E, MVT::i8), 2259 Tmp1.getValue(1)); 2260 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 2261 } 2262 case ISD::SETUNE: { // PF | !ZF 2263 Tys.push_back(MVT::i8); 2264 Tys.push_back(MVT::Flag); 2265 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 2266 Ops.push_back(Cond); 2267 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2268 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2269 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 2270 Tmp1.getValue(1)); 2271 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 2272 } 2273 } 2274 } 2275 } 2276 case ISD::SELECT: { 2277 MVT::ValueType VT = Op.getValueType(); 2278 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 2279 bool addTest = false; 2280 SDOperand Op0 = Op.getOperand(0); 2281 SDOperand Cond, CC; 2282 if (Op0.getOpcode() == ISD::SETCC) 2283 Op0 = LowerOperation(Op0, DAG); 2284 2285 if (Op0.getOpcode() == X86ISD::SETCC) { 2286 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2287 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2288 // have another use it will be eliminated. 2289 // If the X86ISD::SETCC has more than one use, then it's probably better 2290 // to use a test instead of duplicating the X86ISD::CMP (for register 2291 // pressure reason). 
2292 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 2293 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2294 CmpOpc == X86ISD::UCOMI) { 2295 if (!Op0.hasOneUse()) { 2296 std::vector<MVT::ValueType> Tys; 2297 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 2298 Tys.push_back(Op0.Val->getValueType(i)); 2299 std::vector<SDOperand> Ops; 2300 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 2301 Ops.push_back(Op0.getOperand(i)); 2302 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2303 } 2304 2305 CC = Op0.getOperand(0); 2306 Cond = Op0.getOperand(1); 2307 // Make a copy as flag result cannot be used by more than one. 2308 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2309 Cond.getOperand(0), Cond.getOperand(1)); 2310 addTest = 2311 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 2312 } else 2313 addTest = true; 2314 } else 2315 addTest = true; 2316 2317 if (addTest) { 2318 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2319 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 2320 } 2321 2322 std::vector<MVT::ValueType> Tys; 2323 Tys.push_back(Op.getValueType()); 2324 Tys.push_back(MVT::Flag); 2325 std::vector<SDOperand> Ops; 2326 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 2327 // condition is true. 2328 Ops.push_back(Op.getOperand(2)); 2329 Ops.push_back(Op.getOperand(1)); 2330 Ops.push_back(CC); 2331 Ops.push_back(Cond); 2332 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 2333 } 2334 case ISD::BRCOND: { 2335 bool addTest = false; 2336 SDOperand Cond = Op.getOperand(1); 2337 SDOperand Dest = Op.getOperand(2); 2338 SDOperand CC; 2339 if (Cond.getOpcode() == ISD::SETCC) 2340 Cond = LowerOperation(Cond, DAG); 2341 2342 if (Cond.getOpcode() == X86ISD::SETCC) { 2343 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2344 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2345 // have another use it will be eliminated. 
2346 // If the X86ISD::SETCC has more than one use, then it's probably better 2347 // to use a test instead of duplicating the X86ISD::CMP (for register 2348 // pressure reason). 2349 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 2350 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2351 CmpOpc == X86ISD::UCOMI) { 2352 if (!Cond.hasOneUse()) { 2353 std::vector<MVT::ValueType> Tys; 2354 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 2355 Tys.push_back(Cond.Val->getValueType(i)); 2356 std::vector<SDOperand> Ops; 2357 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 2358 Ops.push_back(Cond.getOperand(i)); 2359 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2360 } 2361 2362 CC = Cond.getOperand(0); 2363 Cond = Cond.getOperand(1); 2364 // Make a copy as flag result cannot be used by more than one. 2365 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2366 Cond.getOperand(0), Cond.getOperand(1)); 2367 } else 2368 addTest = true; 2369 } else 2370 addTest = true; 2371 2372 if (addTest) { 2373 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2374 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 2375 } 2376 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 2377 Op.getOperand(0), Op.getOperand(2), CC, Cond); 2378 } 2379 case ISD::MEMSET: { 2380 SDOperand InFlag(0, 0); 2381 SDOperand Chain = Op.getOperand(0); 2382 unsigned Align = 2383 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2384 if (Align == 0) Align = 1; 2385 2386 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2387 // If not DWORD aligned, call memset if size is less than the threshold. 2388 // It knows how to align to the right boundary first. 
2389 if ((Align & 3) != 0 || 2390 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2391 MVT::ValueType IntPtr = getPointerTy(); 2392 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2393 std::vector<std::pair<SDOperand, const Type*> > Args; 2394 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2395 // Extend the ubyte argument to be an int value for the call. 2396 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 2397 Args.push_back(std::make_pair(Val, IntPtrTy)); 2398 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2399 std::pair<SDOperand,SDOperand> CallResult = 2400 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2401 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 2402 return CallResult.second; 2403 } 2404 2405 MVT::ValueType AVT; 2406 SDOperand Count; 2407 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2408 unsigned BytesLeft = 0; 2409 bool TwoRepStos = false; 2410 if (ValC) { 2411 unsigned ValReg; 2412 unsigned Val = ValC->getValue() & 255; 2413 2414 // If the value is a constant, then we can potentially use larger sets. 
2415 switch (Align & 3) { 2416 case 2: // WORD aligned 2417 AVT = MVT::i16; 2418 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2419 BytesLeft = I->getValue() % 2; 2420 Val = (Val << 8) | Val; 2421 ValReg = X86::AX; 2422 break; 2423 case 0: // DWORD aligned 2424 AVT = MVT::i32; 2425 if (I) { 2426 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2427 BytesLeft = I->getValue() % 4; 2428 } else { 2429 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2430 DAG.getConstant(2, MVT::i8)); 2431 TwoRepStos = true; 2432 } 2433 Val = (Val << 8) | Val; 2434 Val = (Val << 16) | Val; 2435 ValReg = X86::EAX; 2436 break; 2437 default: // Byte aligned 2438 AVT = MVT::i8; 2439 Count = Op.getOperand(3); 2440 ValReg = X86::AL; 2441 break; 2442 } 2443 2444 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 2445 InFlag); 2446 InFlag = Chain.getValue(1); 2447 } else { 2448 AVT = MVT::i8; 2449 Count = Op.getOperand(3); 2450 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 2451 InFlag = Chain.getValue(1); 2452 } 2453 2454 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2455 InFlag = Chain.getValue(1); 2456 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2457 InFlag = Chain.getValue(1); 2458 2459 std::vector<MVT::ValueType> Tys; 2460 Tys.push_back(MVT::Other); 2461 Tys.push_back(MVT::Flag); 2462 std::vector<SDOperand> Ops; 2463 Ops.push_back(Chain); 2464 Ops.push_back(DAG.getValueType(AVT)); 2465 Ops.push_back(InFlag); 2466 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2467 2468 if (TwoRepStos) { 2469 InFlag = Chain.getValue(1); 2470 Count = Op.getOperand(3); 2471 MVT::ValueType CVT = Count.getValueType(); 2472 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2473 DAG.getConstant(3, CVT)); 2474 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2475 InFlag = Chain.getValue(1); 2476 Tys.clear(); 2477 Tys.push_back(MVT::Other); 2478 Tys.push_back(MVT::Flag); 2479 Ops.clear(); 2480 
Ops.push_back(Chain); 2481 Ops.push_back(DAG.getValueType(MVT::i8)); 2482 Ops.push_back(InFlag); 2483 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2484 } else if (BytesLeft) { 2485 // Issue stores for the last 1 - 3 bytes. 2486 SDOperand Value; 2487 unsigned Val = ValC->getValue() & 255; 2488 unsigned Offset = I->getValue() - BytesLeft; 2489 SDOperand DstAddr = Op.getOperand(1); 2490 MVT::ValueType AddrVT = DstAddr.getValueType(); 2491 if (BytesLeft >= 2) { 2492 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 2493 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2494 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2495 DAG.getConstant(Offset, AddrVT)), 2496 DAG.getSrcValue(NULL)); 2497 BytesLeft -= 2; 2498 Offset += 2; 2499 } 2500 2501 if (BytesLeft == 1) { 2502 Value = DAG.getConstant(Val, MVT::i8); 2503 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2504 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2505 DAG.getConstant(Offset, AddrVT)), 2506 DAG.getSrcValue(NULL)); 2507 } 2508 } 2509 2510 return Chain; 2511 } 2512 case ISD::MEMCPY: { 2513 SDOperand Chain = Op.getOperand(0); 2514 unsigned Align = 2515 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2516 if (Align == 0) Align = 1; 2517 2518 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2519 // If not DWORD aligned, call memcpy if size is less than the threshold. 2520 // It knows how to align to the right boundary first. 
2521 if ((Align & 3) != 0 || 2522 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2523 MVT::ValueType IntPtr = getPointerTy(); 2524 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2525 std::vector<std::pair<SDOperand, const Type*> > Args; 2526 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2527 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 2528 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2529 std::pair<SDOperand,SDOperand> CallResult = 2530 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2531 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 2532 return CallResult.second; 2533 } 2534 2535 MVT::ValueType AVT; 2536 SDOperand Count; 2537 unsigned BytesLeft = 0; 2538 bool TwoRepMovs = false; 2539 switch (Align & 3) { 2540 case 2: // WORD aligned 2541 AVT = MVT::i16; 2542 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2543 BytesLeft = I->getValue() % 2; 2544 break; 2545 case 0: // DWORD aligned 2546 AVT = MVT::i32; 2547 if (I) { 2548 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2549 BytesLeft = I->getValue() % 4; 2550 } else { 2551 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2552 DAG.getConstant(2, MVT::i8)); 2553 TwoRepMovs = true; 2554 } 2555 break; 2556 default: // Byte aligned 2557 AVT = MVT::i8; 2558 Count = Op.getOperand(3); 2559 break; 2560 } 2561 2562 SDOperand InFlag(0, 0); 2563 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2564 InFlag = Chain.getValue(1); 2565 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2566 InFlag = Chain.getValue(1); 2567 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 2568 InFlag = Chain.getValue(1); 2569 2570 std::vector<MVT::ValueType> Tys; 2571 Tys.push_back(MVT::Other); 2572 Tys.push_back(MVT::Flag); 2573 std::vector<SDOperand> Ops; 2574 Ops.push_back(Chain); 2575 Ops.push_back(DAG.getValueType(AVT)); 2576 Ops.push_back(InFlag); 2577 Chain = 
DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2578 2579 if (TwoRepMovs) { 2580 InFlag = Chain.getValue(1); 2581 Count = Op.getOperand(3); 2582 MVT::ValueType CVT = Count.getValueType(); 2583 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2584 DAG.getConstant(3, CVT)); 2585 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2586 InFlag = Chain.getValue(1); 2587 Tys.clear(); 2588 Tys.push_back(MVT::Other); 2589 Tys.push_back(MVT::Flag); 2590 Ops.clear(); 2591 Ops.push_back(Chain); 2592 Ops.push_back(DAG.getValueType(MVT::i8)); 2593 Ops.push_back(InFlag); 2594 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2595 } else if (BytesLeft) { 2596 // Issue loads and stores for the last 1 - 3 bytes. 2597 unsigned Offset = I->getValue() - BytesLeft; 2598 SDOperand DstAddr = Op.getOperand(1); 2599 MVT::ValueType DstVT = DstAddr.getValueType(); 2600 SDOperand SrcAddr = Op.getOperand(2); 2601 MVT::ValueType SrcVT = SrcAddr.getValueType(); 2602 SDOperand Value; 2603 if (BytesLeft >= 2) { 2604 Value = DAG.getLoad(MVT::i16, Chain, 2605 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2606 DAG.getConstant(Offset, SrcVT)), 2607 DAG.getSrcValue(NULL)); 2608 Chain = Value.getValue(1); 2609 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2610 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2611 DAG.getConstant(Offset, DstVT)), 2612 DAG.getSrcValue(NULL)); 2613 BytesLeft -= 2; 2614 Offset += 2; 2615 } 2616 2617 if (BytesLeft == 1) { 2618 Value = DAG.getLoad(MVT::i8, Chain, 2619 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2620 DAG.getConstant(Offset, SrcVT)), 2621 DAG.getSrcValue(NULL)); 2622 Chain = Value.getValue(1); 2623 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2624 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2625 DAG.getConstant(Offset, DstVT)), 2626 DAG.getSrcValue(NULL)); 2627 } 2628 } 2629 2630 return Chain; 2631 } 2632 2633 // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their 2634 // target countpart wrapped in the X86ISD::Wrapper node. 
Suppose N is 2635 // one of the above mentioned nodes. It has to be wrapped because otherwise 2636 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2637 // be used to form addressing mode. These wrapped nodes will be selected 2638 // into MOV32ri. 2639 case ISD::ConstantPool: { 2640 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2641 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2642 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2643 CP->getAlignment())); 2644 if (Subtarget->isTargetDarwin()) { 2645 // With PIC, the address is actually $g + Offset. 2646 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2647 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2648 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2649 } 2650 2651 return Result; 2652 } 2653 case ISD::GlobalAddress: { 2654 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2655 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2656 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2657 if (Subtarget->isTargetDarwin()) { 2658 // With PIC, the address is actually $g + Offset. 2659 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2660 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2661 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2662 2663 // For Darwin, external and weak symbols are indirect, so we want to load 2664 // the value at address GV, not the value of GV itself. This means that 2665 // the GlobalAddress must be in the base or index register of the address, 2666 // not the GV offset field. 
2667 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2668 DarwinGVRequiresExtraLoad(GV)) 2669 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2670 Result, DAG.getSrcValue(NULL)); 2671 } 2672 2673 return Result; 2674 } 2675 case ISD::ExternalSymbol: { 2676 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2677 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2678 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2679 if (Subtarget->isTargetDarwin()) { 2680 // With PIC, the address is actually $g + Offset. 2681 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2682 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2683 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2684 } 2685 2686 return Result; 2687 } 2688 case ISD::VASTART: { 2689 // vastart just stores the address of the VarArgsFrameIndex slot into the 2690 // memory location argument. 2691 // FIXME: Replace MVT::i32 with PointerTy 2692 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 2693 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 2694 Op.getOperand(1), Op.getOperand(2)); 2695 } 2696 case ISD::RET: { 2697 SDOperand Copy; 2698 2699 switch(Op.getNumOperands()) { 2700 default: 2701 assert(0 && "Do not know how to return this many arguments!"); 2702 abort(); 2703 case 1: // ret void. 2704 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 2705 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 2706 case 2: { 2707 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 2708 2709 if (MVT::isVector(ArgVT)) { 2710 // Integer or FP vector result -> XMM0. 
2711 if (DAG.getMachineFunction().liveout_empty()) 2712 DAG.getMachineFunction().addLiveOut(X86::XMM0); 2713 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 2714 SDOperand()); 2715 } else if (MVT::isInteger(ArgVT)) { 2716 // Integer result -> EAX 2717 if (DAG.getMachineFunction().liveout_empty()) 2718 DAG.getMachineFunction().addLiveOut(X86::EAX); 2719 2720 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 2721 SDOperand()); 2722 } else if (!X86ScalarSSE) { 2723 // FP return with fp-stack value. 2724 if (DAG.getMachineFunction().liveout_empty()) 2725 DAG.getMachineFunction().addLiveOut(X86::ST0); 2726 2727 std::vector<MVT::ValueType> Tys; 2728 Tys.push_back(MVT::Other); 2729 Tys.push_back(MVT::Flag); 2730 std::vector<SDOperand> Ops; 2731 Ops.push_back(Op.getOperand(0)); 2732 Ops.push_back(Op.getOperand(1)); 2733 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2734 } else { 2735 // FP return with ScalarSSE (return on fp-stack). 2736 if (DAG.getMachineFunction().liveout_empty()) 2737 DAG.getMachineFunction().addLiveOut(X86::ST0); 2738 2739 SDOperand MemLoc; 2740 SDOperand Chain = Op.getOperand(0); 2741 SDOperand Value = Op.getOperand(1); 2742 2743 if (Value.getOpcode() == ISD::LOAD && 2744 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 2745 Chain = Value.getOperand(0); 2746 MemLoc = Value.getOperand(1); 2747 } else { 2748 // Spill the value to memory and reload it into top of stack. 
2749 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 2750 MachineFunction &MF = DAG.getMachineFunction(); 2751 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2752 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 2753 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 2754 Value, MemLoc, DAG.getSrcValue(0)); 2755 } 2756 std::vector<MVT::ValueType> Tys; 2757 Tys.push_back(MVT::f64); 2758 Tys.push_back(MVT::Other); 2759 std::vector<SDOperand> Ops; 2760 Ops.push_back(Chain); 2761 Ops.push_back(MemLoc); 2762 Ops.push_back(DAG.getValueType(ArgVT)); 2763 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 2764 Tys.clear(); 2765 Tys.push_back(MVT::Other); 2766 Tys.push_back(MVT::Flag); 2767 Ops.clear(); 2768 Ops.push_back(Copy.getValue(1)); 2769 Ops.push_back(Copy); 2770 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2771 } 2772 break; 2773 } 2774 case 3: 2775 if (DAG.getMachineFunction().liveout_empty()) { 2776 DAG.getMachineFunction().addLiveOut(X86::EAX); 2777 DAG.getMachineFunction().addLiveOut(X86::EDX); 2778 } 2779 2780 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 2781 SDOperand()); 2782 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 2783 break; 2784 } 2785 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 2786 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 2787 Copy.getValue(1)); 2788 } 2789 case ISD::SCALAR_TO_VECTOR: { 2790 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 2791 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 2792 } 2793 case ISD::VECTOR_SHUFFLE: { 2794 SDOperand V1 = Op.getOperand(0); 2795 SDOperand V2 = Op.getOperand(1); 2796 SDOperand PermMask = Op.getOperand(2); 2797 MVT::ValueType VT = Op.getValueType(); 2798 unsigned NumElems = PermMask.getNumOperands(); 2799 2800 if (isSplatMask(PermMask.Val)) { 2801 if (NumElems <= 4) return Op; 2802 // Promote it to a v4i32 splat. 
2803 return PromoteSplat(Op, DAG); 2804 } 2805 2806 // Normalize the node to match x86 shuffle ops if needed 2807 if (V2.getOpcode() != ISD::UNDEF) { 2808 bool DoSwap = false; 2809 2810 if (ShouldXformedToMOVLP(V1, V2, PermMask)) 2811 DoSwap = true; 2812 else if (isLowerFromV2UpperFromV1(PermMask)) 2813 DoSwap = true; 2814 2815 if (DoSwap) { 2816 Op = CommuteVectorShuffle(Op, DAG); 2817 V1 = Op.getOperand(0); 2818 V2 = Op.getOperand(1); 2819 PermMask = Op.getOperand(2); 2820 } 2821 } 2822 2823 if (NumElems == 2) 2824 return Op; 2825 2826 if (X86::isMOVSMask(PermMask.Val) || 2827 X86::isMOVSHDUPMask(PermMask.Val) || 2828 X86::isMOVSLDUPMask(PermMask.Val)) 2829 return Op; 2830 2831 if (X86::isUNPCKLMask(PermMask.Val) || 2832 X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2833 X86::isUNPCKHMask(PermMask.Val)) 2834 // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. 2835 return Op; 2836 2837 // If VT is integer, try PSHUF* first, then SHUFP*. 2838 if (MVT::isInteger(VT)) { 2839 if (X86::isPSHUFDMask(PermMask.Val) || 2840 X86::isPSHUFHWMask(PermMask.Val) || 2841 X86::isPSHUFLWMask(PermMask.Val)) { 2842 if (V2.getOpcode() != ISD::UNDEF) 2843 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2844 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2845 return Op; 2846 } 2847 2848 if (X86::isSHUFPMask(PermMask.Val)) 2849 return Op; 2850 2851 // Handle v8i16 shuffle high / low shuffle node pair. 
2852 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2853 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2854 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2855 std::vector<SDOperand> MaskVec; 2856 for (unsigned i = 0; i != 4; ++i) 2857 MaskVec.push_back(PermMask.getOperand(i)); 2858 for (unsigned i = 4; i != 8; ++i) 2859 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2860 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2861 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2862 MaskVec.clear(); 2863 for (unsigned i = 0; i != 4; ++i) 2864 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2865 for (unsigned i = 4; i != 8; ++i) 2866 MaskVec.push_back(PermMask.getOperand(i)); 2867 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2868 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2869 } 2870 } else { 2871 // Floating point cases in the other order. 2872 if (X86::isSHUFPMask(PermMask.Val)) 2873 return Op; 2874 if (X86::isPSHUFDMask(PermMask.Val) || 2875 X86::isPSHUFHWMask(PermMask.Val) || 2876 X86::isPSHUFLWMask(PermMask.Val)) { 2877 if (V2.getOpcode() != ISD::UNDEF) 2878 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2879 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2880 return Op; 2881 } 2882 } 2883 2884 return SDOperand(); 2885 } 2886 case ISD::BUILD_VECTOR: { 2887 // All one's are handled with pcmpeqd. 
2888 if (ISD::isBuildVectorAllOnes(Op.Val)) 2889 return Op; 2890 2891 std::set<SDOperand> Values; 2892 SDOperand Elt0 = Op.getOperand(0); 2893 Values.insert(Elt0); 2894 bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && 2895 cast<ConstantSDNode>(Elt0)->getValue() == 0) || 2896 (isa<ConstantFPSDNode>(Elt0) && 2897 cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0)); 2898 bool RestAreZero = true; 2899 unsigned NumElems = Op.getNumOperands(); 2900 for (unsigned i = 1; i < NumElems; ++i) { 2901 SDOperand Elt = Op.getOperand(i); 2902 if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) { 2903 if (!FPC->isExactlyValue(+0.0)) 2904 RestAreZero = false; 2905 } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2906 if (!C->isNullValue()) 2907 RestAreZero = false; 2908 } else 2909 RestAreZero = false; 2910 Values.insert(Elt); 2911 } 2912 2913 if (RestAreZero) { 2914 if (Elt0IsZero) return Op; 2915 2916 // Zero extend a scalar to a vector. 2917 return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); 2918 } 2919 2920 if (Values.size() > 2) { 2921 // Expand into a number of unpckl*. 2922 // e.g. for v4f32 2923 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2924 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2925 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2926 MVT::ValueType VT = Op.getValueType(); 2927 SDOperand PermMask = getUnpacklMask(NumElems, DAG); 2928 std::vector<SDOperand> V(NumElems); 2929 for (unsigned i = 0; i < NumElems; ++i) 2930 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2931 NumElems >>= 1; 2932 while (NumElems != 0) { 2933 for (unsigned i = 0; i < NumElems; ++i) 2934 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2935 PermMask); 2936 NumElems >>= 1; 2937 } 2938 return V[0]; 2939 } 2940 2941 return SDOperand(); 2942 } 2943 case ISD::EXTRACT_VECTOR_ELT: { 2944 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2945 return SDOperand(); 2946 2947 MVT::ValueType VT = Op.getValueType(); 2948 // TODO: handle v16i8. 
2949 if (MVT::getSizeInBits(VT) == 16) { 2950 // Transform it so it match pextrw which produces a 32-bit result. 2951 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2952 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2953 Op.getOperand(0), Op.getOperand(1)); 2954 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2955 DAG.getValueType(VT)); 2956 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2957 } else if (MVT::getSizeInBits(VT) == 32) { 2958 SDOperand Vec = Op.getOperand(0); 2959 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2960 if (Idx == 0) 2961 return Op; 2962 2963 // TODO: if Idex == 2, we can use unpckhps 2964 // SHUFPS the element to the lowest double word, then movss. 2965 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2966 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 2967 MVT::getVectorBaseType(MaskVT)); 2968 std::vector<SDOperand> IdxVec; 2969 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 2970 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2971 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2972 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2973 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2974 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2975 Vec, Vec, Mask); 2976 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2977 DAG.getConstant(0, MVT::i32)); 2978 } else if (MVT::getSizeInBits(VT) == 64) { 2979 SDOperand Vec = Op.getOperand(0); 2980 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2981 if (Idx == 0) 2982 return Op; 2983 2984 // UNPCKHPD the element to the lowest double word, then movsd. 2985 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 2986 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
2987 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2988 std::vector<SDOperand> IdxVec; 2989 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 2990 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2991 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2992 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2993 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2994 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2995 DAG.getConstant(0, MVT::i32)); 2996 } 2997 2998 return SDOperand(); 2999 } 3000 case ISD::INSERT_VECTOR_ELT: { 3001 // Transform it so it match pinsrw which expects a 16-bit value in a R32 3002 // as its second argument. 3003 MVT::ValueType VT = Op.getValueType(); 3004 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 3005 if (MVT::getSizeInBits(BaseVT) == 16) { 3006 SDOperand N1 = Op.getOperand(1); 3007 SDOperand N2 = Op.getOperand(2); 3008 if (N1.getValueType() != MVT::i32) 3009 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3010 if (N2.getValueType() != MVT::i32) 3011 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 3012 return DAG.getNode(X86ISD::PINSRW, VT, Op.getOperand(0), N1, N2); 3013 } 3014 3015 return SDOperand(); 3016 } 3017 case ISD::INTRINSIC_WO_CHAIN: { 3018 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 3019 switch (IntNo) { 3020 default: return SDOperand(); // Don't custom lower most intrinsics. 3021 // Comparison intrinsics. 
3022 case Intrinsic::x86_sse_comieq_ss: 3023 case Intrinsic::x86_sse_comilt_ss: 3024 case Intrinsic::x86_sse_comile_ss: 3025 case Intrinsic::x86_sse_comigt_ss: 3026 case Intrinsic::x86_sse_comige_ss: 3027 case Intrinsic::x86_sse_comineq_ss: 3028 case Intrinsic::x86_sse_ucomieq_ss: 3029 case Intrinsic::x86_sse_ucomilt_ss: 3030 case Intrinsic::x86_sse_ucomile_ss: 3031 case Intrinsic::x86_sse_ucomigt_ss: 3032 case Intrinsic::x86_sse_ucomige_ss: 3033 case Intrinsic::x86_sse_ucomineq_ss: 3034 case Intrinsic::x86_sse2_comieq_sd: 3035 case Intrinsic::x86_sse2_comilt_sd: 3036 case Intrinsic::x86_sse2_comile_sd: 3037 case Intrinsic::x86_sse2_comigt_sd: 3038 case Intrinsic::x86_sse2_comige_sd: 3039 case Intrinsic::x86_sse2_comineq_sd: 3040 case Intrinsic::x86_sse2_ucomieq_sd: 3041 case Intrinsic::x86_sse2_ucomilt_sd: 3042 case Intrinsic::x86_sse2_ucomile_sd: 3043 case Intrinsic::x86_sse2_ucomigt_sd: 3044 case Intrinsic::x86_sse2_ucomige_sd: 3045 case Intrinsic::x86_sse2_ucomineq_sd: { 3046 unsigned Opc = 0; 3047 ISD::CondCode CC = ISD::SETCC_INVALID; 3048 switch (IntNo) { 3049 default: break; 3050 case Intrinsic::x86_sse_comieq_ss: 3051 case Intrinsic::x86_sse2_comieq_sd: 3052 Opc = X86ISD::COMI; 3053 CC = ISD::SETEQ; 3054 break; 3055 case Intrinsic::x86_sse_comilt_ss: 3056 case Intrinsic::x86_sse2_comilt_sd: 3057 Opc = X86ISD::COMI; 3058 CC = ISD::SETLT; 3059 break; 3060 case Intrinsic::x86_sse_comile_ss: 3061 case Intrinsic::x86_sse2_comile_sd: 3062 Opc = X86ISD::COMI; 3063 CC = ISD::SETLE; 3064 break; 3065 case Intrinsic::x86_sse_comigt_ss: 3066 case Intrinsic::x86_sse2_comigt_sd: 3067 Opc = X86ISD::COMI; 3068 CC = ISD::SETGT; 3069 break; 3070 case Intrinsic::x86_sse_comige_ss: 3071 case Intrinsic::x86_sse2_comige_sd: 3072 Opc = X86ISD::COMI; 3073 CC = ISD::SETGE; 3074 break; 3075 case Intrinsic::x86_sse_comineq_ss: 3076 case Intrinsic::x86_sse2_comineq_sd: 3077 Opc = X86ISD::COMI; 3078 CC = ISD::SETNE; 3079 break; 3080 case Intrinsic::x86_sse_ucomieq_ss: 3081 case 
Intrinsic::x86_sse2_ucomieq_sd:
        Opc = X86ISD::UCOMI;
        CC = ISD::SETEQ;
        break;
      case Intrinsic::x86_sse_ucomilt_ss:
      case Intrinsic::x86_sse2_ucomilt_sd:
        Opc = X86ISD::UCOMI;
        CC = ISD::SETLT;
        break;
      case Intrinsic::x86_sse_ucomile_ss:
      case Intrinsic::x86_sse2_ucomile_sd:
        Opc = X86ISD::UCOMI;
        CC = ISD::SETLE;
        break;
      case Intrinsic::x86_sse_ucomigt_ss:
      case Intrinsic::x86_sse2_ucomigt_sd:
        Opc = X86ISD::UCOMI;
        CC = ISD::SETGT;
        break;
      case Intrinsic::x86_sse_ucomige_ss:
      case Intrinsic::x86_sse2_ucomige_sd:
        Opc = X86ISD::UCOMI;
        CC = ISD::SETGE;
        break;
      case Intrinsic::x86_sse_ucomineq_ss:
      case Intrinsic::x86_sse2_ucomineq_sd:
        Opc = X86ISD::UCOMI;
        CC = ISD::SETNE;
        break;
      }

      // Lower the comparison intrinsic to a (U)COMI node that produces a
      // flag result, then materialize the predicate with X86ISD::SETCC.
      // translateX86CC may request an operand swap (Flip) when the condition
      // code has no direct encoding on x86.
      bool Flip;
      unsigned X86CC;
      translateX86CC(CC, true, X86CC, Flip);
      SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
                                   Op.getOperand(Flip?1:2));
      // SETCC produces an i8; the intrinsic's result type is i32, so
      // any-extend (upper bits are not observed by callers of the intrinsic).
      SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                    DAG.getConstant(X86CC, MVT::i8), Cond);
      return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
    }  // end comparison-intrinsic case block
    }  // end switch (IntNo)
  }    // end case ISD::INTRINSIC_WO_CHAIN
  }    // end opcode switch
}      // end of enclosing custom-lowering function

/// getTargetNodeName - Return a human-readable name for the given
/// target-specific (X86ISD) opcode, or NULL if the opcode is unknown.
/// Used when printing/debugging selection DAGs.
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::TEST:               return "X86ISD::TEST";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::ZEXT_S2VEC:         return "X86ISD::ZEXT_S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  }
}

/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one for the given target node, and
/// record them in KnownZero / KnownOne.
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  // Target-specific nodes live at or above BUILTIN_OP_END; the INTRINSIC_*
  // opcodes are the only generic nodes legitimately routed here.
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  // The only fact advertised: X86ISD::SETCC produces 0 or 1 in the low bit,
  // so every bit of its result above bit 0 is known to be zero.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getRegClassForInlineAsmConstraint - Map a single-letter GCC register
/// constraint to the list of X86 registers it may use.  Returns an empty
/// vector for constraints that are not handled (or that this subtarget
/// cannot satisfy, e.g. 'x' without SSE1).
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    // FIXME: not handling MMX registers yet ('y' constraint).
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;               // Unknown constraint letter
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
                                   X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
    case 'l':   // INDEX_REGS
      // Same set as 'r' minus ESP (ESP cannot serve as an index register).
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
                                   X86::ESI, X86::EDI, X86::EBP, 0);
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0);
    case 'x':   // SSE_REGS if SSE1 allowed
      if (Subtarget->hasSSE1())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (Subtarget->hasSSE2())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    }
  }

  return std::vector<unsigned>();
}

/// isLegalAddressImmediate - Return true if the integer value or
/// GlobalValue can be used as the offset of the target addressing mode.
bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
  // X86 allows a sign-extended 32-bit immediate field.
  // NOTE(review): the check below accepts the open interval (-2^32, 2^32-1),
  // i.e. any value representable in 32 bits whether read as signed or
  // unsigned, which is wider than the sign-extended range [-2^31, 2^31)
  // the comment above describes.  The bounds are also asymmetric: 2^32-1
  // is excluded while -(2^32)+1 is admitted.  Confirm whether this is the
  // intended legality test for a 32-bit-only target before tightening it.
  return (V > -(1LL << 32) && V < (1LL << 32)-1);
}

bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
  // A global's address may be folded into an addressing mode only when it
  // needs no extra indirection at the chosen relocation model.
  if (Subtarget->isTargetDarwin()) {
    Reloc::Model RModel = getTargetMachine().getRelocationModel();
    if (RModel == Reloc::Static)
      return true;
    else if (RModel == Reloc::DynamicNoPIC)
      // External and weak symbols are indirect on Darwin; their address is
      // not a link-time constant that can sit in the displacement field.
      return !DarwinGVRequiresExtraLoad(GV);
    else
      return false;  // PIC: address is $g + offset, never a plain immediate.
  } else
    return true;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  // Accept 2-element masks, splats, and any mask one of the x86 shuffle
  // instructions (MOVS*, MOVSH/LDUP, PSHUF*, SHUFP*, UNPCK*) implements
  // directly; everything else is scalarized/rewritten by the legalizer.
  return (Mask.Val->getNumOperands() == 2 ||
          isSplatMask(Mask.Val) ||
          X86::isMOVSMask(Mask.Val) ||
          X86::isMOVSHDUPMask(Mask.Val) ||
          X86::isMOVSLDUPMask(Mask.Val) ||
          X86::isPSHUFDMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isSHUFPMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}