// X86ISelLowering.cpp revision cdfc3c82a72eeaa604a50426ffc51c21587e0406
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ---*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

/// X86TargetLowering - Configure how every ISD operation/type pair is
/// legalized for X86: which are natively Legal, which get Promoted to a wider
/// type, which are Expanded by the legalizer, and which need Custom lowering
/// in this file.  Feature-dependent choices key off the subtarget (SSE1/SSE2/
/// SSE3/MMX) queried below.
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  // X86ScalarSSE == true means scalar FP is done in SSE registers (f32/f64)
  // rather than on the x87 stack; it gates many decisions below.
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);                    // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last.  These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  // NOTE: FREM f64 is set to Expand again inside the X86ScalarSSE branch
  // below; the duplication is harmless.
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Scalar FP lives in XMM registers.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Scalar FP lives on the x87 stack.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      // x87 fsin/fcos are not precise enough to use by default.
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    // NOTE(review): the half-open loop [v16i8, v2i64) relies on the MVT
    // enumeration order placing the 128-bit integer types consecutively.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

/// LowerArguments - Dispatch incoming-argument lowering to the fastcc
/// implementation (only when the function uses the Fast calling convention
/// AND -enable-x86-fastcc is set) or to the default C calling convention.
std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    return LowerFastCCArguments(F, DAG);
  return LowerCCCArguments(F, DAG);
}

/// LowerCallTo - Lower an outgoing call, dispatching to the fastcc or C
/// calling convention implementation (same selection rule as LowerArguments).
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// LowerCCCArguments - Lower the incoming arguments of a C-calling-convention
/// function.  Every argument lives in a fixed stack slot above the return
/// address; each is either loaded from its slot or, if dead, replaced by a
/// zero constant.  Also records the vararg frame index and the caller/callee
/// stack-adjustment bookkeeping fields.
std::vector<SDOperand>
X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;   // stack slots are 4-byte aligned; 8 for i64/f64
    unsigned ObjSize;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:  ObjSize = 1;                break;
    case MVT::i16: ObjSize = 2;                break;
    case MVT::i32: ObjSize = 4;                break;
    case MVT::i64: ObjSize = ArgIncrement = 8; break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }
    // Create the frame index object for this incoming parameter...
    int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

    // Create the SelectionDAG nodes corresponding to a load from this parameter
    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    SDOperand ArgValue;
    if (!I->use_empty())
      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    else {
      // Dead argument: materialize a cheap zero instead of a load.
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    ArgOffset += ArgIncrement;   // Move on to the next argument...
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;
  return ArgValues;
}

/// LowerCCCCallTo - Lower an outgoing C-calling-convention call: push all
/// arguments on the stack, emit the CALLSEQ_START/X86ISD::CALL/CALLSEQ_END
/// sequence, and copy the return value out of the convention's result
/// registers (AL/AX/EAX, EAX:EDX, or ST0).
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    // First pass: compute the total stack space the arguments need.
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Arguments go on the stack in reverse order, as specified by the ABI.
    unsigned ArgOffset = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;

    // Second pass: store each argument at ESP + its offset.
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
    }
    // The argument stores are independent; merge them with a TokenFactor.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    // Without SSE the value comes back on the x87 stack as f64.
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    // i64 is returned split across EAX:EDX.
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  SDOperand InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));  // caller pops 0 bytes
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Copy the return value out of the convention's fixed result register(s).
  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      // Reassemble the 64-bit result from EAX (low) and EDX (high).
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      // NOTE(review): this Ops deliberately shadows the outer Ops vector; it
      // holds the operands for FP_GET_RESULT (and later FST) only.
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain  = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        // Round-trip the x87 result through a stack slot so it ends up in an
        // SSE register.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has problem with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coalesced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;


/// LowerFastCCArguments - Lower the incoming arguments of a fastcc function.
/// Up to FASTCC_NUM_INT_ARGS_INREGS integer arguments arrive in EAX/EDX (as
/// AL/DL, AX/DX, or EAX/EDX depending on width); everything else is loaded
/// from fixed stack slots.  The callee pops its own stack arguments, and the
/// pop amount is padded to 8n+4 bytes (see the section comment above).
std::vector<SDOperand>
X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    // ObjSize == 0 means "passed in a register, no stack slot needed".
    unsigned ObjSize = 0;
    SDOperand ArgValue;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                                    X86::R8RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
          DAG.setRoot(ArgValue.getValue(1));
          if (ObjectVT == MVT::i1)
            // FIXME: Should insert an assertzext here.
            ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
        }
        ++NumIntRegs;
        break;
      }

      ObjSize = 1;
      break;
    case MVT::i16:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                                    X86::R16RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 2;
      break;
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                    X86::R32RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 4;
      break;
    case MVT::i64:
      // i64 may be split: both halves in EAX/EDX, low half in EDX with the
      // high half in memory, or fully in memory.
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
          unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);

          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
          DAG.setRoot(Hi.getValue(1));

          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          DAG.setRoot(Low.getValue(1));

          // Load the high part from memory.
          // Create the frame index object for this incoming parameter...
          int FI = MFI->CreateFixedObject(4, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        ArgOffset += 4;
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        break;
      }
      ObjSize = ArgIncrement = 8;
      break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    if (ObjSize && !I->use_empty()) {
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else if (ArgValue.Val == 0) {
      // Dead argument (and not register-passed): use a zero constant.
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    if (ObjSize)
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

/// LowerFastCCCallTo - Lower an outgoing fastcc call: the first
/// FASTCC_NUM_INT_ARGS_INREGS integer arguments go in EAX/EDX, the rest on
/// the stack; the callee pops its stack arguments.
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
876 unsigned NumIntRegs = 0; 877 878 for (unsigned i = 0, e = Args.size(); i != e; ++i) 879 switch (getValueType(Args[i].second)) { 880 default: assert(0 && "Unknown value type!"); 881 case MVT::i1: 882 case MVT::i8: 883 case MVT::i16: 884 case MVT::i32: 885 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 886 ++NumIntRegs; 887 break; 888 } 889 // fall through 890 case MVT::f32: 891 NumBytes += 4; 892 break; 893 case MVT::i64: 894 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 895 NumIntRegs += 2; 896 break; 897 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 898 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 899 NumBytes += 4; 900 break; 901 } 902 903 // fall through 904 case MVT::f64: 905 NumBytes += 8; 906 break; 907 } 908 909 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 910 // arguments and the arguments after the retaddr has been pushed are aligned. 911 if ((NumBytes & 7) == 0) 912 NumBytes += 4; 913 914 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 915 916 // Arguments go on the stack in reverse order, as specified by the ABI. 917 unsigned ArgOffset = 0; 918 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 919 NumIntRegs = 0; 920 std::vector<SDOperand> Stores; 921 std::vector<SDOperand> RegValuesToPass; 922 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 923 switch (getValueType(Args[i].second)) { 924 default: assert(0 && "Unexpected ValueType for argument!"); 925 case MVT::i1: 926 Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first); 927 // Fall through. 
928 case MVT::i8: 929 case MVT::i16: 930 case MVT::i32: 931 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 932 RegValuesToPass.push_back(Args[i].first); 933 ++NumIntRegs; 934 break; 935 } 936 // Fall through 937 case MVT::f32: { 938 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 939 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 940 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 941 Args[i].first, PtrOff, 942 DAG.getSrcValue(NULL))); 943 ArgOffset += 4; 944 break; 945 } 946 case MVT::i64: 947 // Can pass (at least) part of it in regs? 948 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 949 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 950 Args[i].first, DAG.getConstant(1, MVT::i32)); 951 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 952 Args[i].first, DAG.getConstant(0, MVT::i32)); 953 RegValuesToPass.push_back(Lo); 954 ++NumIntRegs; 955 956 // Pass both parts in regs? 957 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 958 RegValuesToPass.push_back(Hi); 959 ++NumIntRegs; 960 } else { 961 // Pass the high part in memory. 962 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 963 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 964 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 965 Hi, PtrOff, DAG.getSrcValue(NULL))); 966 ArgOffset += 4; 967 } 968 break; 969 } 970 // Fall through 971 case MVT::f64: 972 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 973 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 974 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 975 Args[i].first, PtrOff, 976 DAG.getSrcValue(NULL))); 977 ArgOffset += 8; 978 break; 979 } 980 } 981 if (!Stores.empty()) 982 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 983 984 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 985 // arguments and the arguments after the retaddr has been pushed are aligned. 
986 if ((ArgOffset & 7) == 0) 987 ArgOffset += 4; 988 989 std::vector<MVT::ValueType> RetVals; 990 MVT::ValueType RetTyVT = getValueType(RetTy); 991 992 RetVals.push_back(MVT::Other); 993 994 // The result values produced have to be legal. Promote the result. 995 switch (RetTyVT) { 996 case MVT::isVoid: break; 997 default: 998 RetVals.push_back(RetTyVT); 999 break; 1000 case MVT::i1: 1001 case MVT::i8: 1002 case MVT::i16: 1003 RetVals.push_back(MVT::i32); 1004 break; 1005 case MVT::f32: 1006 if (X86ScalarSSE) 1007 RetVals.push_back(MVT::f32); 1008 else 1009 RetVals.push_back(MVT::f64); 1010 break; 1011 case MVT::i64: 1012 RetVals.push_back(MVT::i32); 1013 RetVals.push_back(MVT::i32); 1014 break; 1015 } 1016 1017 // Build a sequence of copy-to-reg nodes chained together with token chain 1018 // and flag operands which copy the outgoing args into registers. 1019 SDOperand InFlag; 1020 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 1021 unsigned CCReg; 1022 SDOperand RegToPass = RegValuesToPass[i]; 1023 switch (RegToPass.getValueType()) { 1024 default: assert(0 && "Bad thing to pass in regs"); 1025 case MVT::i8: 1026 CCReg = (i == 0) ? X86::AL : X86::DL; 1027 break; 1028 case MVT::i16: 1029 CCReg = (i == 0) ? X86::AX : X86::DX; 1030 break; 1031 case MVT::i32: 1032 CCReg = (i == 0) ? X86::EAX : X86::EDX; 1033 break; 1034 } 1035 1036 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 1037 InFlag = Chain.getValue(1); 1038 } 1039 1040 std::vector<MVT::ValueType> NodeTys; 1041 NodeTys.push_back(MVT::Other); // Returns a chain 1042 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1043 std::vector<SDOperand> Ops; 1044 Ops.push_back(Chain); 1045 Ops.push_back(Callee); 1046 if (InFlag.Val) 1047 Ops.push_back(InFlag); 1048 1049 // FIXME: Do not generate X86ISD::TAILCALL for now. 
1050 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 1051 InFlag = Chain.getValue(1); 1052 1053 NodeTys.clear(); 1054 NodeTys.push_back(MVT::Other); // Returns a chain 1055 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1056 Ops.clear(); 1057 Ops.push_back(Chain); 1058 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1059 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1060 Ops.push_back(InFlag); 1061 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 1062 InFlag = Chain.getValue(1); 1063 1064 SDOperand RetVal; 1065 if (RetTyVT != MVT::isVoid) { 1066 switch (RetTyVT) { 1067 default: assert(0 && "Unknown value type to return!"); 1068 case MVT::i1: 1069 case MVT::i8: 1070 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 1071 Chain = RetVal.getValue(1); 1072 if (RetTyVT == MVT::i1) 1073 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 1074 break; 1075 case MVT::i16: 1076 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 1077 Chain = RetVal.getValue(1); 1078 break; 1079 case MVT::i32: 1080 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1081 Chain = RetVal.getValue(1); 1082 break; 1083 case MVT::i64: { 1084 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1085 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 1086 Lo.getValue(2)); 1087 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 1088 Chain = Hi.getValue(1); 1089 break; 1090 } 1091 case MVT::f32: 1092 case MVT::f64: { 1093 std::vector<MVT::ValueType> Tys; 1094 Tys.push_back(MVT::f64); 1095 Tys.push_back(MVT::Other); 1096 Tys.push_back(MVT::Flag); 1097 std::vector<SDOperand> Ops; 1098 Ops.push_back(Chain); 1099 Ops.push_back(InFlag); 1100 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 1101 Chain = RetVal.getValue(1); 1102 InFlag = RetVal.getValue(2); 1103 if (X86ScalarSSE) { 1104 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. 
This 1105 // shouldn't be necessary except that RFP cannot be live across 1106 // multiple blocks. When stackifier is fixed, they can be uncoupled. 1107 MachineFunction &MF = DAG.getMachineFunction(); 1108 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1109 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1110 Tys.clear(); 1111 Tys.push_back(MVT::Other); 1112 Ops.clear(); 1113 Ops.push_back(Chain); 1114 Ops.push_back(RetVal); 1115 Ops.push_back(StackSlot); 1116 Ops.push_back(DAG.getValueType(RetTyVT)); 1117 Ops.push_back(InFlag); 1118 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 1119 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 1120 DAG.getSrcValue(NULL)); 1121 Chain = RetVal.getValue(1); 1122 } 1123 1124 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 1125 // FIXME: we would really like to remember that this FP_ROUND 1126 // operation is okay to eliminate if we allow excess FP precision. 1127 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1128 break; 1129 } 1130 } 1131 } 1132 1133 return std::make_pair(RetVal, Chain); 1134} 1135 1136SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1137 if (ReturnAddrIndex == 0) { 1138 // Set up a frame object for the return address. 1139 MachineFunction &MF = DAG.getMachineFunction(); 1140 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1141 } 1142 1143 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 1144} 1145 1146 1147 1148std::pair<SDOperand, SDOperand> X86TargetLowering:: 1149LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1150 SelectionDAG &DAG) { 1151 SDOperand Result; 1152 if (Depth) // Depths > 0 not supported yet! 
1153 Result = DAG.getConstant(0, getPointerTy()); 1154 else { 1155 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1156 if (!isFrameAddress) 1157 // Just load the return address 1158 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 1159 DAG.getSrcValue(NULL)); 1160 else 1161 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 1162 DAG.getConstant(4, MVT::i32)); 1163 } 1164 return std::make_pair(Result, Chain); 1165} 1166 1167/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1168/// which corresponds to the condition code. 1169static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1170 switch (X86CC) { 1171 default: assert(0 && "Unknown X86 conditional code!"); 1172 case X86ISD::COND_A: return X86::JA; 1173 case X86ISD::COND_AE: return X86::JAE; 1174 case X86ISD::COND_B: return X86::JB; 1175 case X86ISD::COND_BE: return X86::JBE; 1176 case X86ISD::COND_E: return X86::JE; 1177 case X86ISD::COND_G: return X86::JG; 1178 case X86ISD::COND_GE: return X86::JGE; 1179 case X86ISD::COND_L: return X86::JL; 1180 case X86ISD::COND_LE: return X86::JLE; 1181 case X86ISD::COND_NE: return X86::JNE; 1182 case X86ISD::COND_NO: return X86::JNO; 1183 case X86ISD::COND_NP: return X86::JNP; 1184 case X86ISD::COND_NS: return X86::JNS; 1185 case X86ISD::COND_O: return X86::JO; 1186 case X86ISD::COND_P: return X86::JP; 1187 case X86ISD::COND_S: return X86::JS; 1188 } 1189} 1190 1191/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1192/// specific condition code. It returns a false if it cannot do a direct 1193/// translation. X86CC is the translated CondCode. Flip is set to true if the 1194/// the order of comparison operands should be flipped. 
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &Flip is only meaningful on the FP path; initialize both outputs so
                           // failure is detectable by the caller.
                           X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    // Integer setcc: each ISD condition maps directly onto one x86
    // signed/unsigned condition code.
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    // Only the unsigned-style conditions (A/AE/B/BE) read the flags in a way
    // that matches these encodings, so ordered signed comparisons are
    // expressed by flipping the operand order (Flip = true) where needed.
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough: X < Y == Y > X with flipped operands.
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO:  X86CC = X86ISD::COND_P;  break;  // PF set => unordered
    case ISD::SETO:   X86CC = X86ISD::COND_NP; break;  // PF clear => ordered
    }
  }

  return X86CC != X86ISD::COND_INVALID;
}

/// Convenience overload: unwrap the condition code from a CondCodeSDNode
/// operand and defer to the ISD::CondCode version above.
static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. Current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

/// InsertAtEndOfBasicBlock - Expand pseudo instructions that need custom
/// MachineBasicBlock surgery: the CMOV_* pseudos are lowered to a
/// branch/fallthrough diamond joined by a PHI, and the FP_TO_INT*_IN_MEM
/// pseudos to an x87 store performed with the FP control word temporarily
/// switched to truncation ("round towards zero") mode.
MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  // Note: with asserts disabled (NDEBUG) the assert vanishes and control
  // falls into the CMOV cases below.
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern. The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    // It points at the block after BB: the insertion point for the two new
    // machine basic blocks.
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    // Operand 3 holds the x86 condition code selected earlier; translate it
    // to the matching conditional-branch opcode.
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
        e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while(!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    // Save the current FP control word to the stack slot.
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    // Reassemble the destination address (base, scale, index, disp) from the
    // pseudo's first four operands.
    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    // NOTE(review): Op is a reference into MI, so these assignments copy
    // operands 1-3 over MI's operand 0 in place. Operand 0 has already been
    // consumed above and MI is deleted below, but this is fragile — confirm
    // before reusing MI after this point.
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    // Emit the truncating store of the FP value (operand 4).
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}


//===----------------------------------------------------------------------===//
// X86 Custom Lowering Hooks
//===----------------------------------------------------------------------===//

/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
/// load. For Darwin, external and weak symbols are indirect, loading the value
/// at address GV rather then the value of GV itself. This means that the
/// GlobalAddress must be in the base or index register of the address, not the
/// GV offset field.
static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the half-open range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
1433bool X86::isPSHUFDMask(SDNode *N) { 1434 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1435 1436 if (N->getNumOperands() != 4) 1437 return false; 1438 1439 // Check if the value doesn't reference the second vector. 1440 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1441 SDOperand Arg = N->getOperand(i); 1442 if (Arg.getOpcode() == ISD::UNDEF) continue; 1443 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1444 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1445 return false; 1446 } 1447 1448 return true; 1449} 1450 1451/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1452/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1453bool X86::isPSHUFHWMask(SDNode *N) { 1454 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1455 1456 if (N->getNumOperands() != 8) 1457 return false; 1458 1459 // Lower quadword copied in order. 1460 for (unsigned i = 0; i != 4; ++i) { 1461 SDOperand Arg = N->getOperand(i); 1462 if (Arg.getOpcode() == ISD::UNDEF) continue; 1463 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1464 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1465 return false; 1466 } 1467 1468 // Upper quadword shuffled. 1469 for (unsigned i = 4; i != 8; ++i) { 1470 SDOperand Arg = N->getOperand(i); 1471 if (Arg.getOpcode() == ISD::UNDEF) continue; 1472 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1473 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1474 if (Val < 4 || Val > 7) 1475 return false; 1476 } 1477 1478 return true; 1479} 1480 1481/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1482/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1483bool X86::isPSHUFLWMask(SDNode *N) { 1484 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1485 1486 if (N->getNumOperands() != 8) 1487 return false; 1488 1489 // Upper quadword copied in order. 
1490 for (unsigned i = 4; i != 8; ++i) 1491 if (!isUndefOrEqual(N->getOperand(i), i)) 1492 return false; 1493 1494 // Lower quadword shuffled. 1495 for (unsigned i = 0; i != 4; ++i) 1496 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1497 return false; 1498 1499 return true; 1500} 1501 1502/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1503/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1504bool X86::isSHUFPMask(SDNode *N) { 1505 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1506 1507 unsigned NumElems = N->getNumOperands(); 1508 if (NumElems == 2) { 1509 // The only cases that ought be handled by SHUFPD is 1510 // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1511 // Dest { 3, 0 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 1512 // Expect bit 0 == 1, bit1 == 2 1513 SDOperand Bit0 = N->getOperand(0); 1514 SDOperand Bit1 = N->getOperand(1); 1515 if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3)) 1516 return true; 1517 if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2)) 1518 return true; 1519 return false; 1520 } 1521 1522 if (NumElems != 4) return false; 1523 1524 // Each half must refer to only one of the vector. 1525 for (unsigned i = 0; i < 2; ++i) { 1526 SDOperand Arg = N->getOperand(i); 1527 if (Arg.getOpcode() == ISD::UNDEF) continue; 1528 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1529 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1530 if (Val >= 4) return false; 1531 } 1532 for (unsigned i = 2; i < 4; ++i) { 1533 SDOperand Arg = N->getOperand(i); 1534 if (Arg.getOpcode() == ISD::UNDEF) continue; 1535 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1536 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1537 if (Val < 4) return false; 1538 } 1539 1540 return true; 1541} 1542 1543/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1544/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 
1545bool X86::isMOVHLPSMask(SDNode *N) { 1546 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1547 1548 if (N->getNumOperands() != 4) 1549 return false; 1550 1551 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1552 return isUndefOrEqual(N->getOperand(0), 6) && 1553 isUndefOrEqual(N->getOperand(1), 7) && 1554 isUndefOrEqual(N->getOperand(2), 2) && 1555 isUndefOrEqual(N->getOperand(3), 3); 1556} 1557 1558/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand 1559/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1560bool X86::isMOVLHPSMask(SDNode *N) { 1561 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1562 1563 if (N->getNumOperands() != 4) 1564 return false; 1565 1566 // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5 1567 return isUndefOrEqual(N->getOperand(0), 0) && 1568 isUndefOrEqual(N->getOperand(1), 1) && 1569 isUndefOrEqual(N->getOperand(2), 4) && 1570 isUndefOrEqual(N->getOperand(3), 5); 1571} 1572 1573/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1574/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1575bool X86::isMOVLPMask(SDNode *N) { 1576 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1577 1578 unsigned NumElems = N->getNumOperands(); 1579 if (NumElems != 2 && NumElems != 4) 1580 return false; 1581 1582 for (unsigned i = 0; i < NumElems/2; ++i) 1583 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1584 return false; 1585 1586 for (unsigned i = NumElems/2; i < NumElems; ++i) 1587 if (!isUndefOrEqual(N->getOperand(i), i)) 1588 return false; 1589 1590 return true; 1591} 1592 1593/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1594/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}. 
1595bool X86::isMOVHPMask(SDNode *N) { 1596 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1597 1598 unsigned NumElems = N->getNumOperands(); 1599 if (NumElems != 2 && NumElems != 4) 1600 return false; 1601 1602 for (unsigned i = 0; i < NumElems/2; ++i) 1603 if (!isUndefOrEqual(N->getOperand(i), i)) 1604 return false; 1605 1606 for (unsigned i = 0; i < NumElems/2; ++i) { 1607 SDOperand Arg = N->getOperand(i + NumElems/2); 1608 if (!isUndefOrEqual(Arg, i + NumElems)) 1609 return false; 1610 } 1611 1612 return true; 1613} 1614 1615/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1616/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1617bool X86::isUNPCKLMask(SDNode *N) { 1618 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1619 1620 unsigned NumElems = N->getNumOperands(); 1621 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1622 return false; 1623 1624 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1625 SDOperand BitI = N->getOperand(i); 1626 SDOperand BitI1 = N->getOperand(i+1); 1627 if (!isUndefOrEqual(BitI, j)) 1628 return false; 1629 if (!isUndefOrEqual(BitI1, j + NumElems)) 1630 return false; 1631 } 1632 1633 return true; 1634} 1635 1636/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1637/// specifies a shuffle of elements that is suitable for input to UNPCKH. 
1638bool X86::isUNPCKHMask(SDNode *N) { 1639 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1640 1641 unsigned NumElems = N->getNumOperands(); 1642 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1643 return false; 1644 1645 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1646 SDOperand BitI = N->getOperand(i); 1647 SDOperand BitI1 = N->getOperand(i+1); 1648 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1649 return false; 1650 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1651 return false; 1652 } 1653 1654 return true; 1655} 1656 1657/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1658/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1659/// <0, 0, 1, 1> 1660bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1661 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1662 1663 unsigned NumElems = N->getNumOperands(); 1664 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1665 return false; 1666 1667 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1668 SDOperand BitI = N->getOperand(i); 1669 SDOperand BitI1 = N->getOperand(i+1); 1670 1671 if (!isUndefOrEqual(BitI, j)) 1672 return false; 1673 if (!isUndefOrEqual(BitI1, j)) 1674 return false; 1675 } 1676 1677 return true; 1678} 1679 1680/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand 1681/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}. 
1682bool X86::isMOVSMask(SDNode *N) { 1683 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1684 1685 unsigned NumElems = N->getNumOperands(); 1686 if (NumElems != 2 && NumElems != 4) 1687 return false; 1688 1689 if (!isUndefOrEqual(N->getOperand(0), NumElems)) 1690 return false; 1691 1692 for (unsigned i = 1; i < NumElems; ++i) { 1693 SDOperand Arg = N->getOperand(i); 1694 if (!isUndefOrEqual(Arg, i)) 1695 return false; 1696 } 1697 1698 return true; 1699} 1700 1701/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1702/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1703bool X86::isMOVSHDUPMask(SDNode *N) { 1704 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1705 1706 if (N->getNumOperands() != 4) 1707 return false; 1708 1709 // Expect 1, 1, 3, 3 1710 for (unsigned i = 0; i < 2; ++i) { 1711 SDOperand Arg = N->getOperand(i); 1712 if (Arg.getOpcode() == ISD::UNDEF) continue; 1713 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1714 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1715 if (Val != 1) return false; 1716 } 1717 1718 bool HasHi = false; 1719 for (unsigned i = 2; i < 4; ++i) { 1720 SDOperand Arg = N->getOperand(i); 1721 if (Arg.getOpcode() == ISD::UNDEF) continue; 1722 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1723 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1724 if (Val != 3) return false; 1725 HasHi = true; 1726 } 1727 1728 // Don't use movshdup if it can be done with a shufps. 1729 return HasHi; 1730} 1731 1732/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1733/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
1734bool X86::isMOVSLDUPMask(SDNode *N) { 1735 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1736 1737 if (N->getNumOperands() != 4) 1738 return false; 1739 1740 // Expect 0, 0, 2, 2 1741 for (unsigned i = 0; i < 2; ++i) { 1742 SDOperand Arg = N->getOperand(i); 1743 if (Arg.getOpcode() == ISD::UNDEF) continue; 1744 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1745 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1746 if (Val != 0) return false; 1747 } 1748 1749 bool HasHi = false; 1750 for (unsigned i = 2; i < 4; ++i) { 1751 SDOperand Arg = N->getOperand(i); 1752 if (Arg.getOpcode() == ISD::UNDEF) continue; 1753 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1754 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1755 if (Val != 2) return false; 1756 HasHi = true; 1757 } 1758 1759 // Don't use movshdup if it can be done with a shufps. 1760 return HasHi; 1761} 1762 1763/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1764/// a splat of a single element. 1765static bool isSplatMask(SDNode *N) { 1766 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1767 1768 // This is a splat operation if each element of the permute is the same, and 1769 // if the value doesn't reference the second vector. 1770 SDOperand Elt = N->getOperand(0); 1771 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 1772 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) { 1773 SDOperand Arg = N->getOperand(i); 1774 if (Arg.getOpcode() == ISD::UNDEF) continue; 1775 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1776 if (Arg != Elt) return false; 1777 } 1778 1779 // Make sure it is a splat of the first vector operand. 1780 return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands(); 1781} 1782 1783/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1784/// a splat of a single element and it's a 2 or 4 element mask. 
1785bool X86::isSplatMask(SDNode *N) { 1786 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1787 1788 // We can only splat 64-bit, and 32-bit quantities. 1789 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 1790 return false; 1791 return ::isSplatMask(N); 1792} 1793 1794/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 1795/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 1796/// instructions. 1797unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 1798 unsigned NumOperands = N->getNumOperands(); 1799 unsigned Shift = (NumOperands == 4) ? 2 : 1; 1800 unsigned Mask = 0; 1801 for (unsigned i = 0; i < NumOperands; ++i) { 1802 unsigned Val = 0; 1803 SDOperand Arg = N->getOperand(NumOperands-i-1); 1804 if (Arg.getOpcode() != ISD::UNDEF) 1805 Val = cast<ConstantSDNode>(Arg)->getValue(); 1806 if (Val >= NumOperands) Val -= NumOperands; 1807 Mask |= Val; 1808 if (i != NumOperands - 1) 1809 Mask <<= Shift; 1810 } 1811 1812 return Mask; 1813} 1814 1815/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 1816/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 1817/// instructions. 1818unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 1819 unsigned Mask = 0; 1820 // 8 nodes, but we only care about the last 4. 1821 for (unsigned i = 7; i >= 4; --i) { 1822 unsigned Val = 0; 1823 SDOperand Arg = N->getOperand(i); 1824 if (Arg.getOpcode() != ISD::UNDEF) 1825 Val = cast<ConstantSDNode>(Arg)->getValue(); 1826 Mask |= (Val - 4); 1827 if (i != 4) 1828 Mask <<= 2; 1829 } 1830 1831 return Mask; 1832} 1833 1834/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 1835/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 1836/// instructions. 1837unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 1838 unsigned Mask = 0; 1839 // 8 nodes, but we only care about the first 4. 
1840 for (int i = 3; i >= 0; --i) { 1841 unsigned Val = 0; 1842 SDOperand Arg = N->getOperand(i); 1843 if (Arg.getOpcode() != ISD::UNDEF) 1844 Val = cast<ConstantSDNode>(Arg)->getValue(); 1845 Mask |= Val; 1846 if (i != 0) 1847 Mask <<= 2; 1848 } 1849 1850 return Mask; 1851} 1852 1853/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 1854/// specifies a 8 element shuffle that can be broken into a pair of 1855/// PSHUFHW and PSHUFLW. 1856static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 1857 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1858 1859 if (N->getNumOperands() != 8) 1860 return false; 1861 1862 // Lower quadword shuffled. 1863 for (unsigned i = 0; i != 4; ++i) { 1864 SDOperand Arg = N->getOperand(i); 1865 if (Arg.getOpcode() == ISD::UNDEF) continue; 1866 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1867 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1868 if (Val > 4) 1869 return false; 1870 } 1871 1872 // Upper quadword shuffled. 1873 for (unsigned i = 4; i != 8; ++i) { 1874 SDOperand Arg = N->getOperand(i); 1875 if (Arg.getOpcode() == ISD::UNDEF) continue; 1876 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1877 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1878 if (Val < 4 || Val > 7) 1879 return false; 1880 } 1881 1882 return true; 1883} 1884 1885/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 1886/// values in ther permute mask. 
1887static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 1888 SDOperand V1 = Op.getOperand(0); 1889 SDOperand V2 = Op.getOperand(1); 1890 SDOperand Mask = Op.getOperand(2); 1891 MVT::ValueType VT = Op.getValueType(); 1892 MVT::ValueType MaskVT = Mask.getValueType(); 1893 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 1894 unsigned NumElems = Mask.getNumOperands(); 1895 std::vector<SDOperand> MaskVec; 1896 1897 for (unsigned i = 0; i != NumElems; ++i) { 1898 SDOperand Arg = Mask.getOperand(i); 1899 if (Arg.getOpcode() == ISD::UNDEF) continue; 1900 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1901 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1902 if (Val < NumElems) 1903 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 1904 else 1905 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 1906 } 1907 1908 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1909 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 1910} 1911 1912/// isScalarLoadToVector - Returns true if the node is a scalar load that 1913/// is promoted to a vector. 1914static inline bool isScalarLoadToVector(SDOperand Op) { 1915 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1916 Op = Op.getOperand(0); 1917 return (Op.getOpcode() == ISD::LOAD); 1918 } 1919 return false; 1920} 1921 1922/// ShouldXformedToMOVLP - Return true if the node should be transformed to 1923/// match movlp{d|s}. The lower half elements should come from V1 (and in 1924/// order), and the upper half elements should come from the upper half of 1925/// V2 (not necessarily in order). And since V1 will become the source of 1926/// the MOVLP, it must be a scalar load. 
1927static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) { 1928 if (isScalarLoadToVector(V1)) { 1929 unsigned NumElems = Mask.getNumOperands(); 1930 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1931 if (!isUndefOrEqual(Mask.getOperand(i), i)) 1932 return false; 1933 for (unsigned i = NumElems/2; i != NumElems; ++i) 1934 if (!isUndefOrInRange(Mask.getOperand(i), 1935 NumElems+NumElems/2, NumElems*2)) 1936 return false; 1937 return true; 1938 } 1939 1940 return false; 1941} 1942 1943/// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except 1944/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1945/// half elements to come from vector 1 (which would equal the dest.) and 1946/// the upper half to come from vector 2. 1947static bool isLowerFromV2UpperFromV1(SDOperand Op) { 1948 assert(Op.getOpcode() == ISD::BUILD_VECTOR); 1949 1950 unsigned NumElems = Op.getNumOperands(); 1951 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 1952 if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2)) 1953 return false; 1954 for (unsigned i = NumElems/2; i != NumElems; ++i) 1955 if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems)) 1956 return false; 1957 return true; 1958} 1959 1960/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 1961/// of specified width. 1962static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 1963 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 1964 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 1965 std::vector<SDOperand> MaskVec; 1966 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 1967 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 1968 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 1969 } 1970 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1971} 1972 1973/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
1974/// 1975static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 1976 SDOperand V1 = Op.getOperand(0); 1977 SDOperand PermMask = Op.getOperand(2); 1978 MVT::ValueType VT = Op.getValueType(); 1979 unsigned NumElems = PermMask.getNumOperands(); 1980 PermMask = getUnpacklMask(NumElems, DAG); 1981 while (NumElems != 4) { 1982 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, PermMask); 1983 NumElems >>= 1; 1984 } 1985 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 1986 1987 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 1988 SDOperand Zero = DAG.getConstant(0, MVT::getVectorBaseType(MaskVT)); 1989 std::vector<SDOperand> ZeroVec(4, Zero); 1990 SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, ZeroVec); 1991 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 1992 DAG.getNode(ISD::UNDEF, MVT::v4i32), 1993 SplatMask); 1994 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 1995} 1996 1997/// LowerOperation - Provide custom lowering hooks for some operations. 1998/// 1999SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 2000 switch (Op.getOpcode()) { 2001 default: assert(0 && "Should not custom lower this!"); 2002 case ISD::SHL_PARTS: 2003 case ISD::SRA_PARTS: 2004 case ISD::SRL_PARTS: { 2005 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2006 "Not an i64 shift!"); 2007 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 2008 SDOperand ShOpLo = Op.getOperand(0); 2009 SDOperand ShOpHi = Op.getOperand(1); 2010 SDOperand ShAmt = Op.getOperand(2); 2011 SDOperand Tmp1 = isSRA ? 
DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 2012 DAG.getConstant(31, MVT::i8)) 2013 : DAG.getConstant(0, MVT::i32); 2014 2015 SDOperand Tmp2, Tmp3; 2016 if (Op.getOpcode() == ISD::SHL_PARTS) { 2017 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 2018 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 2019 } else { 2020 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 2021 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 2022 } 2023 2024 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 2025 ShAmt, DAG.getConstant(32, MVT::i8)); 2026 2027 SDOperand Hi, Lo; 2028 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2029 2030 std::vector<MVT::ValueType> Tys; 2031 Tys.push_back(MVT::i32); 2032 Tys.push_back(MVT::Flag); 2033 std::vector<SDOperand> Ops; 2034 if (Op.getOpcode() == ISD::SHL_PARTS) { 2035 Ops.push_back(Tmp2); 2036 Ops.push_back(Tmp3); 2037 Ops.push_back(CC); 2038 Ops.push_back(InFlag); 2039 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2040 InFlag = Hi.getValue(1); 2041 2042 Ops.clear(); 2043 Ops.push_back(Tmp3); 2044 Ops.push_back(Tmp1); 2045 Ops.push_back(CC); 2046 Ops.push_back(InFlag); 2047 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2048 } else { 2049 Ops.push_back(Tmp2); 2050 Ops.push_back(Tmp3); 2051 Ops.push_back(CC); 2052 Ops.push_back(InFlag); 2053 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2054 InFlag = Lo.getValue(1); 2055 2056 Ops.clear(); 2057 Ops.push_back(Tmp3); 2058 Ops.push_back(Tmp1); 2059 Ops.push_back(CC); 2060 Ops.push_back(InFlag); 2061 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2062 } 2063 2064 Tys.clear(); 2065 Tys.push_back(MVT::i32); 2066 Tys.push_back(MVT::i32); 2067 Ops.clear(); 2068 Ops.push_back(Lo); 2069 Ops.push_back(Hi); 2070 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2071 } 2072 case ISD::SINT_TO_FP: { 2073 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 2074 Op.getOperand(0).getValueType() >= MVT::i16 && 2075 "Unknown SINT_TO_FP to lower!"); 2076 
2077 SDOperand Result; 2078 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 2079 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 2080 MachineFunction &MF = DAG.getMachineFunction(); 2081 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2082 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2083 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 2084 DAG.getEntryNode(), Op.getOperand(0), 2085 StackSlot, DAG.getSrcValue(NULL)); 2086 2087 // Build the FILD 2088 std::vector<MVT::ValueType> Tys; 2089 Tys.push_back(MVT::f64); 2090 Tys.push_back(MVT::Other); 2091 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 2092 std::vector<SDOperand> Ops; 2093 Ops.push_back(Chain); 2094 Ops.push_back(StackSlot); 2095 Ops.push_back(DAG.getValueType(SrcVT)); 2096 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 2097 Tys, Ops); 2098 2099 if (X86ScalarSSE) { 2100 Chain = Result.getValue(1); 2101 SDOperand InFlag = Result.getValue(2); 2102 2103 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 2104 // shouldn't be necessary except that RFP cannot be live across 2105 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
2106 MachineFunction &MF = DAG.getMachineFunction(); 2107 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2108 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2109 std::vector<MVT::ValueType> Tys; 2110 Tys.push_back(MVT::Other); 2111 std::vector<SDOperand> Ops; 2112 Ops.push_back(Chain); 2113 Ops.push_back(Result); 2114 Ops.push_back(StackSlot); 2115 Ops.push_back(DAG.getValueType(Op.getValueType())); 2116 Ops.push_back(InFlag); 2117 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 2118 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 2119 DAG.getSrcValue(NULL)); 2120 } 2121 2122 return Result; 2123 } 2124 case ISD::FP_TO_SINT: { 2125 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 2126 "Unknown FP_TO_SINT to lower!"); 2127 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 2128 // stack slot. 2129 MachineFunction &MF = DAG.getMachineFunction(); 2130 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 2131 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2132 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2133 2134 unsigned Opc; 2135 switch (Op.getValueType()) { 2136 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 2137 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 2138 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 2139 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 2140 } 2141 2142 SDOperand Chain = DAG.getEntryNode(); 2143 SDOperand Value = Op.getOperand(0); 2144 if (X86ScalarSSE) { 2145 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 2146 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 2147 DAG.getSrcValue(0)); 2148 std::vector<MVT::ValueType> Tys; 2149 Tys.push_back(MVT::f64); 2150 Tys.push_back(MVT::Other); 2151 std::vector<SDOperand> Ops; 2152 Ops.push_back(Chain); 2153 Ops.push_back(StackSlot); 2154 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 
2155 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 2156 Chain = Value.getValue(1); 2157 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2158 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2159 } 2160 2161 // Build the FP_TO_INT*_IN_MEM 2162 std::vector<SDOperand> Ops; 2163 Ops.push_back(Chain); 2164 Ops.push_back(Value); 2165 Ops.push_back(StackSlot); 2166 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 2167 2168 // Load the result. 2169 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2170 DAG.getSrcValue(NULL)); 2171 } 2172 case ISD::READCYCLECOUNTER: { 2173 std::vector<MVT::ValueType> Tys; 2174 Tys.push_back(MVT::Other); 2175 Tys.push_back(MVT::Flag); 2176 std::vector<SDOperand> Ops; 2177 Ops.push_back(Op.getOperand(0)); 2178 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 2179 Ops.clear(); 2180 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 2181 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 2182 MVT::i32, Ops[0].getValue(2))); 2183 Ops.push_back(Ops[1].getValue(1)); 2184 Tys[0] = Tys[1] = MVT::i32; 2185 Tys.push_back(MVT::Other); 2186 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2187 } 2188 case ISD::FABS: { 2189 MVT::ValueType VT = Op.getValueType(); 2190 const Type *OpNTy = MVT::getTypeForValueType(VT); 2191 std::vector<Constant*> CV; 2192 if (VT == MVT::f64) { 2193 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2194 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2195 } else { 2196 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2197 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2198 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2199 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2200 } 2201 Constant *CS = ConstantStruct::get(CV); 2202 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2203 SDOperand Mask 2204 = DAG.getNode(X86ISD::LOAD_PACK, 2205 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2206 return DAG.getNode(X86ISD::FAND, 
VT, Op.getOperand(0), Mask); 2207 } 2208 case ISD::FNEG: { 2209 MVT::ValueType VT = Op.getValueType(); 2210 const Type *OpNTy = MVT::getTypeForValueType(VT); 2211 std::vector<Constant*> CV; 2212 if (VT == MVT::f64) { 2213 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 2214 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2215 } else { 2216 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 2217 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2218 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2219 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2220 } 2221 Constant *CS = ConstantStruct::get(CV); 2222 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2223 SDOperand Mask 2224 = DAG.getNode(X86ISD::LOAD_PACK, 2225 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2226 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 2227 } 2228 case ISD::SETCC: { 2229 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 2230 SDOperand Cond; 2231 SDOperand CC = Op.getOperand(2); 2232 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2233 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 2234 bool Flip; 2235 unsigned X86CC; 2236 if (translateX86CC(CC, isFP, X86CC, Flip)) { 2237 if (Flip) 2238 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2239 Op.getOperand(1), Op.getOperand(0)); 2240 else 2241 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2242 Op.getOperand(0), Op.getOperand(1)); 2243 return DAG.getNode(X86ISD::SETCC, MVT::i8, 2244 DAG.getConstant(X86CC, MVT::i8), Cond); 2245 } else { 2246 assert(isFP && "Illegal integer SetCC!"); 2247 2248 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2249 Op.getOperand(0), Op.getOperand(1)); 2250 std::vector<MVT::ValueType> Tys; 2251 std::vector<SDOperand> Ops; 2252 switch (SetCCOpcode) { 2253 default: assert(false && "Illegal floating point SetCC!"); 2254 case ISD::SETOEQ: { // !PF & ZF 2255 Tys.push_back(MVT::i8); 2256 Tys.push_back(MVT::Flag); 2257 
Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 2258 Ops.push_back(Cond); 2259 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2260 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2261 DAG.getConstant(X86ISD::COND_E, MVT::i8), 2262 Tmp1.getValue(1)); 2263 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 2264 } 2265 case ISD::SETUNE: { // PF | !ZF 2266 Tys.push_back(MVT::i8); 2267 Tys.push_back(MVT::Flag); 2268 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 2269 Ops.push_back(Cond); 2270 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2271 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2272 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 2273 Tmp1.getValue(1)); 2274 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 2275 } 2276 } 2277 } 2278 } 2279 case ISD::SELECT: { 2280 MVT::ValueType VT = Op.getValueType(); 2281 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 2282 bool addTest = false; 2283 SDOperand Op0 = Op.getOperand(0); 2284 SDOperand Cond, CC; 2285 if (Op0.getOpcode() == ISD::SETCC) 2286 Op0 = LowerOperation(Op0, DAG); 2287 2288 if (Op0.getOpcode() == X86ISD::SETCC) { 2289 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2290 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2291 // have another use it will be eliminated. 2292 // If the X86ISD::SETCC has more than one use, then it's probably better 2293 // to use a test instead of duplicating the X86ISD::CMP (for register 2294 // pressure reason). 
2295 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 2296 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2297 CmpOpc == X86ISD::UCOMI) { 2298 if (!Op0.hasOneUse()) { 2299 std::vector<MVT::ValueType> Tys; 2300 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 2301 Tys.push_back(Op0.Val->getValueType(i)); 2302 std::vector<SDOperand> Ops; 2303 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 2304 Ops.push_back(Op0.getOperand(i)); 2305 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2306 } 2307 2308 CC = Op0.getOperand(0); 2309 Cond = Op0.getOperand(1); 2310 // Make a copy as flag result cannot be used by more than one. 2311 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2312 Cond.getOperand(0), Cond.getOperand(1)); 2313 addTest = 2314 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 2315 } else 2316 addTest = true; 2317 } else 2318 addTest = true; 2319 2320 if (addTest) { 2321 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2322 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 2323 } 2324 2325 std::vector<MVT::ValueType> Tys; 2326 Tys.push_back(Op.getValueType()); 2327 Tys.push_back(MVT::Flag); 2328 std::vector<SDOperand> Ops; 2329 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 2330 // condition is true. 2331 Ops.push_back(Op.getOperand(2)); 2332 Ops.push_back(Op.getOperand(1)); 2333 Ops.push_back(CC); 2334 Ops.push_back(Cond); 2335 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 2336 } 2337 case ISD::BRCOND: { 2338 bool addTest = false; 2339 SDOperand Cond = Op.getOperand(1); 2340 SDOperand Dest = Op.getOperand(2); 2341 SDOperand CC; 2342 if (Cond.getOpcode() == ISD::SETCC) 2343 Cond = LowerOperation(Cond, DAG); 2344 2345 if (Cond.getOpcode() == X86ISD::SETCC) { 2346 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2347 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2348 // have another use it will be eliminated. 
2349 // If the X86ISD::SETCC has more than one use, then it's probably better 2350 // to use a test instead of duplicating the X86ISD::CMP (for register 2351 // pressure reason). 2352 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 2353 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2354 CmpOpc == X86ISD::UCOMI) { 2355 if (!Cond.hasOneUse()) { 2356 std::vector<MVT::ValueType> Tys; 2357 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 2358 Tys.push_back(Cond.Val->getValueType(i)); 2359 std::vector<SDOperand> Ops; 2360 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 2361 Ops.push_back(Cond.getOperand(i)); 2362 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2363 } 2364 2365 CC = Cond.getOperand(0); 2366 Cond = Cond.getOperand(1); 2367 // Make a copy as flag result cannot be used by more than one. 2368 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2369 Cond.getOperand(0), Cond.getOperand(1)); 2370 } else 2371 addTest = true; 2372 } else 2373 addTest = true; 2374 2375 if (addTest) { 2376 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2377 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 2378 } 2379 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 2380 Op.getOperand(0), Op.getOperand(2), CC, Cond); 2381 } 2382 case ISD::MEMSET: { 2383 SDOperand InFlag(0, 0); 2384 SDOperand Chain = Op.getOperand(0); 2385 unsigned Align = 2386 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2387 if (Align == 0) Align = 1; 2388 2389 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2390 // If not DWORD aligned, call memset if size is less than the threshold. 2391 // It knows how to align to the right boundary first. 
2392 if ((Align & 3) != 0 || 2393 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2394 MVT::ValueType IntPtr = getPointerTy(); 2395 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2396 std::vector<std::pair<SDOperand, const Type*> > Args; 2397 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2398 // Extend the ubyte argument to be an int value for the call. 2399 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 2400 Args.push_back(std::make_pair(Val, IntPtrTy)); 2401 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2402 std::pair<SDOperand,SDOperand> CallResult = 2403 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2404 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 2405 return CallResult.second; 2406 } 2407 2408 MVT::ValueType AVT; 2409 SDOperand Count; 2410 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2411 unsigned BytesLeft = 0; 2412 bool TwoRepStos = false; 2413 if (ValC) { 2414 unsigned ValReg; 2415 unsigned Val = ValC->getValue() & 255; 2416 2417 // If the value is a constant, then we can potentially use larger sets. 
2418 switch (Align & 3) { 2419 case 2: // WORD aligned 2420 AVT = MVT::i16; 2421 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2422 BytesLeft = I->getValue() % 2; 2423 Val = (Val << 8) | Val; 2424 ValReg = X86::AX; 2425 break; 2426 case 0: // DWORD aligned 2427 AVT = MVT::i32; 2428 if (I) { 2429 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2430 BytesLeft = I->getValue() % 4; 2431 } else { 2432 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2433 DAG.getConstant(2, MVT::i8)); 2434 TwoRepStos = true; 2435 } 2436 Val = (Val << 8) | Val; 2437 Val = (Val << 16) | Val; 2438 ValReg = X86::EAX; 2439 break; 2440 default: // Byte aligned 2441 AVT = MVT::i8; 2442 Count = Op.getOperand(3); 2443 ValReg = X86::AL; 2444 break; 2445 } 2446 2447 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 2448 InFlag); 2449 InFlag = Chain.getValue(1); 2450 } else { 2451 AVT = MVT::i8; 2452 Count = Op.getOperand(3); 2453 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 2454 InFlag = Chain.getValue(1); 2455 } 2456 2457 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2458 InFlag = Chain.getValue(1); 2459 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2460 InFlag = Chain.getValue(1); 2461 2462 std::vector<MVT::ValueType> Tys; 2463 Tys.push_back(MVT::Other); 2464 Tys.push_back(MVT::Flag); 2465 std::vector<SDOperand> Ops; 2466 Ops.push_back(Chain); 2467 Ops.push_back(DAG.getValueType(AVT)); 2468 Ops.push_back(InFlag); 2469 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2470 2471 if (TwoRepStos) { 2472 InFlag = Chain.getValue(1); 2473 Count = Op.getOperand(3); 2474 MVT::ValueType CVT = Count.getValueType(); 2475 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2476 DAG.getConstant(3, CVT)); 2477 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2478 InFlag = Chain.getValue(1); 2479 Tys.clear(); 2480 Tys.push_back(MVT::Other); 2481 Tys.push_back(MVT::Flag); 2482 Ops.clear(); 2483 
Ops.push_back(Chain); 2484 Ops.push_back(DAG.getValueType(MVT::i8)); 2485 Ops.push_back(InFlag); 2486 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2487 } else if (BytesLeft) { 2488 // Issue stores for the last 1 - 3 bytes. 2489 SDOperand Value; 2490 unsigned Val = ValC->getValue() & 255; 2491 unsigned Offset = I->getValue() - BytesLeft; 2492 SDOperand DstAddr = Op.getOperand(1); 2493 MVT::ValueType AddrVT = DstAddr.getValueType(); 2494 if (BytesLeft >= 2) { 2495 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 2496 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2497 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2498 DAG.getConstant(Offset, AddrVT)), 2499 DAG.getSrcValue(NULL)); 2500 BytesLeft -= 2; 2501 Offset += 2; 2502 } 2503 2504 if (BytesLeft == 1) { 2505 Value = DAG.getConstant(Val, MVT::i8); 2506 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2507 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2508 DAG.getConstant(Offset, AddrVT)), 2509 DAG.getSrcValue(NULL)); 2510 } 2511 } 2512 2513 return Chain; 2514 } 2515 case ISD::MEMCPY: { 2516 SDOperand Chain = Op.getOperand(0); 2517 unsigned Align = 2518 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2519 if (Align == 0) Align = 1; 2520 2521 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2522 // If not DWORD aligned, call memcpy if size is less than the threshold. 2523 // It knows how to align to the right boundary first. 
2524 if ((Align & 3) != 0 || 2525 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2526 MVT::ValueType IntPtr = getPointerTy(); 2527 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2528 std::vector<std::pair<SDOperand, const Type*> > Args; 2529 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2530 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 2531 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2532 std::pair<SDOperand,SDOperand> CallResult = 2533 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2534 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 2535 return CallResult.second; 2536 } 2537 2538 MVT::ValueType AVT; 2539 SDOperand Count; 2540 unsigned BytesLeft = 0; 2541 bool TwoRepMovs = false; 2542 switch (Align & 3) { 2543 case 2: // WORD aligned 2544 AVT = MVT::i16; 2545 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2546 BytesLeft = I->getValue() % 2; 2547 break; 2548 case 0: // DWORD aligned 2549 AVT = MVT::i32; 2550 if (I) { 2551 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2552 BytesLeft = I->getValue() % 4; 2553 } else { 2554 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2555 DAG.getConstant(2, MVT::i8)); 2556 TwoRepMovs = true; 2557 } 2558 break; 2559 default: // Byte aligned 2560 AVT = MVT::i8; 2561 Count = Op.getOperand(3); 2562 break; 2563 } 2564 2565 SDOperand InFlag(0, 0); 2566 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2567 InFlag = Chain.getValue(1); 2568 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2569 InFlag = Chain.getValue(1); 2570 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 2571 InFlag = Chain.getValue(1); 2572 2573 std::vector<MVT::ValueType> Tys; 2574 Tys.push_back(MVT::Other); 2575 Tys.push_back(MVT::Flag); 2576 std::vector<SDOperand> Ops; 2577 Ops.push_back(Chain); 2578 Ops.push_back(DAG.getValueType(AVT)); 2579 Ops.push_back(InFlag); 2580 Chain = 
DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2581 2582 if (TwoRepMovs) { 2583 InFlag = Chain.getValue(1); 2584 Count = Op.getOperand(3); 2585 MVT::ValueType CVT = Count.getValueType(); 2586 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2587 DAG.getConstant(3, CVT)); 2588 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2589 InFlag = Chain.getValue(1); 2590 Tys.clear(); 2591 Tys.push_back(MVT::Other); 2592 Tys.push_back(MVT::Flag); 2593 Ops.clear(); 2594 Ops.push_back(Chain); 2595 Ops.push_back(DAG.getValueType(MVT::i8)); 2596 Ops.push_back(InFlag); 2597 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2598 } else if (BytesLeft) { 2599 // Issue loads and stores for the last 1 - 3 bytes. 2600 unsigned Offset = I->getValue() - BytesLeft; 2601 SDOperand DstAddr = Op.getOperand(1); 2602 MVT::ValueType DstVT = DstAddr.getValueType(); 2603 SDOperand SrcAddr = Op.getOperand(2); 2604 MVT::ValueType SrcVT = SrcAddr.getValueType(); 2605 SDOperand Value; 2606 if (BytesLeft >= 2) { 2607 Value = DAG.getLoad(MVT::i16, Chain, 2608 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2609 DAG.getConstant(Offset, SrcVT)), 2610 DAG.getSrcValue(NULL)); 2611 Chain = Value.getValue(1); 2612 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2613 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2614 DAG.getConstant(Offset, DstVT)), 2615 DAG.getSrcValue(NULL)); 2616 BytesLeft -= 2; 2617 Offset += 2; 2618 } 2619 2620 if (BytesLeft == 1) { 2621 Value = DAG.getLoad(MVT::i8, Chain, 2622 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2623 DAG.getConstant(Offset, SrcVT)), 2624 DAG.getSrcValue(NULL)); 2625 Chain = Value.getValue(1); 2626 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2627 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2628 DAG.getConstant(Offset, DstVT)), 2629 DAG.getSrcValue(NULL)); 2630 } 2631 } 2632 2633 return Chain; 2634 } 2635 2636 // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their 2637 // target countpart wrapped in the X86ISD::Wrapper node. 
Suppose N is 2638 // one of the above mentioned nodes. It has to be wrapped because otherwise 2639 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2640 // be used to form addressing mode. These wrapped nodes will be selected 2641 // into MOV32ri. 2642 case ISD::ConstantPool: { 2643 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2644 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2645 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2646 CP->getAlignment())); 2647 if (Subtarget->isTargetDarwin()) { 2648 // With PIC, the address is actually $g + Offset. 2649 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2650 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2651 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2652 } 2653 2654 return Result; 2655 } 2656 case ISD::GlobalAddress: { 2657 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2658 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2659 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2660 if (Subtarget->isTargetDarwin()) { 2661 // With PIC, the address is actually $g + Offset. 2662 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2663 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2664 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2665 2666 // For Darwin, external and weak symbols are indirect, so we want to load 2667 // the value at address GV, not the value of GV itself. This means that 2668 // the GlobalAddress must be in the base or index register of the address, 2669 // not the GV offset field. 
2670 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2671 DarwinGVRequiresExtraLoad(GV)) 2672 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2673 Result, DAG.getSrcValue(NULL)); 2674 } 2675 2676 return Result; 2677 } 2678 case ISD::ExternalSymbol: { 2679 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2680 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2681 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2682 if (Subtarget->isTargetDarwin()) { 2683 // With PIC, the address is actually $g + Offset. 2684 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2685 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2686 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2687 } 2688 2689 return Result; 2690 } 2691 case ISD::VASTART: { 2692 // vastart just stores the address of the VarArgsFrameIndex slot into the 2693 // memory location argument. 2694 // FIXME: Replace MVT::i32 with PointerTy 2695 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 2696 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 2697 Op.getOperand(1), Op.getOperand(2)); 2698 } 2699 case ISD::RET: { 2700 SDOperand Copy; 2701 2702 switch(Op.getNumOperands()) { 2703 default: 2704 assert(0 && "Do not know how to return this many arguments!"); 2705 abort(); 2706 case 1: // ret void. 2707 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 2708 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 2709 case 2: { 2710 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 2711 2712 if (MVT::isVector(ArgVT)) { 2713 // Integer or FP vector result -> XMM0. 
2714 if (DAG.getMachineFunction().liveout_empty()) 2715 DAG.getMachineFunction().addLiveOut(X86::XMM0); 2716 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 2717 SDOperand()); 2718 } else if (MVT::isInteger(ArgVT)) { 2719 // Integer result -> EAX 2720 if (DAG.getMachineFunction().liveout_empty()) 2721 DAG.getMachineFunction().addLiveOut(X86::EAX); 2722 2723 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 2724 SDOperand()); 2725 } else if (!X86ScalarSSE) { 2726 // FP return with fp-stack value. 2727 if (DAG.getMachineFunction().liveout_empty()) 2728 DAG.getMachineFunction().addLiveOut(X86::ST0); 2729 2730 std::vector<MVT::ValueType> Tys; 2731 Tys.push_back(MVT::Other); 2732 Tys.push_back(MVT::Flag); 2733 std::vector<SDOperand> Ops; 2734 Ops.push_back(Op.getOperand(0)); 2735 Ops.push_back(Op.getOperand(1)); 2736 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2737 } else { 2738 // FP return with ScalarSSE (return on fp-stack). 2739 if (DAG.getMachineFunction().liveout_empty()) 2740 DAG.getMachineFunction().addLiveOut(X86::ST0); 2741 2742 SDOperand MemLoc; 2743 SDOperand Chain = Op.getOperand(0); 2744 SDOperand Value = Op.getOperand(1); 2745 2746 if (Value.getOpcode() == ISD::LOAD && 2747 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 2748 Chain = Value.getOperand(0); 2749 MemLoc = Value.getOperand(1); 2750 } else { 2751 // Spill the value to memory and reload it into top of stack. 
2752 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 2753 MachineFunction &MF = DAG.getMachineFunction(); 2754 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2755 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 2756 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 2757 Value, MemLoc, DAG.getSrcValue(0)); 2758 } 2759 std::vector<MVT::ValueType> Tys; 2760 Tys.push_back(MVT::f64); 2761 Tys.push_back(MVT::Other); 2762 std::vector<SDOperand> Ops; 2763 Ops.push_back(Chain); 2764 Ops.push_back(MemLoc); 2765 Ops.push_back(DAG.getValueType(ArgVT)); 2766 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 2767 Tys.clear(); 2768 Tys.push_back(MVT::Other); 2769 Tys.push_back(MVT::Flag); 2770 Ops.clear(); 2771 Ops.push_back(Copy.getValue(1)); 2772 Ops.push_back(Copy); 2773 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2774 } 2775 break; 2776 } 2777 case 3: 2778 if (DAG.getMachineFunction().liveout_empty()) { 2779 DAG.getMachineFunction().addLiveOut(X86::EAX); 2780 DAG.getMachineFunction().addLiveOut(X86::EDX); 2781 } 2782 2783 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 2784 SDOperand()); 2785 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 2786 break; 2787 } 2788 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 2789 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 2790 Copy.getValue(1)); 2791 } 2792 case ISD::SCALAR_TO_VECTOR: { 2793 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 2794 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 2795 } 2796 case ISD::VECTOR_SHUFFLE: { 2797 SDOperand V1 = Op.getOperand(0); 2798 SDOperand V2 = Op.getOperand(1); 2799 SDOperand PermMask = Op.getOperand(2); 2800 MVT::ValueType VT = Op.getValueType(); 2801 unsigned NumElems = PermMask.getNumOperands(); 2802 2803 if (isSplatMask(PermMask.Val)) { 2804 if (NumElems <= 4) return Op; 2805 // Promote it to a v4i32 splat. 
2806 return PromoteSplat(Op, DAG); 2807 } 2808 2809 // Normalize the node to match x86 shuffle ops if needed 2810 if (V2.getOpcode() != ISD::UNDEF) { 2811 bool DoSwap = false; 2812 2813 if (ShouldXformedToMOVLP(V1, V2, PermMask)) 2814 DoSwap = true; 2815 else if (isLowerFromV2UpperFromV1(PermMask)) 2816 DoSwap = true; 2817 2818 if (DoSwap) { 2819 Op = CommuteVectorShuffle(Op, DAG); 2820 V1 = Op.getOperand(0); 2821 V2 = Op.getOperand(1); 2822 PermMask = Op.getOperand(2); 2823 } 2824 } 2825 2826 if (NumElems == 2) 2827 return Op; 2828 2829 if (X86::isMOVSMask(PermMask.Val) || 2830 X86::isMOVSHDUPMask(PermMask.Val) || 2831 X86::isMOVSLDUPMask(PermMask.Val)) 2832 return Op; 2833 2834 if (X86::isUNPCKLMask(PermMask.Val) || 2835 X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2836 X86::isUNPCKHMask(PermMask.Val)) 2837 // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. 2838 return Op; 2839 2840 // If VT is integer, try PSHUF* first, then SHUFP*. 2841 if (MVT::isInteger(VT)) { 2842 if (X86::isPSHUFDMask(PermMask.Val) || 2843 X86::isPSHUFHWMask(PermMask.Val) || 2844 X86::isPSHUFLWMask(PermMask.Val)) { 2845 if (V2.getOpcode() != ISD::UNDEF) 2846 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2847 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2848 return Op; 2849 } 2850 2851 if (X86::isSHUFPMask(PermMask.Val)) 2852 return Op; 2853 2854 // Handle v8i16 shuffle high / low shuffle node pair. 
2855 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2856 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2857 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2858 std::vector<SDOperand> MaskVec; 2859 for (unsigned i = 0; i != 4; ++i) 2860 MaskVec.push_back(PermMask.getOperand(i)); 2861 for (unsigned i = 4; i != 8; ++i) 2862 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2863 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2864 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2865 MaskVec.clear(); 2866 for (unsigned i = 0; i != 4; ++i) 2867 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2868 for (unsigned i = 4; i != 8; ++i) 2869 MaskVec.push_back(PermMask.getOperand(i)); 2870 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2871 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2872 } 2873 } else { 2874 // Floating point cases in the other order. 2875 if (X86::isSHUFPMask(PermMask.Val)) 2876 return Op; 2877 if (X86::isPSHUFDMask(PermMask.Val) || 2878 X86::isPSHUFHWMask(PermMask.Val) || 2879 X86::isPSHUFLWMask(PermMask.Val)) { 2880 if (V2.getOpcode() != ISD::UNDEF) 2881 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2882 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2883 return Op; 2884 } 2885 } 2886 2887 return SDOperand(); 2888 } 2889 case ISD::BUILD_VECTOR: { 2890 // All one's are handled with pcmpeqd. 
2891 if (ISD::isBuildVectorAllOnes(Op.Val)) 2892 return Op; 2893 2894 std::set<SDOperand> Values; 2895 SDOperand Elt0 = Op.getOperand(0); 2896 Values.insert(Elt0); 2897 bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && 2898 cast<ConstantSDNode>(Elt0)->getValue() == 0) || 2899 (isa<ConstantFPSDNode>(Elt0) && 2900 cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0)); 2901 bool RestAreZero = true; 2902 unsigned NumElems = Op.getNumOperands(); 2903 for (unsigned i = 1; i < NumElems; ++i) { 2904 SDOperand Elt = Op.getOperand(i); 2905 if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) { 2906 if (!FPC->isExactlyValue(+0.0)) 2907 RestAreZero = false; 2908 } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2909 if (!C->isNullValue()) 2910 RestAreZero = false; 2911 } else 2912 RestAreZero = false; 2913 Values.insert(Elt); 2914 } 2915 2916 if (RestAreZero) { 2917 if (Elt0IsZero) return Op; 2918 2919 // Zero extend a scalar to a vector. 2920 return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); 2921 } 2922 2923 if (Values.size() > 2) { 2924 // Expand into a number of unpckl*. 2925 // e.g. for v4f32 2926 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2927 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2928 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2929 MVT::ValueType VT = Op.getValueType(); 2930 SDOperand PermMask = getUnpacklMask(NumElems, DAG); 2931 std::vector<SDOperand> V(NumElems); 2932 for (unsigned i = 0; i < NumElems; ++i) 2933 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2934 NumElems >>= 1; 2935 while (NumElems != 0) { 2936 for (unsigned i = 0; i < NumElems; ++i) 2937 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2938 PermMask); 2939 NumElems >>= 1; 2940 } 2941 return V[0]; 2942 } 2943 2944 return SDOperand(); 2945 } 2946 case ISD::EXTRACT_VECTOR_ELT: { 2947 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2948 return SDOperand(); 2949 2950 MVT::ValueType VT = Op.getValueType(); 2951 // TODO: handle v16i8. 
2952 if (MVT::getSizeInBits(VT) == 16) { 2953 // Transform it so it match pextrw which produces a 32-bit result. 2954 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2955 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2956 Op.getOperand(0), Op.getOperand(1)); 2957 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2958 DAG.getValueType(VT)); 2959 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2960 } else if (MVT::getSizeInBits(VT) == 32) { 2961 SDOperand Vec = Op.getOperand(0); 2962 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2963 if (Idx == 0) 2964 return Op; 2965 2966 // TODO: if Idex == 2, we can use unpckhps 2967 // SHUFPS the element to the lowest double word, then movss. 2968 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2969 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 2970 MVT::getVectorBaseType(MaskVT)); 2971 std::vector<SDOperand> IdxVec; 2972 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 2973 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2974 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2975 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2976 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2977 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2978 Vec, Vec, Mask); 2979 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2980 DAG.getConstant(0, MVT::i32)); 2981 } else if (MVT::getSizeInBits(VT) == 64) { 2982 SDOperand Vec = Op.getOperand(0); 2983 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2984 if (Idx == 0) 2985 return Op; 2986 2987 // UNPCKHPD the element to the lowest double word, then movsd. 2988 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 2989 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
2990 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2991 std::vector<SDOperand> IdxVec; 2992 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 2993 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 2994 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 2995 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 2996 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 2997 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 2998 DAG.getConstant(0, MVT::i32)); 2999 } 3000 3001 return SDOperand(); 3002 } 3003 case ISD::INSERT_VECTOR_ELT: { 3004 // Transform it so it match pinsrw which expects a 16-bit value in a R32 3005 // as its second argument. 3006 MVT::ValueType VT = Op.getValueType(); 3007 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 3008 SDOperand N0 = Op.getOperand(0); 3009 SDOperand N1 = Op.getOperand(1); 3010 SDOperand N2 = Op.getOperand(2); 3011 if (MVT::getSizeInBits(BaseVT) == 16) { 3012 if (N1.getValueType() != MVT::i32) 3013 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3014 if (N2.getValueType() != MVT::i32) 3015 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 3016 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3017 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3018 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3019 if (Idx == 0) { 3020 // Use a movss. 3021 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3022 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3023 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3024 std::vector<SDOperand> MaskVec; 3025 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3026 for (unsigned i = 1; i <= 3; ++i) 3027 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3028 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3029 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec)); 3030 } else { 3031 // Use two pinsrw instructions to insert a 32 bit value. 
3032 Idx <<= 1; 3033 if (MVT::isFloatingPoint(N1.getValueType())) { 3034 if (N1.getOpcode() == ISD::LOAD) { 3035 // Just load directly from f32mem to R32. 3036 N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), 3037 N1.getOperand(2)); 3038 } else { 3039 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3040 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3041 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3042 DAG.getConstant(0, MVT::i32)); 3043 } 3044 } 3045 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3046 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3047 DAG.getConstant(Idx, MVT::i32)); 3048 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3049 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3050 DAG.getConstant(Idx+1, MVT::i32)); 3051 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3052 } 3053 } 3054 3055 return SDOperand(); 3056 } 3057 case ISD::INTRINSIC_WO_CHAIN: { 3058 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 3059 switch (IntNo) { 3060 default: return SDOperand(); // Don't custom lower most intrinsics. 3061 // Comparison intrinsics. 
3062 case Intrinsic::x86_sse_comieq_ss: 3063 case Intrinsic::x86_sse_comilt_ss: 3064 case Intrinsic::x86_sse_comile_ss: 3065 case Intrinsic::x86_sse_comigt_ss: 3066 case Intrinsic::x86_sse_comige_ss: 3067 case Intrinsic::x86_sse_comineq_ss: 3068 case Intrinsic::x86_sse_ucomieq_ss: 3069 case Intrinsic::x86_sse_ucomilt_ss: 3070 case Intrinsic::x86_sse_ucomile_ss: 3071 case Intrinsic::x86_sse_ucomigt_ss: 3072 case Intrinsic::x86_sse_ucomige_ss: 3073 case Intrinsic::x86_sse_ucomineq_ss: 3074 case Intrinsic::x86_sse2_comieq_sd: 3075 case Intrinsic::x86_sse2_comilt_sd: 3076 case Intrinsic::x86_sse2_comile_sd: 3077 case Intrinsic::x86_sse2_comigt_sd: 3078 case Intrinsic::x86_sse2_comige_sd: 3079 case Intrinsic::x86_sse2_comineq_sd: 3080 case Intrinsic::x86_sse2_ucomieq_sd: 3081 case Intrinsic::x86_sse2_ucomilt_sd: 3082 case Intrinsic::x86_sse2_ucomile_sd: 3083 case Intrinsic::x86_sse2_ucomigt_sd: 3084 case Intrinsic::x86_sse2_ucomige_sd: 3085 case Intrinsic::x86_sse2_ucomineq_sd: { 3086 unsigned Opc = 0; 3087 ISD::CondCode CC = ISD::SETCC_INVALID; 3088 switch (IntNo) { 3089 default: break; 3090 case Intrinsic::x86_sse_comieq_ss: 3091 case Intrinsic::x86_sse2_comieq_sd: 3092 Opc = X86ISD::COMI; 3093 CC = ISD::SETEQ; 3094 break; 3095 case Intrinsic::x86_sse_comilt_ss: 3096 case Intrinsic::x86_sse2_comilt_sd: 3097 Opc = X86ISD::COMI; 3098 CC = ISD::SETLT; 3099 break; 3100 case Intrinsic::x86_sse_comile_ss: 3101 case Intrinsic::x86_sse2_comile_sd: 3102 Opc = X86ISD::COMI; 3103 CC = ISD::SETLE; 3104 break; 3105 case Intrinsic::x86_sse_comigt_ss: 3106 case Intrinsic::x86_sse2_comigt_sd: 3107 Opc = X86ISD::COMI; 3108 CC = ISD::SETGT; 3109 break; 3110 case Intrinsic::x86_sse_comige_ss: 3111 case Intrinsic::x86_sse2_comige_sd: 3112 Opc = X86ISD::COMI; 3113 CC = ISD::SETGE; 3114 break; 3115 case Intrinsic::x86_sse_comineq_ss: 3116 case Intrinsic::x86_sse2_comineq_sd: 3117 Opc = X86ISD::COMI; 3118 CC = ISD::SETNE; 3119 break; 3120 case Intrinsic::x86_sse_ucomieq_ss: 3121 case 
Intrinsic::x86_sse2_ucomieq_sd: 3122 Opc = X86ISD::UCOMI; 3123 CC = ISD::SETEQ; 3124 break; 3125 case Intrinsic::x86_sse_ucomilt_ss: 3126 case Intrinsic::x86_sse2_ucomilt_sd: 3127 Opc = X86ISD::UCOMI; 3128 CC = ISD::SETLT; 3129 break; 3130 case Intrinsic::x86_sse_ucomile_ss: 3131 case Intrinsic::x86_sse2_ucomile_sd: 3132 Opc = X86ISD::UCOMI; 3133 CC = ISD::SETLE; 3134 break; 3135 case Intrinsic::x86_sse_ucomigt_ss: 3136 case Intrinsic::x86_sse2_ucomigt_sd: 3137 Opc = X86ISD::UCOMI; 3138 CC = ISD::SETGT; 3139 break; 3140 case Intrinsic::x86_sse_ucomige_ss: 3141 case Intrinsic::x86_sse2_ucomige_sd: 3142 Opc = X86ISD::UCOMI; 3143 CC = ISD::SETGE; 3144 break; 3145 case Intrinsic::x86_sse_ucomineq_ss: 3146 case Intrinsic::x86_sse2_ucomineq_sd: 3147 Opc = X86ISD::UCOMI; 3148 CC = ISD::SETNE; 3149 break; 3150 } 3151 bool Flip; 3152 unsigned X86CC; 3153 translateX86CC(CC, true, X86CC, Flip); 3154 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 3155 Op.getOperand(Flip?1:2)); 3156 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 3157 DAG.getConstant(X86CC, MVT::i8), Cond); 3158 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 3159 } 3160 } 3161 } 3162 } 3163} 3164 3165const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3166 switch (Opcode) { 3167 default: return NULL; 3168 case X86ISD::SHLD: return "X86ISD::SHLD"; 3169 case X86ISD::SHRD: return "X86ISD::SHRD"; 3170 case X86ISD::FAND: return "X86ISD::FAND"; 3171 case X86ISD::FXOR: return "X86ISD::FXOR"; 3172 case X86ISD::FILD: return "X86ISD::FILD"; 3173 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3174 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3175 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3176 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3177 case X86ISD::FLD: return "X86ISD::FLD"; 3178 case X86ISD::FST: return "X86ISD::FST"; 3179 case X86ISD::FP_GET_RESULT: return 
"X86ISD::FP_GET_RESULT"; 3180 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3181 case X86ISD::CALL: return "X86ISD::CALL"; 3182 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3183 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3184 case X86ISD::CMP: return "X86ISD::CMP"; 3185 case X86ISD::TEST: return "X86ISD::TEST"; 3186 case X86ISD::COMI: return "X86ISD::COMI"; 3187 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3188 case X86ISD::SETCC: return "X86ISD::SETCC"; 3189 case X86ISD::CMOV: return "X86ISD::CMOV"; 3190 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3191 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3192 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3193 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3194 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3195 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3196 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3197 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3198 case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; 3199 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3200 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3201 } 3202} 3203 3204void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3205 uint64_t Mask, 3206 uint64_t &KnownZero, 3207 uint64_t &KnownOne, 3208 unsigned Depth) const { 3209 unsigned Opc = Op.getOpcode(); 3210 assert((Opc >= ISD::BUILTIN_OP_END || 3211 Opc == ISD::INTRINSIC_WO_CHAIN || 3212 Opc == ISD::INTRINSIC_W_CHAIN || 3213 Opc == ISD::INTRINSIC_VOID) && 3214 "Should use MaskedValueIsZero if you don't know whether Op" 3215 " is a target node!"); 3216 3217 KnownZero = KnownOne = 0; // Don't know anything. 
3218 switch (Opc) { 3219 default: break; 3220 case X86ISD::SETCC: 3221 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3222 break; 3223 } 3224} 3225 3226std::vector<unsigned> X86TargetLowering:: 3227getRegClassForInlineAsmConstraint(const std::string &Constraint, 3228 MVT::ValueType VT) const { 3229 if (Constraint.size() == 1) { 3230 // FIXME: not handling fp-stack yet! 3231 // FIXME: not handling MMX registers yet ('y' constraint). 3232 switch (Constraint[0]) { // GCC X86 Constraint Letters 3233 default: break; // Unknown constriant letter 3234 case 'r': // GENERAL_REGS 3235 case 'R': // LEGACY_REGS 3236 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3237 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3238 case 'l': // INDEX_REGS 3239 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3240 X86::ESI, X86::EDI, X86::EBP, 0); 3241 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3242 case 'Q': // Q_REGS 3243 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3244 case 'x': // SSE_REGS if SSE1 allowed 3245 if (Subtarget->hasSSE1()) 3246 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3247 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3248 0); 3249 return std::vector<unsigned>(); 3250 case 'Y': // SSE_REGS if SSE2 allowed 3251 if (Subtarget->hasSSE2()) 3252 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3253 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3254 0); 3255 return std::vector<unsigned>(); 3256 } 3257 } 3258 3259 return std::vector<unsigned>(); 3260} 3261 3262/// isLegalAddressImmediate - Return true if the integer value or 3263/// GlobalValue can be used as the offset of the target addressing mode. 3264bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3265 // X86 allows a sign-extended 32-bit immediate field. 
3266 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3267} 3268 3269bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3270 if (Subtarget->isTargetDarwin()) { 3271 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3272 if (RModel == Reloc::Static) 3273 return true; 3274 else if (RModel == Reloc::DynamicNoPIC) 3275 return !DarwinGVRequiresExtraLoad(GV); 3276 else 3277 return false; 3278 } else 3279 return true; 3280} 3281 3282/// isShuffleMaskLegal - Targets can use this to indicate that they only 3283/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3284/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3285/// are assumed to be legal. 3286bool 3287X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3288 // Only do shuffles on 128-bit vector types for now. 3289 if (MVT::getSizeInBits(VT) == 64) return false; 3290 return (Mask.Val->getNumOperands() == 2 || 3291 isSplatMask(Mask.Val) || 3292 X86::isMOVSMask(Mask.Val) || 3293 X86::isMOVSHDUPMask(Mask.Val) || 3294 X86::isMOVSLDUPMask(Mask.Val) || 3295 X86::isPSHUFDMask(Mask.Val) || 3296 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3297 X86::isSHUFPMask(Mask.Val) || 3298 X86::isUNPCKLMask(Mask.Val) || 3299 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3300 X86::isUNPCKHMask(Mask.Val)); 3301} 3302