X86ISelLowering.cpp revision c78d3b43f5156f2a2718886337bc27aac83e8e93
//===-- X86ISelLowering.cpp - X86 DAG Lowering Interface --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
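  // (For example, an i8 -> f32 sitofp is handled by first sign-extending the
  //  i8 operand to i32 and then converting the i32 value instead.)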
87 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 88 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 89 // SSE has no i16 to fp conversion, only i32 90 if (X86ScalarSSE) 91 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 92 else { 93 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 94 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 95 } 96 97 // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64 98 // isn't legal. 99 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 100 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 101 102 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 103 // this operation. 104 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 105 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 106 107 if (X86ScalarSSE) { 108 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 109 } else { 110 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 111 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 112 } 113 114 // Handle FP_TO_UINT by promoting the destination to a larger signed 115 // conversion. 116 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 117 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 118 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 119 120 if (X86ScalarSSE && !Subtarget->hasSSE3()) 121 // Expand FP_TO_UINT into a select. 122 // FIXME: We would like to use a Custom expander here eventually to do 123 // the optimal thing for SSE vs. the default expansion in the legalizer. 124 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 125 else 126 // With SSE3 we can use fisttpll to convert to a signed i64. 127 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 128 129 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 130 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 131 132 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 133 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 134 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 135 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand); 137 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); 138 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 139 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 140 setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); 141 setOperationAction(ISD::FREM , MVT::f64 , Expand); 142 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 143 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 144 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 145 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 146 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 147 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 148 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 149 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 150 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 151 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 152 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 153 154 // These should be promoted to a larger select which is supported. 155 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 156 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 157 158 // X86 wants to expand cmov itself. 
159 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 160 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 161 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 162 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 163 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 164 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 165 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 166 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 167 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 168 // X86 ret instruction may pop stack. 169 setOperationAction(ISD::RET , MVT::Other, Custom); 170 // Darwin ABI issue. 171 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 172 setOperationAction(ISD::JumpTable , MVT::i32 , Custom); 173 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 174 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 175 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 176 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 177 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 178 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 179 // X86 wants to expand memset / memcpy itself. 180 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 181 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 182 183 // We don't have line number support yet. 184 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 185 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 186 // FIXME - use subtarget debug flags 187 if (!Subtarget->isTargetDarwin()) 188 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); 189 190 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 191 setOperationAction(ISD::VASTART , MVT::Other, Custom); 192 193 // Use the default implementation. 194 setOperationAction(ISD::VAARG , MVT::Other, Expand); 195 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 196 setOperationAction(ISD::VAEND , MVT::Other, Expand); 197 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 198 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 199 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 200 201 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 202 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 203 204 if (X86ScalarSSE) { 205 // Set up the FP register classes. 206 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 207 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 208 209 // SSE has no load+extend ops 210 setOperationAction(ISD::EXTLOAD, MVT::f32, Expand); 211 setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand); 212 213 // Use ANDPD to simulate FABS. 214 setOperationAction(ISD::FABS , MVT::f64, Custom); 215 setOperationAction(ISD::FABS , MVT::f32, Custom); 216 217 // Use XORP to simulate FNEG. 218 setOperationAction(ISD::FNEG , MVT::f64, Custom); 219 setOperationAction(ISD::FNEG , MVT::f32, Custom); 220 221 // We don't support sin/cos/fmod 222 setOperationAction(ISD::FSIN , MVT::f64, Expand); 223 setOperationAction(ISD::FCOS , MVT::f64, Expand); 224 setOperationAction(ISD::FREM , MVT::f64, Expand); 225 setOperationAction(ISD::FSIN , MVT::f32, Expand); 226 setOperationAction(ISD::FCOS , MVT::f32, Expand); 227 setOperationAction(ISD::FREM , MVT::f32, Expand); 228 229 // Expand FP immediates into loads from the stack, except for the special 230 // cases we handle. 
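    // (With SSE the only such special case is +0.0, which can be materialized
    //  with xorps/xorpd and is registered as a legal FP immediate below; every
    //  other f32/f64 constant is expanded.)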
231 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 232 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 233 addLegalFPImmediate(+0.0); // xorps / xorpd 234 } else { 235 // Set up the FP register classes. 236 addRegisterClass(MVT::f64, X86::RFPRegisterClass); 237 238 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 239 240 if (!UnsafeFPMath) { 241 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 242 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 243 } 244 245 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 246 addLegalFPImmediate(+0.0); // FLD0 247 addLegalFPImmediate(+1.0); // FLD1 248 addLegalFPImmediate(-0.0); // FLD0/FCHS 249 addLegalFPImmediate(-1.0); // FLD1/FCHS 250 } 251 252 // First set operation action for all vector types to expand. Then we 253 // will selectively turn on ones that can be effectively codegen'd. 254 for (unsigned VT = (unsigned)MVT::Vector + 1; 255 VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { 256 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 257 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 258 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 259 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 260 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 261 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 262 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 263 } 264 265 if (Subtarget->hasMMX()) { 266 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 267 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 268 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 269 270 // FIXME: add MMX packed arithmetics 271 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); 272 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); 273 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); 274 } 275 276 if (Subtarget->hasSSE1()) { 277 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 278 279 setOperationAction(ISD::AND, MVT::v4f32, Legal); 280 setOperationAction(ISD::OR, MVT::v4f32, Legal); 281 setOperationAction(ISD::XOR, MVT::v4f32, Legal); 282 setOperationAction(ISD::ADD, MVT::v4f32, Legal); 283 setOperationAction(ISD::SUB, MVT::v4f32, Legal); 284 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 285 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 286 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 287 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 288 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 289 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 290 } 291 292 if (Subtarget->hasSSE2()) { 293 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 294 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 295 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 296 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 297 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 298 299 setOperationAction(ISD::ADD, MVT::v2f64, Legal); 300 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 301 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 302 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 303 setOperationAction(ISD::SUB, MVT::v2f64, Legal); 304 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 305 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 306 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 307 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 308 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 309 310 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, 
Custom); 311 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 312 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 313 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 314 // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. 315 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 316 317 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 318 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 319 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); 320 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); 321 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); 322 } 323 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 324 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 325 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 326 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 327 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 328 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 329 330 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 331 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 332 setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); 333 AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); 334 setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); 335 AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); 336 setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); 337 AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); 338 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 339 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 340 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 341 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 342 } 343 344 // Custom lower v2i64 and v2f64 selects. 345 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 346 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 347 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 348 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 349 } 350 351 // We want to custom lower some of our intrinsics. 352 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 353 354 computeRegisterProperties(); 355 356 // FIXME: These should be based on subtarget info. Plus, the values should 357 // be smaller when we are in optimizing for size mode. 358 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 359 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 360 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 361 allowUnalignedMemoryAccesses = true; // x86 supports it! 362} 363 364std::vector<SDOperand> 365X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 366 if (F.getCallingConv() == CallingConv::Fast && EnableFastCC) 367 return LowerFastCCArguments(F, DAG); 368 return LowerCCCArguments(F, DAG); 369} 370 371std::pair<SDOperand, SDOperand> 372X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, 373 bool isVarArg, unsigned CallingConv, 374 bool isTailCall, 375 SDOperand Callee, ArgListTy &Args, 376 SelectionDAG &DAG) { 377 assert((!isVarArg || CallingConv == CallingConv::C) && 378 "Only C takes varargs!"); 379 380 // If the callee is a GlobalAddress node (quite common, every direct call is) 381 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 
382 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 383 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 384 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 385 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 386 387 if (CallingConv == CallingConv::Fast && EnableFastCC) 388 return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG); 389 return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG); 390} 391 392//===----------------------------------------------------------------------===// 393// C Calling Convention implementation 394//===----------------------------------------------------------------------===// 395 396std::vector<SDOperand> 397X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { 398 std::vector<SDOperand> ArgValues; 399 400 MachineFunction &MF = DAG.getMachineFunction(); 401 MachineFrameInfo *MFI = MF.getFrameInfo(); 402 403 // Add DAG nodes to load the arguments... On entry to a function on the X86, 404 // the stack frame looks like this: 405 // 406 // [ESP] -- return address 407 // [ESP + 4] -- first argument (leftmost lexically) 408 // [ESP + 8] -- second argument, if first argument is four bytes in size 409 // ... 410 // 411 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 412 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 413 MVT::ValueType ObjectVT = getValueType(I->getType()); 414 unsigned ArgIncrement = 4; 415 unsigned ObjSize; 416 switch (ObjectVT) { 417 default: assert(0 && "Unhandled argument type!"); 418 case MVT::i1: 419 case MVT::i8: ObjSize = 1; break; 420 case MVT::i16: ObjSize = 2; break; 421 case MVT::i32: ObjSize = 4; break; 422 case MVT::i64: ObjSize = ArgIncrement = 8; break; 423 case MVT::f32: ObjSize = 4; break; 424 case MVT::f64: ObjSize = ArgIncrement = 8; break; 425 } 426 // Create the frame index object for this incoming parameter... 427 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 428 429 // Create the SelectionDAG nodes corresponding to a load from this parameter 430 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 431 432 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 433 // dead loads. 434 SDOperand ArgValue; 435 if (!I->use_empty()) 436 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 437 DAG.getSrcValue(NULL)); 438 else { 439 if (MVT::isInteger(ObjectVT)) 440 ArgValue = DAG.getConstant(0, ObjectVT); 441 else 442 ArgValue = DAG.getConstantFP(0, ObjectVT); 443 } 444 ArgValues.push_back(ArgValue); 445 446 ArgOffset += ArgIncrement; // Move on to the next argument... 447 } 448 449 // If the function takes variable number of arguments, make a frame index for 450 // the start of the first vararg value... for expansion of llvm.va_start. 451 if (F.isVarArg()) 452 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 453 ReturnAddrIndex = 0; // No return address slot generated yet. 454 BytesToPopOnReturn = 0; // Callee pops nothing. 455 BytesCallerReserves = ArgOffset; 456 return ArgValues; 457} 458 459std::pair<SDOperand, SDOperand> 460X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, 461 bool isVarArg, bool isTailCall, 462 SDOperand Callee, ArgListTy &Args, 463 SelectionDAG &DAG) { 464 // Count how many bytes are to be pushed on the stack. 465 unsigned NumBytes = 0; 466 467 if (Args.empty()) { 468 // Save zero bytes. 
469 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy())); 470 } else { 471 for (unsigned i = 0, e = Args.size(); i != e; ++i) 472 switch (getValueType(Args[i].second)) { 473 default: assert(0 && "Unknown value type!"); 474 case MVT::i1: 475 case MVT::i8: 476 case MVT::i16: 477 case MVT::i32: 478 case MVT::f32: 479 NumBytes += 4; 480 break; 481 case MVT::i64: 482 case MVT::f64: 483 NumBytes += 8; 484 break; 485 } 486 487 Chain = DAG.getCALLSEQ_START(Chain, 488 DAG.getConstant(NumBytes, getPointerTy())); 489 490 // Arguments go on the stack in reverse order, as specified by the ABI. 491 unsigned ArgOffset = 0; 492 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 493 std::vector<SDOperand> Stores; 494 495 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 496 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 497 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 498 499 switch (getValueType(Args[i].second)) { 500 default: assert(0 && "Unexpected ValueType for argument!"); 501 case MVT::i1: 502 case MVT::i8: 503 case MVT::i16: 504 // Promote the integer to 32 bits. If the input type is signed use a 505 // sign extend, otherwise use a zero extend. 506 if (Args[i].second->isSigned()) 507 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 508 else 509 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 510 511 // FALL THROUGH 512 case MVT::i32: 513 case MVT::f32: 514 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 515 Args[i].first, PtrOff, 516 DAG.getSrcValue(NULL))); 517 ArgOffset += 4; 518 break; 519 case MVT::i64: 520 case MVT::f64: 521 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 522 Args[i].first, PtrOff, 523 DAG.getSrcValue(NULL))); 524 ArgOffset += 8; 525 break; 526 } 527 } 528 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 529 } 530 531 std::vector<MVT::ValueType> RetVals; 532 MVT::ValueType RetTyVT = getValueType(RetTy); 533 RetVals.push_back(MVT::Other); 534 535 // The result values produced have to be legal. Promote the result. 536 switch (RetTyVT) { 537 case MVT::isVoid: break; 538 default: 539 RetVals.push_back(RetTyVT); 540 break; 541 case MVT::i1: 542 case MVT::i8: 543 case MVT::i16: 544 RetVals.push_back(MVT::i32); 545 break; 546 case MVT::f32: 547 if (X86ScalarSSE) 548 RetVals.push_back(MVT::f32); 549 else 550 RetVals.push_back(MVT::f64); 551 break; 552 case MVT::i64: 553 RetVals.push_back(MVT::i32); 554 RetVals.push_back(MVT::i32); 555 break; 556 } 557 558 std::vector<MVT::ValueType> NodeTys; 559 NodeTys.push_back(MVT::Other); // Returns a chain 560 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 561 std::vector<SDOperand> Ops; 562 Ops.push_back(Chain); 563 Ops.push_back(Callee); 564 565 // FIXME: Do not generate X86ISD::TAILCALL for now. 566 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 567 SDOperand InFlag = Chain.getValue(1); 568 569 NodeTys.clear(); 570 NodeTys.push_back(MVT::Other); // Returns a chain 571 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 
572 Ops.clear(); 573 Ops.push_back(Chain); 574 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 575 Ops.push_back(DAG.getConstant(0, getPointerTy())); 576 Ops.push_back(InFlag); 577 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 578 InFlag = Chain.getValue(1); 579 580 SDOperand RetVal; 581 if (RetTyVT != MVT::isVoid) { 582 switch (RetTyVT) { 583 default: assert(0 && "Unknown value type to return!"); 584 case MVT::i1: 585 case MVT::i8: 586 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 587 Chain = RetVal.getValue(1); 588 if (RetTyVT == MVT::i1) 589 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 590 break; 591 case MVT::i16: 592 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 593 Chain = RetVal.getValue(1); 594 break; 595 case MVT::i32: 596 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 597 Chain = RetVal.getValue(1); 598 break; 599 case MVT::i64: { 600 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 601 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 602 Lo.getValue(2)); 603 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 604 Chain = Hi.getValue(1); 605 break; 606 } 607 case MVT::f32: 608 case MVT::f64: { 609 std::vector<MVT::ValueType> Tys; 610 Tys.push_back(MVT::f64); 611 Tys.push_back(MVT::Other); 612 Tys.push_back(MVT::Flag); 613 std::vector<SDOperand> Ops; 614 Ops.push_back(Chain); 615 Ops.push_back(InFlag); 616 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 617 Chain = RetVal.getValue(1); 618 InFlag = RetVal.getValue(2); 619 if (X86ScalarSSE) { 620 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 621 // shouldn't be necessary except that RFP cannot be live across 622 // multiple blocks. When stackifier is fixed, they can be uncoupled. 623 MachineFunction &MF = DAG.getMachineFunction(); 624 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 625 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 626 Tys.clear(); 627 Tys.push_back(MVT::Other); 628 Ops.clear(); 629 Ops.push_back(Chain); 630 Ops.push_back(RetVal); 631 Ops.push_back(StackSlot); 632 Ops.push_back(DAG.getValueType(RetTyVT)); 633 Ops.push_back(InFlag); 634 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 635 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 636 DAG.getSrcValue(NULL)); 637 Chain = RetVal.getValue(1); 638 } 639 640 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 641 // FIXME: we would really like to remember that this FP_ROUND 642 // operation is okay to eliminate if we allow excess FP precision. 643 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 644 break; 645 } 646 } 647 } 648 649 return std::make_pair(RetVal, Chain); 650} 651 652//===----------------------------------------------------------------------===// 653// Fast Calling Convention implementation 654//===----------------------------------------------------------------------===// 655// 656// The X86 'fast' calling convention passes up to two integer arguments in 657// registers (an appropriate portion of EAX/EDX), passes arguments in C order, 658// and requires that the callee pop its arguments off the stack (allowing proper 659// tail calls), and has the same return value conventions as C calling convs. 660// 661// This calling convention always arranges for the callee pop value to be 8n+4 662// bytes, which is needed for tail recursion elimination and stack alignment 663// reasons. 
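// (Concretely: stack arguments are pushed in 4- or 8-byte units, so the byte
// count is always a multiple of 4; whenever it is also a multiple of 8, the
// lowering below adds 4 more. For example, 8 bytes of stack arguments become a
// 12-byte (8*1+4) callee-pop, which keeps ESP 8-byte aligned once the 4-byte
// return address has been pushed.)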
664// 665// Note that this can be enhanced in the future to pass fp vals in registers 666// (when we have a global fp allocator) and do other tricks. 667// 668 669/// AddLiveIn - This helper function adds the specified physical register to the 670/// MachineFunction as a live in value. It also creates a corresponding virtual 671/// register for it. 672static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 673 TargetRegisterClass *RC) { 674 assert(RC->contains(PReg) && "Not the correct regclass!"); 675 unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC); 676 MF.addLiveIn(PReg, VReg); 677 return VReg; 678} 679 680// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments 681// to pass in registers. 0 is none, 1 is is "use EAX", 2 is "use EAX and 682// EDX". Anything more is illegal. 683// 684// FIXME: The linscan register allocator currently has problem with 685// coalescing. At the time of this writing, whenever it decides to coalesce 686// a physreg with a virtreg, this increases the size of the physreg's live 687// range, and the live range cannot ever be reduced. This causes problems if 688// too many physregs are coaleced with virtregs, which can cause the register 689// allocator to wedge itself. 690// 691// This code triggers this problem more often if we pass args in registers, 692// so disable it until this is fixed. 693// 694// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings 695// about code being dead. 696// 697static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0; 698 699 700std::vector<SDOperand> 701X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) { 702 std::vector<SDOperand> ArgValues; 703 704 MachineFunction &MF = DAG.getMachineFunction(); 705 MachineFrameInfo *MFI = MF.getFrameInfo(); 706 707 // Add DAG nodes to load the arguments... On entry to a function the stack 708 // frame looks like this: 709 // 710 // [ESP] -- return address 711 // [ESP + 4] -- first nonreg argument (leftmost lexically) 712 // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size 713 // ... 714 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 715 716 // Keep track of the number of integer regs passed so far. This can be either 717 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 718 // used). 719 unsigned NumIntRegs = 0; 720 721 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 722 MVT::ValueType ObjectVT = getValueType(I->getType()); 723 unsigned ArgIncrement = 4; 724 unsigned ObjSize = 0; 725 SDOperand ArgValue; 726 727 switch (ObjectVT) { 728 default: assert(0 && "Unhandled argument type!"); 729 case MVT::i1: 730 case MVT::i8: 731 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 732 if (!I->use_empty()) { 733 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, 734 X86::R8RegisterClass); 735 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8); 736 DAG.setRoot(ArgValue.getValue(1)); 737 if (ObjectVT == MVT::i1) 738 // FIXME: Should insert a assertzext here. 739 ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue); 740 } 741 ++NumIntRegs; 742 break; 743 } 744 745 ObjSize = 1; 746 break; 747 case MVT::i16: 748 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 749 if (!I->use_empty()) { 750 unsigned VReg = AddLiveIn(MF, NumIntRegs ? 
X86::DX : X86::AX, 751 X86::R16RegisterClass); 752 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16); 753 DAG.setRoot(ArgValue.getValue(1)); 754 } 755 ++NumIntRegs; 756 break; 757 } 758 ObjSize = 2; 759 break; 760 case MVT::i32: 761 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 762 if (!I->use_empty()) { 763 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 764 X86::R32RegisterClass); 765 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 766 DAG.setRoot(ArgValue.getValue(1)); 767 } 768 ++NumIntRegs; 769 break; 770 } 771 ObjSize = 4; 772 break; 773 case MVT::i64: 774 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 775 if (!I->use_empty()) { 776 unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass); 777 unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 778 779 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 780 SDOperand Hi = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32); 781 DAG.setRoot(Hi.getValue(1)); 782 783 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 784 } 785 NumIntRegs += 2; 786 break; 787 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 788 if (!I->use_empty()) { 789 unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 790 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 791 DAG.setRoot(Low.getValue(1)); 792 793 // Load the high part from memory. 794 // Create the frame index object for this incoming parameter... 795 int FI = MFI->CreateFixedObject(4, ArgOffset); 796 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 797 SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN, 798 DAG.getSrcValue(NULL)); 799 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 800 } 801 ArgOffset += 4; 802 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 803 break; 804 } 805 ObjSize = ArgIncrement = 8; 806 break; 807 case MVT::f32: ObjSize = 4; break; 808 case MVT::f64: ObjSize = ArgIncrement = 8; break; 809 } 810 811 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 812 // dead loads. 813 if (ObjSize && !I->use_empty()) { 814 // Create the frame index object for this incoming parameter... 815 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 816 817 // Create the SelectionDAG nodes corresponding to a load from this 818 // parameter. 819 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 820 821 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 822 DAG.getSrcValue(NULL)); 823 } else if (ArgValue.Val == 0) { 824 if (MVT::isInteger(ObjectVT)) 825 ArgValue = DAG.getConstant(0, ObjectVT); 826 else 827 ArgValue = DAG.getConstantFP(0, ObjectVT); 828 } 829 ArgValues.push_back(ArgValue); 830 831 if (ObjSize) 832 ArgOffset += ArgIncrement; // Move on to the next argument. 833 } 834 835 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 836 // arguments and the arguments after the retaddr has been pushed are aligned. 837 if ((ArgOffset & 7) == 0) 838 ArgOffset += 4; 839 840 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 841 ReturnAddrIndex = 0; // No return address slot generated yet. 842 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 843 BytesCallerReserves = 0; 844 845 // Finally, inform the code generator which regs we return values in. 
846 switch (getValueType(F.getReturnType())) { 847 default: assert(0 && "Unknown type!"); 848 case MVT::isVoid: break; 849 case MVT::i1: 850 case MVT::i8: 851 case MVT::i16: 852 case MVT::i32: 853 MF.addLiveOut(X86::EAX); 854 break; 855 case MVT::i64: 856 MF.addLiveOut(X86::EAX); 857 MF.addLiveOut(X86::EDX); 858 break; 859 case MVT::f32: 860 case MVT::f64: 861 MF.addLiveOut(X86::ST0); 862 break; 863 } 864 return ArgValues; 865} 866 867std::pair<SDOperand, SDOperand> 868X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, 869 bool isTailCall, SDOperand Callee, 870 ArgListTy &Args, SelectionDAG &DAG) { 871 // Count how many bytes are to be pushed on the stack. 872 unsigned NumBytes = 0; 873 874 // Keep track of the number of integer regs passed so far. This can be either 875 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 876 // used). 877 unsigned NumIntRegs = 0; 878 879 for (unsigned i = 0, e = Args.size(); i != e; ++i) 880 switch (getValueType(Args[i].second)) { 881 default: assert(0 && "Unknown value type!"); 882 case MVT::i1: 883 case MVT::i8: 884 case MVT::i16: 885 case MVT::i32: 886 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 887 ++NumIntRegs; 888 break; 889 } 890 // fall through 891 case MVT::f32: 892 NumBytes += 4; 893 break; 894 case MVT::i64: 895 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 896 NumIntRegs += 2; 897 break; 898 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 899 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 900 NumBytes += 4; 901 break; 902 } 903 904 // fall through 905 case MVT::f64: 906 NumBytes += 8; 907 break; 908 } 909 910 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 911 // arguments and the arguments after the retaddr has been pushed are aligned. 912 if ((NumBytes & 7) == 0) 913 NumBytes += 4; 914 915 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 916 917 // Arguments go on the stack in reverse order, as specified by the ABI. 918 unsigned ArgOffset = 0; 919 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 920 NumIntRegs = 0; 921 std::vector<SDOperand> Stores; 922 std::vector<SDOperand> RegValuesToPass; 923 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 924 switch (getValueType(Args[i].second)) { 925 default: assert(0 && "Unexpected ValueType for argument!"); 926 case MVT::i1: 927 Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first); 928 // Fall through. 929 case MVT::i8: 930 case MVT::i16: 931 case MVT::i32: 932 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 933 RegValuesToPass.push_back(Args[i].first); 934 ++NumIntRegs; 935 break; 936 } 937 // Fall through 938 case MVT::f32: { 939 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 940 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 941 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 942 Args[i].first, PtrOff, 943 DAG.getSrcValue(NULL))); 944 ArgOffset += 4; 945 break; 946 } 947 case MVT::i64: 948 // Can pass (at least) part of it in regs? 949 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 950 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 951 Args[i].first, DAG.getConstant(1, MVT::i32)); 952 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 953 Args[i].first, DAG.getConstant(0, MVT::i32)); 954 RegValuesToPass.push_back(Lo); 955 ++NumIntRegs; 956 957 // Pass both parts in regs? 
958 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 959 RegValuesToPass.push_back(Hi); 960 ++NumIntRegs; 961 } else { 962 // Pass the high part in memory. 963 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 964 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 965 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 966 Hi, PtrOff, DAG.getSrcValue(NULL))); 967 ArgOffset += 4; 968 } 969 break; 970 } 971 // Fall through 972 case MVT::f64: 973 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 974 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 975 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 976 Args[i].first, PtrOff, 977 DAG.getSrcValue(NULL))); 978 ArgOffset += 8; 979 break; 980 } 981 } 982 if (!Stores.empty()) 983 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 984 985 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 986 // arguments and the arguments after the retaddr has been pushed are aligned. 987 if ((ArgOffset & 7) == 0) 988 ArgOffset += 4; 989 990 std::vector<MVT::ValueType> RetVals; 991 MVT::ValueType RetTyVT = getValueType(RetTy); 992 993 RetVals.push_back(MVT::Other); 994 995 // The result values produced have to be legal. Promote the result. 996 switch (RetTyVT) { 997 case MVT::isVoid: break; 998 default: 999 RetVals.push_back(RetTyVT); 1000 break; 1001 case MVT::i1: 1002 case MVT::i8: 1003 case MVT::i16: 1004 RetVals.push_back(MVT::i32); 1005 break; 1006 case MVT::f32: 1007 if (X86ScalarSSE) 1008 RetVals.push_back(MVT::f32); 1009 else 1010 RetVals.push_back(MVT::f64); 1011 break; 1012 case MVT::i64: 1013 RetVals.push_back(MVT::i32); 1014 RetVals.push_back(MVT::i32); 1015 break; 1016 } 1017 1018 // Build a sequence of copy-to-reg nodes chained together with token chain 1019 // and flag operands which copy the outgoing args into registers. 1020 SDOperand InFlag; 1021 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 1022 unsigned CCReg; 1023 SDOperand RegToPass = RegValuesToPass[i]; 1024 switch (RegToPass.getValueType()) { 1025 default: assert(0 && "Bad thing to pass in regs"); 1026 case MVT::i8: 1027 CCReg = (i == 0) ? X86::AL : X86::DL; 1028 break; 1029 case MVT::i16: 1030 CCReg = (i == 0) ? X86::AX : X86::DX; 1031 break; 1032 case MVT::i32: 1033 CCReg = (i == 0) ? X86::EAX : X86::EDX; 1034 break; 1035 } 1036 1037 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 1038 InFlag = Chain.getValue(1); 1039 } 1040 1041 std::vector<MVT::ValueType> NodeTys; 1042 NodeTys.push_back(MVT::Other); // Returns a chain 1043 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1044 std::vector<SDOperand> Ops; 1045 Ops.push_back(Chain); 1046 Ops.push_back(Callee); 1047 if (InFlag.Val) 1048 Ops.push_back(InFlag); 1049 1050 // FIXME: Do not generate X86ISD::TAILCALL for now. 1051 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 1052 InFlag = Chain.getValue(1); 1053 1054 NodeTys.clear(); 1055 NodeTys.push_back(MVT::Other); // Returns a chain 1056 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 
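  // Note: unlike the C convention above, which passes 0 as the second
  // CALLSEQ_END constant because the caller cleans up the stack, fastcc passes
  // ArgOffset for both constants below since the callee pops its own
  // arguments.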
1057 Ops.clear(); 1058 Ops.push_back(Chain); 1059 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1060 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1061 Ops.push_back(InFlag); 1062 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 1063 InFlag = Chain.getValue(1); 1064 1065 SDOperand RetVal; 1066 if (RetTyVT != MVT::isVoid) { 1067 switch (RetTyVT) { 1068 default: assert(0 && "Unknown value type to return!"); 1069 case MVT::i1: 1070 case MVT::i8: 1071 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 1072 Chain = RetVal.getValue(1); 1073 if (RetTyVT == MVT::i1) 1074 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 1075 break; 1076 case MVT::i16: 1077 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 1078 Chain = RetVal.getValue(1); 1079 break; 1080 case MVT::i32: 1081 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1082 Chain = RetVal.getValue(1); 1083 break; 1084 case MVT::i64: { 1085 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1086 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 1087 Lo.getValue(2)); 1088 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 1089 Chain = Hi.getValue(1); 1090 break; 1091 } 1092 case MVT::f32: 1093 case MVT::f64: { 1094 std::vector<MVT::ValueType> Tys; 1095 Tys.push_back(MVT::f64); 1096 Tys.push_back(MVT::Other); 1097 Tys.push_back(MVT::Flag); 1098 std::vector<SDOperand> Ops; 1099 Ops.push_back(Chain); 1100 Ops.push_back(InFlag); 1101 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 1102 Chain = RetVal.getValue(1); 1103 InFlag = RetVal.getValue(2); 1104 if (X86ScalarSSE) { 1105 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 1106 // shouldn't be necessary except that RFP cannot be live across 1107 // multiple blocks. When stackifier is fixed, they can be uncoupled. 1108 MachineFunction &MF = DAG.getMachineFunction(); 1109 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1110 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1111 Tys.clear(); 1112 Tys.push_back(MVT::Other); 1113 Ops.clear(); 1114 Ops.push_back(Chain); 1115 Ops.push_back(RetVal); 1116 Ops.push_back(StackSlot); 1117 Ops.push_back(DAG.getValueType(RetTyVT)); 1118 Ops.push_back(InFlag); 1119 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 1120 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 1121 DAG.getSrcValue(NULL)); 1122 Chain = RetVal.getValue(1); 1123 } 1124 1125 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 1126 // FIXME: we would really like to remember that this FP_ROUND 1127 // operation is okay to eliminate if we allow excess FP precision. 1128 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1129 break; 1130 } 1131 } 1132 } 1133 1134 return std::make_pair(RetVal, Chain); 1135} 1136 1137SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1138 if (ReturnAddrIndex == 0) { 1139 // Set up a frame object for the return address. 1140 MachineFunction &MF = DAG.getMachineFunction(); 1141 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1142 } 1143 1144 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 1145} 1146 1147 1148 1149std::pair<SDOperand, SDOperand> X86TargetLowering:: 1150LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1151 SelectionDAG &DAG) { 1152 SDOperand Result; 1153 if (Depth) // Depths > 0 not supported yet! 
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
                           DAG.getSrcValue(NULL));
    else
      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
                           DAG.getConstant(4, MVT::i32));
  }
  return std::make_pair(Result, Chain);
}

/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
/// which corresponds to the condition code.
static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
  switch (X86CC) {
  default: assert(0 && "Unknown X86 conditional code!");
  case X86ISD::COND_A:  return X86::JA;
  case X86ISD::COND_AE: return X86::JAE;
  case X86ISD::COND_B:  return X86::JB;
  case X86ISD::COND_BE: return X86::JBE;
  case X86ISD::COND_E:  return X86::JE;
  case X86ISD::COND_G:  return X86::JG;
  case X86ISD::COND_GE: return X86::JGE;
  case X86ISD::COND_L:  return X86::JL;
  case X86ISD::COND_LE: return X86::JLE;
  case X86ISD::COND_NE: return X86::JNE;
  case X86ISD::COND_NO: return X86::JNO;
  case X86ISD::COND_NP: return X86::JNP;
  case X86ISD::COND_NS: return X86::JNS;
  case X86ISD::COND_O:  return X86::JO;
  case X86ISD::COND_P:  return X86::JP;
  case X86ISD::COND_S:  return X86::JS;
  }
}

/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. Flip is set to true if the
/// order of the comparison operands should be flipped.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 |  0 |  0 | X > Y
    //   0 |  0 |  1 | X < Y
    //   1 |  0 |  0 | X == Y
    //   1 |  1 |  1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO:  X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:   X86CC = X86ISD::COND_NP; break;
    }
  }

  return
X86CC != X86ISD::COND_INVALID; 1245} 1246 1247static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 1248 bool &Flip) { 1249 return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 1250} 1251 1252/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1253/// code. Current x86 isa includes the following FP cmov instructions: 1254/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1255static bool hasFPCMov(unsigned X86CC) { 1256 switch (X86CC) { 1257 default: 1258 return false; 1259 case X86ISD::COND_B: 1260 case X86ISD::COND_BE: 1261 case X86ISD::COND_E: 1262 case X86ISD::COND_P: 1263 case X86ISD::COND_A: 1264 case X86ISD::COND_AE: 1265 case X86ISD::COND_NE: 1266 case X86ISD::COND_NP: 1267 return true; 1268 } 1269} 1270 1271MachineBasicBlock * 1272X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 1273 MachineBasicBlock *BB) { 1274 switch (MI->getOpcode()) { 1275 default: assert(false && "Unexpected instr type to insert"); 1276 case X86::CMOV_FR32: 1277 case X86::CMOV_FR64: 1278 case X86::CMOV_V4F32: 1279 case X86::CMOV_V2F64: 1280 case X86::CMOV_V2I64: { 1281 // To "insert" a SELECT_CC instruction, we actually have to insert the 1282 // diamond control-flow pattern. The incoming instruction knows the 1283 // destination vreg to set, the condition code register to branch on, the 1284 // true/false values to select between, and a branch opcode to use. 1285 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 1286 ilist<MachineBasicBlock>::iterator It = BB; 1287 ++It; 1288 1289 // thisMBB: 1290 // ... 1291 // TrueVal = ... 1292 // cmpTY ccX, r1, r2 1293 // bCC copy1MBB 1294 // fallthrough --> copy0MBB 1295 MachineBasicBlock *thisMBB = BB; 1296 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 1297 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 1298 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 1299 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 1300 MachineFunction *F = BB->getParent(); 1301 F->getBasicBlockList().insert(It, copy0MBB); 1302 F->getBasicBlockList().insert(It, sinkMBB); 1303 // Update machine-CFG edges by first adding all successors of the current 1304 // block to the new block which will contain the Phi node for the select. 1305 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 1306 e = BB->succ_end(); i != e; ++i) 1307 sinkMBB->addSuccessor(*i); 1308 // Next, remove all successors of the current block, and add the true 1309 // and fallthrough blocks as its successors. 1310 while(!BB->succ_empty()) 1311 BB->removeSuccessor(BB->succ_begin()); 1312 BB->addSuccessor(copy0MBB); 1313 BB->addSuccessor(sinkMBB); 1314 1315 // copy0MBB: 1316 // %FalseValue = ... 1317 // # fallthrough to sinkMBB 1318 BB = copy0MBB; 1319 1320 // Update machine-CFG edges 1321 BB->addSuccessor(sinkMBB); 1322 1323 // sinkMBB: 1324 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 1325 // ... 1326 BB = sinkMBB; 1327 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 1328 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 1329 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 1330 1331 delete MI; // The pseudo instruction is gone now. 1332 return BB; 1333 } 1334 1335 case X86::FP_TO_INT16_IN_MEM: 1336 case X86::FP_TO_INT32_IN_MEM: 1337 case X86::FP_TO_INT64_IN_MEM: { 1338 // Change the floating point control register to use "round towards zero" 1339 // mode when truncating to an integer value. 
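    // (The x87 FIST/FISTP instructions round according to the rounding-control
    //  field of the FP control word, which defaults to round-to-nearest; C
    //  semantics require truncation toward zero, hence the control-word
    //  shuffling below.)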
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}


//===----------------------------------------------------------------------===//
// X86 Custom Lowering Hooks
//===----------------------------------------------------------------------===//

/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
/// load. For Darwin, external and weak symbols are indirect, loading the value
/// at address GV rather than the value of GV itself. This means that the
/// GlobalAddress must be in the base or index register of the address, not the
/// GV offset field.
static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value is equal to the specified value.
1426static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1427 if (Op.getOpcode() == ISD::UNDEF) 1428 return true; 1429 return cast<ConstantSDNode>(Op)->getValue() == Val; 1430} 1431 1432/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1433/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1434bool X86::isPSHUFDMask(SDNode *N) { 1435 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1436 1437 if (N->getNumOperands() != 4) 1438 return false; 1439 1440 // Check if the value doesn't reference the second vector. 1441 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1442 SDOperand Arg = N->getOperand(i); 1443 if (Arg.getOpcode() == ISD::UNDEF) continue; 1444 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1445 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1446 return false; 1447 } 1448 1449 return true; 1450} 1451 1452/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1453/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1454bool X86::isPSHUFHWMask(SDNode *N) { 1455 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1456 1457 if (N->getNumOperands() != 8) 1458 return false; 1459 1460 // Lower quadword copied in order. 1461 for (unsigned i = 0; i != 4; ++i) { 1462 SDOperand Arg = N->getOperand(i); 1463 if (Arg.getOpcode() == ISD::UNDEF) continue; 1464 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1465 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1466 return false; 1467 } 1468 1469 // Upper quadword shuffled. 1470 for (unsigned i = 4; i != 8; ++i) { 1471 SDOperand Arg = N->getOperand(i); 1472 if (Arg.getOpcode() == ISD::UNDEF) continue; 1473 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1474 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1475 if (Val < 4 || Val > 7) 1476 return false; 1477 } 1478 1479 return true; 1480} 1481 1482/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1483/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1484bool X86::isPSHUFLWMask(SDNode *N) { 1485 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1486 1487 if (N->getNumOperands() != 8) 1488 return false; 1489 1490 // Upper quadword copied in order. 1491 for (unsigned i = 4; i != 8; ++i) 1492 if (!isUndefOrEqual(N->getOperand(i), i)) 1493 return false; 1494 1495 // Lower quadword shuffled. 1496 for (unsigned i = 0; i != 4; ++i) 1497 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1498 return false; 1499 1500 return true; 1501} 1502 1503/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1504/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1505static bool isSHUFPMask(std::vector<SDOperand> &N) { 1506 unsigned NumElems = N.size(); 1507 if (NumElems != 2 && NumElems != 4) return false; 1508 1509 unsigned Half = NumElems / 2; 1510 for (unsigned i = 0; i < Half; ++i) 1511 if (!isUndefOrInRange(N[i], 0, NumElems)) 1512 return false; 1513 for (unsigned i = Half; i < NumElems; ++i) 1514 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 1515 return false; 1516 1517 return true; 1518} 1519 1520bool X86::isSHUFPMask(SDNode *N) { 1521 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1522 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1523 return ::isSHUFPMask(Ops); 1524} 1525 1526/// isCommutedSHUFP - Returns true if the shuffle mask is except 1527/// the reverse of what x86 shuffles want. 
x86 shuffles requires the lower 1528/// half elements to come from vector 1 (which would equal the dest.) and 1529/// the upper half to come from vector 2. 1530static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 1531 unsigned NumElems = Ops.size(); 1532 if (NumElems != 2 && NumElems != 4) return false; 1533 1534 unsigned Half = NumElems / 2; 1535 for (unsigned i = 0; i < Half; ++i) 1536 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 1537 return false; 1538 for (unsigned i = Half; i < NumElems; ++i) 1539 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 1540 return false; 1541 return true; 1542} 1543 1544static bool isCommutedSHUFP(SDNode *N) { 1545 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1546 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1547 return isCommutedSHUFP(Ops); 1548} 1549 1550/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1551/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1552bool X86::isMOVHLPSMask(SDNode *N) { 1553 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1554 1555 if (N->getNumOperands() != 4) 1556 return false; 1557 1558 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1559 return isUndefOrEqual(N->getOperand(0), 6) && 1560 isUndefOrEqual(N->getOperand(1), 7) && 1561 isUndefOrEqual(N->getOperand(2), 2) && 1562 isUndefOrEqual(N->getOperand(3), 3); 1563} 1564 1565/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1566/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1567bool X86::isMOVLPMask(SDNode *N) { 1568 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1569 1570 unsigned NumElems = N->getNumOperands(); 1571 if (NumElems != 2 && NumElems != 4) 1572 return false; 1573 1574 for (unsigned i = 0; i < NumElems/2; ++i) 1575 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1576 return false; 1577 1578 for (unsigned i = NumElems/2; i < NumElems; ++i) 1579 if (!isUndefOrEqual(N->getOperand(i), i)) 1580 return false; 1581 1582 return true; 1583} 1584 1585/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1586/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1587/// and MOVLHPS. 1588bool X86::isMOVHPMask(SDNode *N) { 1589 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1590 1591 unsigned NumElems = N->getNumOperands(); 1592 if (NumElems != 2 && NumElems != 4) 1593 return false; 1594 1595 for (unsigned i = 0; i < NumElems/2; ++i) 1596 if (!isUndefOrEqual(N->getOperand(i), i)) 1597 return false; 1598 1599 for (unsigned i = 0; i < NumElems/2; ++i) { 1600 SDOperand Arg = N->getOperand(i + NumElems/2); 1601 if (!isUndefOrEqual(Arg, i + NumElems)) 1602 return false; 1603 } 1604 1605 return true; 1606} 1607 1608/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1609/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
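/// For a 4-element shuffle the canonical unpckl pattern is <0, 4, 1, 5>.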
1610bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1611 unsigned NumElems = N.size(); 1612 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1613 return false; 1614 1615 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1616 SDOperand BitI = N[i]; 1617 SDOperand BitI1 = N[i+1]; 1618 if (!isUndefOrEqual(BitI, j)) 1619 return false; 1620 if (V2IsSplat) { 1621 if (isUndefOrEqual(BitI1, NumElems)) 1622 return false; 1623 } else { 1624 if (!isUndefOrEqual(BitI1, j + NumElems)) 1625 return false; 1626 } 1627 } 1628 1629 return true; 1630} 1631 1632bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1633 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1634 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1635 return ::isUNPCKLMask(Ops, V2IsSplat); 1636} 1637 1638/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1639/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1640bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1641 unsigned NumElems = N.size(); 1642 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1643 return false; 1644 1645 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1646 SDOperand BitI = N[i]; 1647 SDOperand BitI1 = N[i+1]; 1648 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1649 return false; 1650 if (V2IsSplat) { 1651 if (isUndefOrEqual(BitI1, NumElems)) 1652 return false; 1653 } else { 1654 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1655 return false; 1656 } 1657 } 1658 1659 return true; 1660} 1661 1662bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1663 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1664 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1665 return ::isUNPCKHMask(Ops, V2IsSplat); 1666} 1667 1668/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1669/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1670/// <0, 0, 1, 1> 1671bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1672 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1673 1674 unsigned NumElems = N->getNumOperands(); 1675 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1676 return false; 1677 1678 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1679 SDOperand BitI = N->getOperand(i); 1680 SDOperand BitI1 = N->getOperand(i+1); 1681 1682 if (!isUndefOrEqual(BitI, j)) 1683 return false; 1684 if (!isUndefOrEqual(BitI1, j)) 1685 return false; 1686 } 1687 1688 return true; 1689} 1690 1691/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1692/// specifies a shuffle of elements that is suitable for input to MOVSS, 1693/// MOVSD, and MOVD, i.e. setting the lowest element. 1694static bool isMOVLMask(std::vector<SDOperand> &N) { 1695 unsigned NumElems = N.size(); 1696 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1697 return false; 1698 1699 if (!isUndefOrEqual(N[0], NumElems)) 1700 return false; 1701 1702 for (unsigned i = 1; i < NumElems; ++i) { 1703 SDOperand Arg = N[i]; 1704 if (!isUndefOrEqual(Arg, i)) 1705 return false; 1706 } 1707 1708 return true; 1709} 1710 1711bool X86::isMOVLMask(SDNode *N) { 1712 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1713 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1714 return ::isMOVLMask(Ops); 1715} 1716 1717/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 1718/// of what x86 movss want. 
X86 movs requires the lowest element to be the lowest 1719/// element of vector 2 and the other elements to come from vector 1 in order. 1720static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) { 1721 unsigned NumElems = Ops.size(); 1722 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1723 return false; 1724 1725 if (!isUndefOrEqual(Ops[0], 0)) 1726 return false; 1727 1728 for (unsigned i = 1; i < NumElems; ++i) { 1729 SDOperand Arg = Ops[i]; 1730 if (V2IsSplat) { 1731 if (!isUndefOrEqual(Arg, NumElems)) 1732 return false; 1733 } else { 1734 if (!isUndefOrEqual(Arg, i+NumElems)) 1735 return false; 1736 } 1737 } 1738 1739 return true; 1740} 1741 1742static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) { 1743 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1744 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1745 return isCommutedMOVL(Ops, V2IsSplat); 1746} 1747 1748/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1749/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1750bool X86::isMOVSHDUPMask(SDNode *N) { 1751 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1752 1753 if (N->getNumOperands() != 4) 1754 return false; 1755 1756 // Expect 1, 1, 3, 3 1757 for (unsigned i = 0; i < 2; ++i) { 1758 SDOperand Arg = N->getOperand(i); 1759 if (Arg.getOpcode() == ISD::UNDEF) continue; 1760 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1761 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1762 if (Val != 1) return false; 1763 } 1764 1765 bool HasHi = false; 1766 for (unsigned i = 2; i < 4; ++i) { 1767 SDOperand Arg = N->getOperand(i); 1768 if (Arg.getOpcode() == ISD::UNDEF) continue; 1769 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1770 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1771 if (Val != 3) return false; 1772 HasHi = true; 1773 } 1774 1775 // Don't use movshdup if it can be done with a shufps. 1776 return HasHi; 1777} 1778 1779/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1780/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 1781bool X86::isMOVSLDUPMask(SDNode *N) { 1782 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1783 1784 if (N->getNumOperands() != 4) 1785 return false; 1786 1787 // Expect 0, 0, 2, 2 1788 for (unsigned i = 0; i < 2; ++i) { 1789 SDOperand Arg = N->getOperand(i); 1790 if (Arg.getOpcode() == ISD::UNDEF) continue; 1791 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1792 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1793 if (Val != 0) return false; 1794 } 1795 1796 bool HasHi = false; 1797 for (unsigned i = 2; i < 4; ++i) { 1798 SDOperand Arg = N->getOperand(i); 1799 if (Arg.getOpcode() == ISD::UNDEF) continue; 1800 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1801 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1802 if (Val != 2) return false; 1803 HasHi = true; 1804 } 1805 1806 // Don't use movsldup if it can be done with a shufps. 1807 return HasHi; 1808} 1809 1810/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1811/// a splat of a single element. 1812static bool isSplatMask(SDNode *N) { 1813 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1814 1815 // This is a splat operation if each element of the permute is the same, and 1816 // if the value doesn't reference the second vector.
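// For example, <2, 2, 2, 2> is a splat of element 2 of the first vector.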
1817 unsigned NumElems = N->getNumOperands(); 1818 SDOperand ElementBase; 1819 unsigned i = 0; 1820 for (; i != NumElems; ++i) { 1821 SDOperand Elt = N->getOperand(i); 1822 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) { 1823 ElementBase = Elt; 1824 break; 1825 } 1826 } 1827 1828 if (!ElementBase.Val) 1829 return false; 1830 1831 for (; i != NumElems; ++i) { 1832 SDOperand Arg = N->getOperand(i); 1833 if (Arg.getOpcode() == ISD::UNDEF) continue; 1834 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1835 if (Arg != ElementBase) return false; 1836 } 1837 1838 // Make sure it is a splat of the first vector operand. 1839 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 1840} 1841 1842/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1843/// a splat of a single element and it's a 2 or 4 element mask. 1844bool X86::isSplatMask(SDNode *N) { 1845 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1846 1847 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 1848 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 1849 return false; 1850 return ::isSplatMask(N); 1851} 1852 1853/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 1854/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 1855/// instructions. 1856unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 1857 unsigned NumOperands = N->getNumOperands(); 1858 unsigned Shift = (NumOperands == 4) ? 2 : 1; 1859 unsigned Mask = 0; 1860 for (unsigned i = 0; i < NumOperands; ++i) { 1861 unsigned Val = 0; 1862 SDOperand Arg = N->getOperand(NumOperands-i-1); 1863 if (Arg.getOpcode() != ISD::UNDEF) 1864 Val = cast<ConstantSDNode>(Arg)->getValue(); 1865 if (Val >= NumOperands) Val -= NumOperands; 1866 Mask |= Val; 1867 if (i != NumOperands - 1) 1868 Mask <<= Shift; 1869 } 1870 1871 return Mask; 1872} 1873 1874/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 1875/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 1876/// instructions. 1877unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 1878 unsigned Mask = 0; 1879 // 8 nodes, but we only care about the last 4. 1880 for (unsigned i = 7; i >= 4; --i) { 1881 unsigned Val = 0; 1882 SDOperand Arg = N->getOperand(i); 1883 if (Arg.getOpcode() != ISD::UNDEF) 1884 Val = cast<ConstantSDNode>(Arg)->getValue(); 1885 Mask |= (Val - 4); 1886 if (i != 4) 1887 Mask <<= 2; 1888 } 1889 1890 return Mask; 1891} 1892 1893/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 1894/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 1895/// instructions. 1896unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 1897 unsigned Mask = 0; 1898 // 8 nodes, but we only care about the first 4. 1899 for (int i = 3; i >= 0; --i) { 1900 unsigned Val = 0; 1901 SDOperand Arg = N->getOperand(i); 1902 if (Arg.getOpcode() != ISD::UNDEF) 1903 Val = cast<ConstantSDNode>(Arg)->getValue(); 1904 Mask |= Val; 1905 if (i != 0) 1906 Mask <<= 2; 1907 } 1908 1909 return Mask; 1910} 1911 1912/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 1913/// specifies a 8 element shuffle that can be broken into a pair of 1914/// PSHUFHW and PSHUFLW. 1915static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 1916 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1917 1918 if (N->getNumOperands() != 8) 1919 return false; 1920 1921 // Lower quadword shuffled. 
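// Each of the first four elements must select from the lower quadword of V1 (values 0 through 3).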
1922 for (unsigned i = 0; i != 4; ++i) { 1923 SDOperand Arg = N->getOperand(i); 1924 if (Arg.getOpcode() == ISD::UNDEF) continue; 1925 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1926 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1927 if (Val > 3) 1928 return false; 1929 } 1930 1931 // Upper quadword shuffled. 1932 for (unsigned i = 4; i != 8; ++i) { 1933 SDOperand Arg = N->getOperand(i); 1934 if (Arg.getOpcode() == ISD::UNDEF) continue; 1935 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1936 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1937 if (Val < 4 || Val > 7) 1938 return false; 1939 } 1940 1941 return true; 1942} 1943 1944/// CommuteVectorShuffle - Swap vector_shuffle operands as well as 1945/// values in their permute mask. 1946static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 1947 SDOperand V1 = Op.getOperand(0); 1948 SDOperand V2 = Op.getOperand(1); 1949 SDOperand Mask = Op.getOperand(2); 1950 MVT::ValueType VT = Op.getValueType(); 1951 MVT::ValueType MaskVT = Mask.getValueType(); 1952 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 1953 unsigned NumElems = Mask.getNumOperands(); 1954 std::vector<SDOperand> MaskVec; 1955 1956 for (unsigned i = 0; i != NumElems; ++i) { 1957 SDOperand Arg = Mask.getOperand(i); 1958 if (Arg.getOpcode() == ISD::UNDEF) { 1959 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 1960 continue; 1961 } 1962 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1963 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1964 if (Val < NumElems) 1965 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 1966 else 1967 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 1968 } 1969 1970 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1971 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 1972} 1973 1974/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 1975/// match movhlps. The lower half elements should come from upper half of 1976/// V1 (and in order), and the upper half elements should come from the upper 1977/// half of V2 (and in order). 1978static bool ShouldXformToMOVHLPS(SDNode *Mask) { 1979 unsigned NumElems = Mask->getNumOperands(); 1980 if (NumElems != 4) 1981 return false; 1982 for (unsigned i = 0, e = 2; i != e; ++i) 1983 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 1984 return false; 1985 for (unsigned i = 2; i != 4; ++i) 1986 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 1987 return false; 1988 return true; 1989} 1990 1991/// isScalarLoadToVector - Returns true if the node is a scalar load that 1992/// is promoted to a vector. 1993static inline bool isScalarLoadToVector(SDNode *N) { 1994 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 1995 N = N->getOperand(0).Val; 1996 return (N->getOpcode() == ISD::LOAD); 1997 } 1998 return false; 1999} 2000 2001/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2002/// match movlp{s|d}. The lower half elements should come from lower half of 2003/// V1 (and in order), and the upper half elements should come from the upper 2004/// half of V2 (and in order). And since V1 will become the source of the 2005/// MOVLP, it must be either a vector load or a scalar load to vector.
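/// For a 4-element shuffle this corresponds to the mask <0, 1, 6, 7>.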
2006static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) { 2007 if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1)) 2008 return false; 2009 2010 unsigned NumElems = Mask->getNumOperands(); 2011 if (NumElems != 2 && NumElems != 4) 2012 return false; 2013 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2014 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2015 return false; 2016 for (unsigned i = NumElems/2; i != NumElems; ++i) 2017 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2018 return false; 2019 return true; 2020} 2021 2022/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2023/// all the same. 2024static bool isSplatVector(SDNode *N) { 2025 if (N->getOpcode() != ISD::BUILD_VECTOR) 2026 return false; 2027 2028 SDOperand SplatValue = N->getOperand(0); 2029 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2030 if (N->getOperand(i) != SplatValue) 2031 return false; 2032 return true; 2033} 2034 2035/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2036/// that point to V2 points to its first element. 2037static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2038 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2039 2040 bool Changed = false; 2041 std::vector<SDOperand> MaskVec; 2042 unsigned NumElems = Mask.getNumOperands(); 2043 for (unsigned i = 0; i != NumElems; ++i) { 2044 SDOperand Arg = Mask.getOperand(i); 2045 if (Arg.getOpcode() != ISD::UNDEF) { 2046 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2047 if (Val > NumElems) { 2048 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2049 Changed = true; 2050 } 2051 } 2052 MaskVec.push_back(Arg); 2053 } 2054 2055 if (Changed) 2056 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); 2057 return Mask; 2058} 2059 2060/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2061/// operation of specified width. 2062static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2063 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2064 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2065 2066 std::vector<SDOperand> MaskVec; 2067 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2068 for (unsigned i = 1; i != NumElems; ++i) 2069 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2070 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2071} 2072 2073/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2074/// of specified width. 2075static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2076 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2077 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2078 std::vector<SDOperand> MaskVec; 2079 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2080 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2081 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2082 } 2083 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2084} 2085 2086/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2087/// of specified width. 
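/// For 4 elements this produces the mask <2, 6, 3, 7>.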
2088static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2089 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2090 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2091 unsigned Half = NumElems/2; 2092 std::vector<SDOperand> MaskVec; 2093 for (unsigned i = 0; i != Half; ++i) { 2094 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2095 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2096 } 2097 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2098} 2099 2100/// getZeroVector - Returns a vector of specified type with all zero elements. 2101/// 2102static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2103 assert(MVT::isVector(VT) && "Expected a vector type"); 2104 unsigned NumElems = getVectorNumElements(VT); 2105 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2106 bool isFP = MVT::isFloatingPoint(EVT); 2107 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2108 std::vector<SDOperand> ZeroVec(NumElems, Zero); 2109 return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec); 2110} 2111 2112/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2113/// 2114static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2115 SDOperand V1 = Op.getOperand(0); 2116 SDOperand Mask = Op.getOperand(2); 2117 MVT::ValueType VT = Op.getValueType(); 2118 unsigned NumElems = Mask.getNumOperands(); 2119 Mask = getUnpacklMask(NumElems, DAG); 2120 while (NumElems != 4) { 2121 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2122 NumElems >>= 1; 2123 } 2124 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2125 2126 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2127 Mask = getZeroVector(MaskVT, DAG); 2128 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2129 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2130 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2131} 2132 2133/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2134/// constant +0.0. 2135static inline bool isZeroNode(SDOperand Elt) { 2136 return ((isa<ConstantSDNode>(Elt) && 2137 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2138 (isa<ConstantFPSDNode>(Elt) && 2139 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2140} 2141 2142/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2143/// vector and zero or undef vector. 2144static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2145 unsigned NumElems, unsigned Idx, 2146 bool isZero, SelectionDAG &DAG) { 2147 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2148 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2149 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2150 SDOperand Zero = DAG.getConstant(0, EVT); 2151 std::vector<SDOperand> MaskVec(NumElems, Zero); 2152 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2153 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2154 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2155} 2156 2157/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
2158/// 2159static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2160 unsigned NumNonZero, unsigned NumZero, 2161 SelectionDAG &DAG) { 2162 if (NumNonZero > 8) 2163 return SDOperand(); 2164 2165 SDOperand V(0, 0); 2166 bool First = true; 2167 for (unsigned i = 0; i < 16; ++i) { 2168 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2169 if (ThisIsNonZero && First) { 2170 if (NumZero) 2171 V = getZeroVector(MVT::v8i16, DAG); 2172 else 2173 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2174 First = false; 2175 } 2176 2177 if ((i & 1) != 0) { 2178 SDOperand ThisElt(0, 0), LastElt(0, 0); 2179 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2180 if (LastIsNonZero) { 2181 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2182 } 2183 if (ThisIsNonZero) { 2184 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2185 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2186 ThisElt, DAG.getConstant(8, MVT::i8)); 2187 if (LastIsNonZero) 2188 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2189 } else 2190 ThisElt = LastElt; 2191 2192 if (ThisElt.Val) 2193 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2194 DAG.getConstant(i/2, MVT::i32)); 2195 } 2196 } 2197 2198 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2199} 2200 2201/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 2202/// 2203static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2204 unsigned NumNonZero, unsigned NumZero, 2205 SelectionDAG &DAG) { 2206 if (NumNonZero > 4) 2207 return SDOperand(); 2208 2209 SDOperand V(0, 0); 2210 bool First = true; 2211 for (unsigned i = 0; i < 8; ++i) { 2212 bool isNonZero = (NonZeros & (1 << i)) != 0; 2213 if (isNonZero) { 2214 if (First) { 2215 if (NumZero) 2216 V = getZeroVector(MVT::v8i16, DAG); 2217 else 2218 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2219 First = false; 2220 } 2221 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2222 DAG.getConstant(i, MVT::i32)); 2223 } 2224 } 2225 2226 return V; 2227} 2228 2229/// LowerOperation - Provide custom lowering hooks for some operations. 2230/// 2231SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 2232 switch (Op.getOpcode()) { 2233 default: assert(0 && "Should not custom lower this!"); 2234 case ISD::SHL_PARTS: 2235 case ISD::SRA_PARTS: 2236 case ISD::SRL_PARTS: { 2237 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2238 "Not an i64 shift!"); 2239 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 2240 SDOperand ShOpLo = Op.getOperand(0); 2241 SDOperand ShOpHi = Op.getOperand(1); 2242 SDOperand ShAmt = Op.getOperand(2); 2243 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 2244 DAG.getConstant(31, MVT::i8)) 2245 : DAG.getConstant(0, MVT::i32); 2246 2247 SDOperand Tmp2, Tmp3; 2248 if (Op.getOpcode() == ISD::SHL_PARTS) { 2249 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 2250 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 2251 } else { 2252 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 2253 Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 2254 } 2255 2256 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 2257 ShAmt, DAG.getConstant(32, MVT::i8)); 2258 2259 SDOperand Hi, Lo; 2260 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2261 2262 std::vector<MVT::ValueType> Tys; 2263 Tys.push_back(MVT::i32); 2264 Tys.push_back(MVT::Flag); 2265 std::vector<SDOperand> Ops; 2266 if (Op.getOpcode() == ISD::SHL_PARTS) { 2267 Ops.push_back(Tmp2); 2268 Ops.push_back(Tmp3); 2269 Ops.push_back(CC); 2270 Ops.push_back(InFlag); 2271 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2272 InFlag = Hi.getValue(1); 2273 2274 Ops.clear(); 2275 Ops.push_back(Tmp3); 2276 Ops.push_back(Tmp1); 2277 Ops.push_back(CC); 2278 Ops.push_back(InFlag); 2279 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2280 } else { 2281 Ops.push_back(Tmp2); 2282 Ops.push_back(Tmp3); 2283 Ops.push_back(CC); 2284 Ops.push_back(InFlag); 2285 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2286 InFlag = Lo.getValue(1); 2287 2288 Ops.clear(); 2289 Ops.push_back(Tmp3); 2290 Ops.push_back(Tmp1); 2291 Ops.push_back(CC); 2292 Ops.push_back(InFlag); 2293 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2294 } 2295 2296 Tys.clear(); 2297 Tys.push_back(MVT::i32); 2298 Tys.push_back(MVT::i32); 2299 Ops.clear(); 2300 Ops.push_back(Lo); 2301 Ops.push_back(Hi); 2302 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2303 } 2304 case ISD::SINT_TO_FP: { 2305 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 2306 Op.getOperand(0).getValueType() >= MVT::i16 && 2307 "Unknown SINT_TO_FP to lower!"); 2308 2309 SDOperand Result; 2310 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 2311 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 2312 MachineFunction &MF = DAG.getMachineFunction(); 2313 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2314 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2315 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 2316 DAG.getEntryNode(), Op.getOperand(0), 2317 StackSlot, DAG.getSrcValue(NULL)); 2318 2319 // Build the FILD 2320 std::vector<MVT::ValueType> Tys; 2321 Tys.push_back(MVT::f64); 2322 Tys.push_back(MVT::Other); 2323 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 2324 std::vector<SDOperand> Ops; 2325 Ops.push_back(Chain); 2326 Ops.push_back(StackSlot); 2327 Ops.push_back(DAG.getValueType(SrcVT)); 2328 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 2329 Tys, Ops); 2330 2331 if (X86ScalarSSE) { 2332 Chain = Result.getValue(1); 2333 SDOperand InFlag = Result.getValue(2); 2334 2335 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 2336 // shouldn't be necessary except that RFP cannot be live across 2337 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
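// Store the x87 result to a fresh stack slot, then reload it as an SSE value of the requested type.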
2338 MachineFunction &MF = DAG.getMachineFunction(); 2339 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2340 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2341 std::vector<MVT::ValueType> Tys; 2342 Tys.push_back(MVT::Other); 2343 std::vector<SDOperand> Ops; 2344 Ops.push_back(Chain); 2345 Ops.push_back(Result); 2346 Ops.push_back(StackSlot); 2347 Ops.push_back(DAG.getValueType(Op.getValueType())); 2348 Ops.push_back(InFlag); 2349 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 2350 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 2351 DAG.getSrcValue(NULL)); 2352 } 2353 2354 return Result; 2355 } 2356 case ISD::FP_TO_SINT: { 2357 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 2358 "Unknown FP_TO_SINT to lower!"); 2359 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 2360 // stack slot. 2361 MachineFunction &MF = DAG.getMachineFunction(); 2362 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 2363 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2364 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2365 2366 unsigned Opc; 2367 switch (Op.getValueType()) { 2368 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 2369 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 2370 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 2371 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 2372 } 2373 2374 SDOperand Chain = DAG.getEntryNode(); 2375 SDOperand Value = Op.getOperand(0); 2376 if (X86ScalarSSE) { 2377 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 2378 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 2379 DAG.getSrcValue(0)); 2380 std::vector<MVT::ValueType> Tys; 2381 Tys.push_back(MVT::f64); 2382 Tys.push_back(MVT::Other); 2383 std::vector<SDOperand> Ops; 2384 Ops.push_back(Chain); 2385 Ops.push_back(StackSlot); 2386 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 2387 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 2388 Chain = Value.getValue(1); 2389 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2390 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2391 } 2392 2393 // Build the FP_TO_INT*_IN_MEM 2394 std::vector<SDOperand> Ops; 2395 Ops.push_back(Chain); 2396 Ops.push_back(Value); 2397 Ops.push_back(StackSlot); 2398 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 2399 2400 // Load the result. 
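// The FP_TO_INT*_IN_MEM node has stored the converted integer to StackSlot; read it back as the result value.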
2401 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2402 DAG.getSrcValue(NULL)); 2403 } 2404 case ISD::READCYCLECOUNTER: { 2405 std::vector<MVT::ValueType> Tys; 2406 Tys.push_back(MVT::Other); 2407 Tys.push_back(MVT::Flag); 2408 std::vector<SDOperand> Ops; 2409 Ops.push_back(Op.getOperand(0)); 2410 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 2411 Ops.clear(); 2412 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 2413 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 2414 MVT::i32, Ops[0].getValue(2))); 2415 Ops.push_back(Ops[1].getValue(1)); 2416 Tys[0] = Tys[1] = MVT::i32; 2417 Tys.push_back(MVT::Other); 2418 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2419 } 2420 case ISD::FABS: { 2421 MVT::ValueType VT = Op.getValueType(); 2422 const Type *OpNTy = MVT::getTypeForValueType(VT); 2423 std::vector<Constant*> CV; 2424 if (VT == MVT::f64) { 2425 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2426 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2427 } else { 2428 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2429 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2430 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2431 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2432 } 2433 Constant *CS = ConstantStruct::get(CV); 2434 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2435 SDOperand Mask 2436 = DAG.getNode(X86ISD::LOAD_PACK, 2437 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2438 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 2439 } 2440 case ISD::FNEG: { 2441 MVT::ValueType VT = Op.getValueType(); 2442 const Type *OpNTy = MVT::getTypeForValueType(VT); 2443 std::vector<Constant*> CV; 2444 if (VT == MVT::f64) { 2445 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 2446 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2447 } else { 2448 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 2449 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2450 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2451 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2452 } 2453 Constant *CS = ConstantStruct::get(CV); 2454 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2455 SDOperand Mask 2456 = DAG.getNode(X86ISD::LOAD_PACK, 2457 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2458 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 2459 } 2460 case ISD::SETCC: { 2461 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 2462 SDOperand Cond; 2463 SDOperand CC = Op.getOperand(2); 2464 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2465 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 2466 bool Flip; 2467 unsigned X86CC; 2468 if (translateX86CC(CC, isFP, X86CC, Flip)) { 2469 if (Flip) 2470 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2471 Op.getOperand(1), Op.getOperand(0)); 2472 else 2473 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2474 Op.getOperand(0), Op.getOperand(1)); 2475 return DAG.getNode(X86ISD::SETCC, MVT::i8, 2476 DAG.getConstant(X86CC, MVT::i8), Cond); 2477 } else { 2478 assert(isFP && "Illegal integer SetCC!"); 2479 2480 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2481 Op.getOperand(0), Op.getOperand(1)); 2482 std::vector<MVT::ValueType> Tys; 2483 std::vector<SDOperand> Ops; 2484 switch (SetCCOpcode) { 2485 default: assert(false && "Illegal floating point SetCC!"); 2486 case ISD::SETOEQ: { // !PF & ZF 2487 Tys.push_back(MVT::i8); 2488 Tys.push_back(MVT::Flag); 2489 
Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 2490 Ops.push_back(Cond); 2491 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2492 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2493 DAG.getConstant(X86ISD::COND_E, MVT::i8), 2494 Tmp1.getValue(1)); 2495 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 2496 } 2497 case ISD::SETUNE: { // PF | !ZF 2498 Tys.push_back(MVT::i8); 2499 Tys.push_back(MVT::Flag); 2500 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 2501 Ops.push_back(Cond); 2502 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2503 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2504 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 2505 Tmp1.getValue(1)); 2506 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 2507 } 2508 } 2509 } 2510 } 2511 case ISD::SELECT: { 2512 MVT::ValueType VT = Op.getValueType(); 2513 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 2514 bool addTest = false; 2515 SDOperand Op0 = Op.getOperand(0); 2516 SDOperand Cond, CC; 2517 if (Op0.getOpcode() == ISD::SETCC) 2518 Op0 = LowerOperation(Op0, DAG); 2519 2520 if (Op0.getOpcode() == X86ISD::SETCC) { 2521 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2522 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2523 // have another use it will be eliminated. 2524 // If the X86ISD::SETCC has more than one use, then it's probably better 2525 // to use a test instead of duplicating the X86ISD::CMP (for register 2526 // pressure reason). 2527 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 2528 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2529 CmpOpc == X86ISD::UCOMI) { 2530 if (!Op0.hasOneUse()) { 2531 std::vector<MVT::ValueType> Tys; 2532 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 2533 Tys.push_back(Op0.Val->getValueType(i)); 2534 std::vector<SDOperand> Ops; 2535 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 2536 Ops.push_back(Op0.getOperand(i)); 2537 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2538 } 2539 2540 CC = Op0.getOperand(0); 2541 Cond = Op0.getOperand(1); 2542 // Make a copy as flag result cannot be used by more than one. 2543 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2544 Cond.getOperand(0), Cond.getOperand(1)); 2545 addTest = 2546 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 2547 } else 2548 addTest = true; 2549 } else 2550 addTest = true; 2551 2552 if (addTest) { 2553 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2554 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 2555 } 2556 2557 std::vector<MVT::ValueType> Tys; 2558 Tys.push_back(Op.getValueType()); 2559 Tys.push_back(MVT::Flag); 2560 std::vector<SDOperand> Ops; 2561 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 2562 // condition is true. 2563 Ops.push_back(Op.getOperand(2)); 2564 Ops.push_back(Op.getOperand(1)); 2565 Ops.push_back(CC); 2566 Ops.push_back(Cond); 2567 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 2568 } 2569 case ISD::BRCOND: { 2570 bool addTest = false; 2571 SDOperand Cond = Op.getOperand(1); 2572 SDOperand Dest = Op.getOperand(2); 2573 SDOperand CC; 2574 if (Cond.getOpcode() == ISD::SETCC) 2575 Cond = LowerOperation(Cond, DAG); 2576 2577 if (Cond.getOpcode() == X86ISD::SETCC) { 2578 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2579 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2580 // have another use it will be eliminated. 
2581 // If the X86ISD::SETCC has more than one use, then it's probably better 2582 // to use a test instead of duplicating the X86ISD::CMP (for register 2583 // pressure reason). 2584 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 2585 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2586 CmpOpc == X86ISD::UCOMI) { 2587 if (!Cond.hasOneUse()) { 2588 std::vector<MVT::ValueType> Tys; 2589 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 2590 Tys.push_back(Cond.Val->getValueType(i)); 2591 std::vector<SDOperand> Ops; 2592 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 2593 Ops.push_back(Cond.getOperand(i)); 2594 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2595 } 2596 2597 CC = Cond.getOperand(0); 2598 Cond = Cond.getOperand(1); 2599 // Make a copy as flag result cannot be used by more than one. 2600 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2601 Cond.getOperand(0), Cond.getOperand(1)); 2602 } else 2603 addTest = true; 2604 } else 2605 addTest = true; 2606 2607 if (addTest) { 2608 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2609 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 2610 } 2611 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 2612 Op.getOperand(0), Op.getOperand(2), CC, Cond); 2613 } 2614 case ISD::MEMSET: { 2615 SDOperand InFlag(0, 0); 2616 SDOperand Chain = Op.getOperand(0); 2617 unsigned Align = 2618 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2619 if (Align == 0) Align = 1; 2620 2621 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2622 // If not DWORD aligned, call memset if size is less than the threshold. 2623 // It knows how to align to the right boundary first. 2624 if ((Align & 3) != 0 || 2625 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2626 MVT::ValueType IntPtr = getPointerTy(); 2627 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2628 std::vector<std::pair<SDOperand, const Type*> > Args; 2629 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2630 // Extend the ubyte argument to be an int value for the call. 2631 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 2632 Args.push_back(std::make_pair(Val, IntPtrTy)); 2633 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2634 std::pair<SDOperand,SDOperand> CallResult = 2635 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2636 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 2637 return CallResult.second; 2638 } 2639 2640 MVT::ValueType AVT; 2641 SDOperand Count; 2642 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2643 unsigned BytesLeft = 0; 2644 bool TwoRepStos = false; 2645 if (ValC) { 2646 unsigned ValReg; 2647 unsigned Val = ValC->getValue() & 255; 2648 2649 // If the value is a constant, then we can potentially use larger sets. 
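// Pick the widest store unit the alignment allows and replicate the byte value across it.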
2650 switch (Align & 3) { 2651 case 2: // WORD aligned 2652 AVT = MVT::i16; 2653 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2654 BytesLeft = I->getValue() % 2; 2655 Val = (Val << 8) | Val; 2656 ValReg = X86::AX; 2657 break; 2658 case 0: // DWORD aligned 2659 AVT = MVT::i32; 2660 if (I) { 2661 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2662 BytesLeft = I->getValue() % 4; 2663 } else { 2664 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2665 DAG.getConstant(2, MVT::i8)); 2666 TwoRepStos = true; 2667 } 2668 Val = (Val << 8) | Val; 2669 Val = (Val << 16) | Val; 2670 ValReg = X86::EAX; 2671 break; 2672 default: // Byte aligned 2673 AVT = MVT::i8; 2674 Count = Op.getOperand(3); 2675 ValReg = X86::AL; 2676 break; 2677 } 2678 2679 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 2680 InFlag); 2681 InFlag = Chain.getValue(1); 2682 } else { 2683 AVT = MVT::i8; 2684 Count = Op.getOperand(3); 2685 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 2686 InFlag = Chain.getValue(1); 2687 } 2688 2689 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2690 InFlag = Chain.getValue(1); 2691 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2692 InFlag = Chain.getValue(1); 2693 2694 std::vector<MVT::ValueType> Tys; 2695 Tys.push_back(MVT::Other); 2696 Tys.push_back(MVT::Flag); 2697 std::vector<SDOperand> Ops; 2698 Ops.push_back(Chain); 2699 Ops.push_back(DAG.getValueType(AVT)); 2700 Ops.push_back(InFlag); 2701 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2702 2703 if (TwoRepStos) { 2704 InFlag = Chain.getValue(1); 2705 Count = Op.getOperand(3); 2706 MVT::ValueType CVT = Count.getValueType(); 2707 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2708 DAG.getConstant(3, CVT)); 2709 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2710 InFlag = Chain.getValue(1); 2711 Tys.clear(); 2712 Tys.push_back(MVT::Other); 2713 Tys.push_back(MVT::Flag); 2714 Ops.clear(); 2715 Ops.push_back(Chain); 2716 Ops.push_back(DAG.getValueType(MVT::i8)); 2717 Ops.push_back(InFlag); 2718 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2719 } else if (BytesLeft) { 2720 // Issue stores for the last 1 - 3 bytes. 2721 SDOperand Value; 2722 unsigned Val = ValC->getValue() & 255; 2723 unsigned Offset = I->getValue() - BytesLeft; 2724 SDOperand DstAddr = Op.getOperand(1); 2725 MVT::ValueType AddrVT = DstAddr.getValueType(); 2726 if (BytesLeft >= 2) { 2727 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 2728 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2729 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2730 DAG.getConstant(Offset, AddrVT)), 2731 DAG.getSrcValue(NULL)); 2732 BytesLeft -= 2; 2733 Offset += 2; 2734 } 2735 2736 if (BytesLeft == 1) { 2737 Value = DAG.getConstant(Val, MVT::i8); 2738 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2739 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2740 DAG.getConstant(Offset, AddrVT)), 2741 DAG.getSrcValue(NULL)); 2742 } 2743 } 2744 2745 return Chain; 2746 } 2747 case ISD::MEMCPY: { 2748 SDOperand Chain = Op.getOperand(0); 2749 unsigned Align = 2750 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2751 if (Align == 0) Align = 1; 2752 2753 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2754 // If not DWORD aligned, call memcpy if size is less than the threshold. 2755 // It knows how to align to the right boundary first. 
2756 if ((Align & 3) != 0 || 2757 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2758 MVT::ValueType IntPtr = getPointerTy(); 2759 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2760 std::vector<std::pair<SDOperand, const Type*> > Args; 2761 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2762 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 2763 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2764 std::pair<SDOperand,SDOperand> CallResult = 2765 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2766 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 2767 return CallResult.second; 2768 } 2769 2770 MVT::ValueType AVT; 2771 SDOperand Count; 2772 unsigned BytesLeft = 0; 2773 bool TwoRepMovs = false; 2774 switch (Align & 3) { 2775 case 2: // WORD aligned 2776 AVT = MVT::i16; 2777 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2778 BytesLeft = I->getValue() % 2; 2779 break; 2780 case 0: // DWORD aligned 2781 AVT = MVT::i32; 2782 if (I) { 2783 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2784 BytesLeft = I->getValue() % 4; 2785 } else { 2786 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2787 DAG.getConstant(2, MVT::i8)); 2788 TwoRepMovs = true; 2789 } 2790 break; 2791 default: // Byte aligned 2792 AVT = MVT::i8; 2793 Count = Op.getOperand(3); 2794 break; 2795 } 2796 2797 SDOperand InFlag(0, 0); 2798 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2799 InFlag = Chain.getValue(1); 2800 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2801 InFlag = Chain.getValue(1); 2802 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 2803 InFlag = Chain.getValue(1); 2804 2805 std::vector<MVT::ValueType> Tys; 2806 Tys.push_back(MVT::Other); 2807 Tys.push_back(MVT::Flag); 2808 std::vector<SDOperand> Ops; 2809 Ops.push_back(Chain); 2810 Ops.push_back(DAG.getValueType(AVT)); 2811 Ops.push_back(InFlag); 2812 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2813 2814 if (TwoRepMovs) { 2815 InFlag = Chain.getValue(1); 2816 Count = Op.getOperand(3); 2817 MVT::ValueType CVT = Count.getValueType(); 2818 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2819 DAG.getConstant(3, CVT)); 2820 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2821 InFlag = Chain.getValue(1); 2822 Tys.clear(); 2823 Tys.push_back(MVT::Other); 2824 Tys.push_back(MVT::Flag); 2825 Ops.clear(); 2826 Ops.push_back(Chain); 2827 Ops.push_back(DAG.getValueType(MVT::i8)); 2828 Ops.push_back(InFlag); 2829 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2830 } else if (BytesLeft) { 2831 // Issue loads and stores for the last 1 - 3 bytes. 
2832 unsigned Offset = I->getValue() - BytesLeft; 2833 SDOperand DstAddr = Op.getOperand(1); 2834 MVT::ValueType DstVT = DstAddr.getValueType(); 2835 SDOperand SrcAddr = Op.getOperand(2); 2836 MVT::ValueType SrcVT = SrcAddr.getValueType(); 2837 SDOperand Value; 2838 if (BytesLeft >= 2) { 2839 Value = DAG.getLoad(MVT::i16, Chain, 2840 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2841 DAG.getConstant(Offset, SrcVT)), 2842 DAG.getSrcValue(NULL)); 2843 Chain = Value.getValue(1); 2844 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2845 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2846 DAG.getConstant(Offset, DstVT)), 2847 DAG.getSrcValue(NULL)); 2848 BytesLeft -= 2; 2849 Offset += 2; 2850 } 2851 2852 if (BytesLeft == 1) { 2853 Value = DAG.getLoad(MVT::i8, Chain, 2854 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2855 DAG.getConstant(Offset, SrcVT)), 2856 DAG.getSrcValue(NULL)); 2857 Chain = Value.getValue(1); 2858 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2859 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2860 DAG.getConstant(Offset, DstVT)), 2861 DAG.getSrcValue(NULL)); 2862 } 2863 } 2864 2865 return Chain; 2866 } 2867 2868 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2869 // their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 2870 // one of the above mentioned nodes. It has to be wrapped because otherwise 2871 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2872 // be used to form addressing mode. These wrapped nodes will be selected 2873 // into MOV32ri. 2874 case ISD::ConstantPool: { 2875 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2876 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2877 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2878 CP->getAlignment())); 2879 if (Subtarget->isTargetDarwin()) { 2880 // With PIC, the address is actually $g + Offset. 2881 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2882 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2883 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2884 } 2885 2886 return Result; 2887 } 2888 case ISD::JumpTable: { 2889 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2890 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2891 DAG.getTargetJumpTable(JT->getIndex(), 2892 getPointerTy())); 2893 if (Subtarget->isTargetDarwin()) { 2894 // With PIC, the address is actually $g + Offset. 2895 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2896 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2897 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2898 } 2899 2900 return Result; 2901 } 2902 case ISD::GlobalAddress: { 2903 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2904 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2905 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2906 if (Subtarget->isTargetDarwin()) { 2907 // With PIC, the address is actually $g + Offset. 2908 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2909 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2910 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2911 2912 // For Darwin, external and weak symbols are indirect, so we want to load 2913 // the value at address GV, not the value of GV itself. This means that 2914 // the GlobalAddress must be in the base or index register of the address, 2915 // not the GV offset field. 
2916 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2917 DarwinGVRequiresExtraLoad(GV)) 2918 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2919 Result, DAG.getSrcValue(NULL)); 2920 } 2921 2922 return Result; 2923 } 2924 case ISD::ExternalSymbol: { 2925 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2926 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2927 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2928 if (Subtarget->isTargetDarwin()) { 2929 // With PIC, the address is actually $g + Offset. 2930 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2931 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2932 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2933 } 2934 2935 return Result; 2936 } 2937 case ISD::VASTART: { 2938 // vastart just stores the address of the VarArgsFrameIndex slot into the 2939 // memory location argument. 2940 // FIXME: Replace MVT::i32 with PointerTy 2941 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 2942 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 2943 Op.getOperand(1), Op.getOperand(2)); 2944 } 2945 case ISD::RET: { 2946 SDOperand Copy; 2947 2948 switch(Op.getNumOperands()) { 2949 default: 2950 assert(0 && "Do not know how to return this many arguments!"); 2951 abort(); 2952 case 1: // ret void. 2953 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 2954 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 2955 case 2: { 2956 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 2957 2958 if (MVT::isVector(ArgVT)) { 2959 // Integer or FP vector result -> XMM0. 2960 if (DAG.getMachineFunction().liveout_empty()) 2961 DAG.getMachineFunction().addLiveOut(X86::XMM0); 2962 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 2963 SDOperand()); 2964 } else if (MVT::isInteger(ArgVT)) { 2965 // Integer result -> EAX 2966 if (DAG.getMachineFunction().liveout_empty()) 2967 DAG.getMachineFunction().addLiveOut(X86::EAX); 2968 2969 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 2970 SDOperand()); 2971 } else if (!X86ScalarSSE) { 2972 // FP return with fp-stack value. 2973 if (DAG.getMachineFunction().liveout_empty()) 2974 DAG.getMachineFunction().addLiveOut(X86::ST0); 2975 2976 std::vector<MVT::ValueType> Tys; 2977 Tys.push_back(MVT::Other); 2978 Tys.push_back(MVT::Flag); 2979 std::vector<SDOperand> Ops; 2980 Ops.push_back(Op.getOperand(0)); 2981 Ops.push_back(Op.getOperand(1)); 2982 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2983 } else { 2984 // FP return with ScalarSSE (return on fp-stack). 2985 if (DAG.getMachineFunction().liveout_empty()) 2986 DAG.getMachineFunction().addLiveOut(X86::ST0); 2987 2988 SDOperand MemLoc; 2989 SDOperand Chain = Op.getOperand(0); 2990 SDOperand Value = Op.getOperand(1); 2991 2992 if (Value.getOpcode() == ISD::LOAD && 2993 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 2994 Chain = Value.getOperand(0); 2995 MemLoc = Value.getOperand(1); 2996 } else { 2997 // Spill the value to memory and reload it into top of stack. 
2998 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 2999 MachineFunction &MF = DAG.getMachineFunction(); 3000 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3001 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 3002 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 3003 Value, MemLoc, DAG.getSrcValue(0)); 3004 } 3005 std::vector<MVT::ValueType> Tys; 3006 Tys.push_back(MVT::f64); 3007 Tys.push_back(MVT::Other); 3008 std::vector<SDOperand> Ops; 3009 Ops.push_back(Chain); 3010 Ops.push_back(MemLoc); 3011 Ops.push_back(DAG.getValueType(ArgVT)); 3012 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 3013 Tys.clear(); 3014 Tys.push_back(MVT::Other); 3015 Tys.push_back(MVT::Flag); 3016 Ops.clear(); 3017 Ops.push_back(Copy.getValue(1)); 3018 Ops.push_back(Copy); 3019 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 3020 } 3021 break; 3022 } 3023 case 3: 3024 if (DAG.getMachineFunction().liveout_empty()) { 3025 DAG.getMachineFunction().addLiveOut(X86::EAX); 3026 DAG.getMachineFunction().addLiveOut(X86::EDX); 3027 } 3028 3029 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 3030 SDOperand()); 3031 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 3032 break; 3033 } 3034 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 3035 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 3036 Copy.getValue(1)); 3037 } 3038 case ISD::SCALAR_TO_VECTOR: { 3039 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3040 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3041 } 3042 case ISD::VECTOR_SHUFFLE: { 3043 SDOperand V1 = Op.getOperand(0); 3044 SDOperand V2 = Op.getOperand(1); 3045 SDOperand PermMask = Op.getOperand(2); 3046 MVT::ValueType VT = Op.getValueType(); 3047 unsigned NumElems = PermMask.getNumOperands(); 3048 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3049 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3050 3051 if (isSplatMask(PermMask.Val)) { 3052 if (NumElems <= 4) return Op; 3053 // Promote it to a v4i32 splat. 3054 return PromoteSplat(Op, DAG); 3055 } 3056 3057 if (X86::isMOVLMask(PermMask.Val)) 3058 return (V1IsUndef) ? V2 : Op; 3059 3060 if (X86::isMOVSHDUPMask(PermMask.Val) || 3061 X86::isMOVSLDUPMask(PermMask.Val) || 3062 X86::isMOVHLPSMask(PermMask.Val) || 3063 X86::isMOVHPMask(PermMask.Val) || 3064 X86::isMOVLPMask(PermMask.Val)) 3065 return Op; 3066 3067 if (ShouldXformToMOVHLPS(PermMask.Val) || 3068 ShouldXformToMOVLP(V1.Val, PermMask.Val)) 3069 return CommuteVectorShuffle(Op, DAG); 3070 3071 bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF; 3072 bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF; 3073 if (V1IsSplat && !V2IsSplat) { 3074 Op = CommuteVectorShuffle(Op, DAG); 3075 V1 = Op.getOperand(0); 3076 V2 = Op.getOperand(1); 3077 PermMask = Op.getOperand(2); 3078 V2IsSplat = true; 3079 } 3080 3081 if (isCommutedMOVL(PermMask.Val, V2IsSplat)) { 3082 if (V2IsUndef) return V1; 3083 Op = CommuteVectorShuffle(Op, DAG); 3084 V1 = Op.getOperand(0); 3085 V2 = Op.getOperand(1); 3086 PermMask = Op.getOperand(2); 3087 if (V2IsSplat) { 3088 // V2 is a splat, so the mask may be malformed. That is, it may point 3089 // to any V2 element. The instruction selectior won't like this. Get 3090 // a corrected mask and commute to form a proper MOVS{S|D}. 
3091 SDOperand NewMask = getMOVLMask(NumElems, DAG); 3092 if (NewMask.Val != PermMask.Val) 3093 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3094 } 3095 return Op; 3096 } 3097 3098 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 3099 X86::isUNPCKLMask(PermMask.Val) || 3100 X86::isUNPCKHMask(PermMask.Val)) 3101 return Op; 3102 3103 if (V2IsSplat) { 3104 // Normalize mask so all entries that point to V2 points to its first 3105 // element then try to match unpck{h|l} again. If match, return a 3106 // new vector_shuffle with the corrected mask. 3107 SDOperand NewMask = NormalizeMask(PermMask, DAG); 3108 if (NewMask.Val != PermMask.Val) { 3109 if (X86::isUNPCKLMask(PermMask.Val, true)) { 3110 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 3111 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3112 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 3113 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 3114 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3115 } 3116 } 3117 } 3118 3119 // Normalize the node to match x86 shuffle ops if needed 3120 if (V2.getOpcode() != ISD::UNDEF) 3121 if (isCommutedSHUFP(PermMask.Val)) { 3122 Op = CommuteVectorShuffle(Op, DAG); 3123 V1 = Op.getOperand(0); 3124 V2 = Op.getOperand(1); 3125 PermMask = Op.getOperand(2); 3126 } 3127 3128 // If VT is integer, try PSHUF* first, then SHUFP*. 3129 if (MVT::isInteger(VT)) { 3130 if (X86::isPSHUFDMask(PermMask.Val) || 3131 X86::isPSHUFHWMask(PermMask.Val) || 3132 X86::isPSHUFLWMask(PermMask.Val)) { 3133 if (V2.getOpcode() != ISD::UNDEF) 3134 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3135 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3136 return Op; 3137 } 3138 3139 if (X86::isSHUFPMask(PermMask.Val)) 3140 return Op; 3141 3142 // Handle v8i16 shuffle high / low shuffle node pair. 3143 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 3144 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3145 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3146 std::vector<SDOperand> MaskVec; 3147 for (unsigned i = 0; i != 4; ++i) 3148 MaskVec.push_back(PermMask.getOperand(i)); 3149 for (unsigned i = 4; i != 8; ++i) 3150 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3151 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3152 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3153 MaskVec.clear(); 3154 for (unsigned i = 0; i != 4; ++i) 3155 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3156 for (unsigned i = 4; i != 8; ++i) 3157 MaskVec.push_back(PermMask.getOperand(i)); 3158 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3159 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3160 } 3161 } else { 3162 // Floating point cases in the other order. 3163 if (X86::isSHUFPMask(PermMask.Val)) 3164 return Op; 3165 if (X86::isPSHUFDMask(PermMask.Val) || 3166 X86::isPSHUFHWMask(PermMask.Val) || 3167 X86::isPSHUFLWMask(PermMask.Val)) { 3168 if (V2.getOpcode() != ISD::UNDEF) 3169 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3170 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3171 return Op; 3172 } 3173 } 3174 3175 if (NumElems == 4) { 3176 // Break it into (shuffle shuffle_hi, shuffle_lo). 
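    // One possible trace of the loop below (illustrative): for a v4f32
    // shuffle with PermMask <2,5,1,6>,
    //   LoMask becomes <2,u,5,u>  -> LoShuffle supplies result lanes 0..1
    //   HiMask becomes <1,u,6,u>  -> HiShuffle supplies result lanes 2..3
    // and the Locs table yields a final mask of <0,2,4,6> over
    // (LoShuffle, HiShuffle).  Each of the three masks keeps its low half on
    // the first operand and its high half on the second, which is the shape
    // SHUFPS can match.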
3177 MVT::ValueType MaskVT = PermMask.getValueType(); 3178 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3179 std::map<unsigned, std::pair<int, int> > Locs; 3180 std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3181 std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3182 std::vector<SDOperand> *MaskPtr = &LoMask; 3183 unsigned MaskIdx = 0; 3184 unsigned LoIdx = 0; 3185 unsigned HiIdx = NumElems/2; 3186 for (unsigned i = 0; i != NumElems; ++i) { 3187 if (i == NumElems/2) { 3188 MaskPtr = &HiMask; 3189 MaskIdx = 1; 3190 LoIdx = 0; 3191 HiIdx = NumElems/2; 3192 } 3193 SDOperand Elt = PermMask.getOperand(i); 3194 if (Elt.getOpcode() == ISD::UNDEF) { 3195 Locs[i] = std::make_pair(-1, -1); 3196 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3197 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3198 (*MaskPtr)[LoIdx] = Elt; 3199 LoIdx++; 3200 } else { 3201 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3202 (*MaskPtr)[HiIdx] = Elt; 3203 HiIdx++; 3204 } 3205 } 3206 3207 SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3208 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask)); 3209 SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3210 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask)); 3211 std::vector<SDOperand> MaskOps; 3212 for (unsigned i = 0; i != NumElems; ++i) { 3213 if (Locs[i].first == -1) { 3214 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3215 } else { 3216 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3217 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3218 } 3219 } 3220 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3221 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps)); 3222 } 3223 3224 return SDOperand(); 3225 } 3226 case ISD::BUILD_VECTOR: { 3227 // All zero's are handled with pxor. 3228 if (ISD::isBuildVectorAllZeros(Op.Val)) 3229 return Op; 3230 3231 // All one's are handled with pcmpeqd. 3232 if (ISD::isBuildVectorAllOnes(Op.Val)) 3233 return Op; 3234 3235 MVT::ValueType VT = Op.getValueType(); 3236 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 3237 unsigned EVTBits = MVT::getSizeInBits(EVT); 3238 3239 // Let legalizer expand 2-widde build_vector's. 3240 if (EVTBits == 64) 3241 return SDOperand(); 3242 3243 unsigned NumElems = Op.getNumOperands(); 3244 unsigned NumZero = 0; 3245 unsigned NumNonZero = 0; 3246 unsigned NonZeros = 0; 3247 std::set<SDOperand> Values; 3248 for (unsigned i = 0; i < NumElems; ++i) { 3249 SDOperand Elt = Op.getOperand(i); 3250 if (Elt.getOpcode() != ISD::UNDEF) { 3251 Values.insert(Elt); 3252 if (isZeroNode(Elt)) 3253 NumZero++; 3254 else { 3255 NonZeros |= (1 << i); 3256 NumNonZero++; 3257 } 3258 } 3259 } 3260 3261 if (NumNonZero == 0) 3262 // Must be a mix of zero and undef. Return a zero vector. 3263 return getZeroVector(VT, DAG); 3264 3265 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3266 if (Values.size() == 1) 3267 return SDOperand(); 3268 3269 // Special case for single non-zero element. 3270 if (NumNonZero == 1) { 3271 unsigned Idx = CountTrailingZeros_32(NonZeros); 3272 SDOperand Item = Op.getOperand(Idx); 3273 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3274 if (Idx == 0) 3275 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 3276 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 3277 NumZero > 0, DAG); 3278 3279 if (EVTBits == 32) { 3280 // Turn it into a shuffle of zero and zero-extended scalar to vector. 
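      // Illustrative example of this path: lowering the v4i32 build_vector
      // <0, 0, x, 0> (Idx == 2, zeros present) first forms Item = <x,0,0,0>,
      // then emits a shuffle of Item with undef using the mask <1, 1, 0, 1>
      // built below, so lane 0 of Item lands at index 2 and every other lane
      // reads a known-zero element.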
3281 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 3282 DAG); 3283 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3284 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3285 std::vector<SDOperand> MaskVec; 3286 for (unsigned i = 0; i < NumElems; i++) 3287 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3288 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3289 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3290 DAG.getNode(ISD::UNDEF, VT), Mask); 3291 } 3292 } 3293 3294 // If element VT is < 32 bits, convert it to inserts into a zero vector. 3295 if (EVTBits == 8) { 3296 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG); 3297 if (V.Val) return V; 3298 } 3299 3300 if (EVTBits == 16) { 3301 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG); 3302 if (V.Val) return V; 3303 } 3304 3305 // If element VT is == 32 bits, turn it into a number of shuffles. 3306 std::vector<SDOperand> V(NumElems); 3307 if (NumElems == 4 && NumZero > 0) { 3308 for (unsigned i = 0; i < 4; ++i) { 3309 bool isZero = !(NonZeros & (1 << i)); 3310 if (isZero) 3311 V[i] = getZeroVector(VT, DAG); 3312 else 3313 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3314 } 3315 3316 for (unsigned i = 0; i < 2; ++i) { 3317 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3318 default: break; 3319 case 0: 3320 V[i] = V[i*2]; // Must be a zero vector. 3321 break; 3322 case 1: 3323 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3324 getMOVLMask(NumElems, DAG)); 3325 break; 3326 case 2: 3327 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3328 getMOVLMask(NumElems, DAG)); 3329 break; 3330 case 3: 3331 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3332 getUnpacklMask(NumElems, DAG)); 3333 break; 3334 } 3335 } 3336 3337 // Take advantage of the fact R32 to VR128 scalar_to_vector (i.e. movd) 3338 // clears the upper bits. 3339 // FIXME: we can do the same for v4f32 case when we know both parts of 3340 // the lower half come from scalar_to_vector (loadf32). We should do 3341 // that in post legalizer dag combiner with target specific hooks. 3342 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 3343 return V[0]; 3344 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3345 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 3346 std::vector<SDOperand> MaskVec; 3347 bool Reverse = (NonZeros & 0x3) == 2; 3348 for (unsigned i = 0; i < 2; ++i) 3349 if (Reverse) 3350 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3351 else 3352 MaskVec.push_back(DAG.getConstant(i, EVT)); 3353 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3354 for (unsigned i = 0; i < 2; ++i) 3355 if (Reverse) 3356 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3357 else 3358 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3359 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3360 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3361 } 3362 3363 if (Values.size() > 2) { 3364 // Expand into a number of unpckl*. 3365 // e.g. 
for v4f32 3366 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3367 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3368 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3369 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 3370 for (unsigned i = 0; i < NumElems; ++i) 3371 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3372 NumElems >>= 1; 3373 while (NumElems != 0) { 3374 for (unsigned i = 0; i < NumElems; ++i) 3375 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3376 UnpckMask); 3377 NumElems >>= 1; 3378 } 3379 return V[0]; 3380 } 3381 3382 return SDOperand(); 3383 } 3384 case ISD::EXTRACT_VECTOR_ELT: { 3385 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3386 return SDOperand(); 3387 3388 MVT::ValueType VT = Op.getValueType(); 3389 // TODO: handle v16i8. 3390 if (MVT::getSizeInBits(VT) == 16) { 3391 // Transform it so it match pextrw which produces a 32-bit result. 3392 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 3393 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 3394 Op.getOperand(0), Op.getOperand(1)); 3395 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 3396 DAG.getValueType(VT)); 3397 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3398 } else if (MVT::getSizeInBits(VT) == 32) { 3399 SDOperand Vec = Op.getOperand(0); 3400 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3401 if (Idx == 0) 3402 return Op; 3403 3404 // SHUFPS the element to the lowest double word, then movss. 3405 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3406 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 3407 MVT::getVectorBaseType(MaskVT)); 3408 std::vector<SDOperand> IdxVec; 3409 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 3410 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3411 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3412 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3413 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 3414 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3415 Vec, Vec, Mask); 3416 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3417 DAG.getConstant(0, MVT::i32)); 3418 } else if (MVT::getSizeInBits(VT) == 64) { 3419 SDOperand Vec = Op.getOperand(0); 3420 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3421 if (Idx == 0) 3422 return Op; 3423 3424 // UNPCKHPD the element to the lowest double word, then movsd. 3425 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 3426 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 3427 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3428 std::vector<SDOperand> IdxVec; 3429 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 3430 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3431 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 3432 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3433 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3434 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3435 DAG.getConstant(0, MVT::i32)); 3436 } 3437 3438 return SDOperand(); 3439 } 3440 case ISD::INSERT_VECTOR_ELT: { 3441 // Transform it so it match pinsrw which expects a 16-bit value in a R32 3442 // as its second argument. 
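    // For example (illustrative register choices): inserting a 16-bit element
    // at index 5 of a v8i16 any-extends the value to i32 and rewrites the
    // index to an i32 constant, so the X86ISD::PINSRW node built below is
    // expected to select to roughly
    //   pinsrw $5, %eax, %xmm0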
3443 MVT::ValueType VT = Op.getValueType(); 3444 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 3445 SDOperand N0 = Op.getOperand(0); 3446 SDOperand N1 = Op.getOperand(1); 3447 SDOperand N2 = Op.getOperand(2); 3448 if (MVT::getSizeInBits(BaseVT) == 16) { 3449 if (N1.getValueType() != MVT::i32) 3450 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3451 if (N2.getValueType() != MVT::i32) 3452 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 3453 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3454 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3455 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3456 if (Idx == 0) { 3457 // Use a movss. 3458 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3459 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3460 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3461 std::vector<SDOperand> MaskVec; 3462 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3463 for (unsigned i = 1; i <= 3; ++i) 3464 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3465 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3466 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec)); 3467 } else { 3468 // Use two pinsrw instructions to insert a 32 bit value. 3469 Idx <<= 1; 3470 if (MVT::isFloatingPoint(N1.getValueType())) { 3471 if (N1.getOpcode() == ISD::LOAD) { 3472 // Just load directly from f32mem to R32. 3473 N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), 3474 N1.getOperand(2)); 3475 } else { 3476 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3477 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3478 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3479 DAG.getConstant(0, MVT::i32)); 3480 } 3481 } 3482 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3483 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3484 DAG.getConstant(Idx, MVT::i32)); 3485 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3486 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3487 DAG.getConstant(Idx+1, MVT::i32)); 3488 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3489 } 3490 } 3491 3492 return SDOperand(); 3493 } 3494 case ISD::INTRINSIC_WO_CHAIN: { 3495 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 3496 switch (IntNo) { 3497 default: return SDOperand(); // Don't custom lower most intrinsics. 3498 // Comparison intrinsics. 
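    // For example (illustrative): llvm.x86.sse.comieq.ss is lowered below to
    // an X86ISD::COMI node feeding X86ISD::SETCC, which is expected to select
    // to roughly
    //   comiss %xmm1, %xmm0
    //   sete   %al
    // (operands may be swapped for some predicates, and the exact condition
    // code comes from translateX86CC), with the i8 result any-extended to the
    // i32 the intrinsic returns.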
3499 case Intrinsic::x86_sse_comieq_ss: 3500 case Intrinsic::x86_sse_comilt_ss: 3501 case Intrinsic::x86_sse_comile_ss: 3502 case Intrinsic::x86_sse_comigt_ss: 3503 case Intrinsic::x86_sse_comige_ss: 3504 case Intrinsic::x86_sse_comineq_ss: 3505 case Intrinsic::x86_sse_ucomieq_ss: 3506 case Intrinsic::x86_sse_ucomilt_ss: 3507 case Intrinsic::x86_sse_ucomile_ss: 3508 case Intrinsic::x86_sse_ucomigt_ss: 3509 case Intrinsic::x86_sse_ucomige_ss: 3510 case Intrinsic::x86_sse_ucomineq_ss: 3511 case Intrinsic::x86_sse2_comieq_sd: 3512 case Intrinsic::x86_sse2_comilt_sd: 3513 case Intrinsic::x86_sse2_comile_sd: 3514 case Intrinsic::x86_sse2_comigt_sd: 3515 case Intrinsic::x86_sse2_comige_sd: 3516 case Intrinsic::x86_sse2_comineq_sd: 3517 case Intrinsic::x86_sse2_ucomieq_sd: 3518 case Intrinsic::x86_sse2_ucomilt_sd: 3519 case Intrinsic::x86_sse2_ucomile_sd: 3520 case Intrinsic::x86_sse2_ucomigt_sd: 3521 case Intrinsic::x86_sse2_ucomige_sd: 3522 case Intrinsic::x86_sse2_ucomineq_sd: { 3523 unsigned Opc = 0; 3524 ISD::CondCode CC = ISD::SETCC_INVALID; 3525 switch (IntNo) { 3526 default: break; 3527 case Intrinsic::x86_sse_comieq_ss: 3528 case Intrinsic::x86_sse2_comieq_sd: 3529 Opc = X86ISD::COMI; 3530 CC = ISD::SETEQ; 3531 break; 3532 case Intrinsic::x86_sse_comilt_ss: 3533 case Intrinsic::x86_sse2_comilt_sd: 3534 Opc = X86ISD::COMI; 3535 CC = ISD::SETLT; 3536 break; 3537 case Intrinsic::x86_sse_comile_ss: 3538 case Intrinsic::x86_sse2_comile_sd: 3539 Opc = X86ISD::COMI; 3540 CC = ISD::SETLE; 3541 break; 3542 case Intrinsic::x86_sse_comigt_ss: 3543 case Intrinsic::x86_sse2_comigt_sd: 3544 Opc = X86ISD::COMI; 3545 CC = ISD::SETGT; 3546 break; 3547 case Intrinsic::x86_sse_comige_ss: 3548 case Intrinsic::x86_sse2_comige_sd: 3549 Opc = X86ISD::COMI; 3550 CC = ISD::SETGE; 3551 break; 3552 case Intrinsic::x86_sse_comineq_ss: 3553 case Intrinsic::x86_sse2_comineq_sd: 3554 Opc = X86ISD::COMI; 3555 CC = ISD::SETNE; 3556 break; 3557 case Intrinsic::x86_sse_ucomieq_ss: 3558 case Intrinsic::x86_sse2_ucomieq_sd: 3559 Opc = X86ISD::UCOMI; 3560 CC = ISD::SETEQ; 3561 break; 3562 case Intrinsic::x86_sse_ucomilt_ss: 3563 case Intrinsic::x86_sse2_ucomilt_sd: 3564 Opc = X86ISD::UCOMI; 3565 CC = ISD::SETLT; 3566 break; 3567 case Intrinsic::x86_sse_ucomile_ss: 3568 case Intrinsic::x86_sse2_ucomile_sd: 3569 Opc = X86ISD::UCOMI; 3570 CC = ISD::SETLE; 3571 break; 3572 case Intrinsic::x86_sse_ucomigt_ss: 3573 case Intrinsic::x86_sse2_ucomigt_sd: 3574 Opc = X86ISD::UCOMI; 3575 CC = ISD::SETGT; 3576 break; 3577 case Intrinsic::x86_sse_ucomige_ss: 3578 case Intrinsic::x86_sse2_ucomige_sd: 3579 Opc = X86ISD::UCOMI; 3580 CC = ISD::SETGE; 3581 break; 3582 case Intrinsic::x86_sse_ucomineq_ss: 3583 case Intrinsic::x86_sse2_ucomineq_sd: 3584 Opc = X86ISD::UCOMI; 3585 CC = ISD::SETNE; 3586 break; 3587 } 3588 bool Flip; 3589 unsigned X86CC; 3590 translateX86CC(CC, true, X86CC, Flip); 3591 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 3592 Op.getOperand(Flip?1:2)); 3593 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 3594 DAG.getConstant(X86CC, MVT::i8), Cond); 3595 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 3596 } 3597 } 3598 } 3599 } 3600} 3601 3602const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3603 switch (Opcode) { 3604 default: return NULL; 3605 case X86ISD::SHLD: return "X86ISD::SHLD"; 3606 case X86ISD::SHRD: return "X86ISD::SHRD"; 3607 case X86ISD::FAND: return "X86ISD::FAND"; 3608 case X86ISD::FXOR: return "X86ISD::FXOR"; 3609 case X86ISD::FILD: return 
"X86ISD::FILD"; 3610 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3611 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3612 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3613 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3614 case X86ISD::FLD: return "X86ISD::FLD"; 3615 case X86ISD::FST: return "X86ISD::FST"; 3616 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 3617 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3618 case X86ISD::CALL: return "X86ISD::CALL"; 3619 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3620 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3621 case X86ISD::CMP: return "X86ISD::CMP"; 3622 case X86ISD::TEST: return "X86ISD::TEST"; 3623 case X86ISD::COMI: return "X86ISD::COMI"; 3624 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3625 case X86ISD::SETCC: return "X86ISD::SETCC"; 3626 case X86ISD::CMOV: return "X86ISD::CMOV"; 3627 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3628 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3629 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3630 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3631 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3632 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3633 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3634 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3635 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3636 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3637 } 3638} 3639 3640void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3641 uint64_t Mask, 3642 uint64_t &KnownZero, 3643 uint64_t &KnownOne, 3644 unsigned Depth) const { 3645 unsigned Opc = Op.getOpcode(); 3646 assert((Opc >= ISD::BUILTIN_OP_END || 3647 Opc == ISD::INTRINSIC_WO_CHAIN || 3648 Opc == ISD::INTRINSIC_W_CHAIN || 3649 Opc == ISD::INTRINSIC_VOID) && 3650 "Should use MaskedValueIsZero if you don't know whether Op" 3651 " is a target node!"); 3652 3653 KnownZero = KnownOne = 0; // Don't know anything. 3654 switch (Opc) { 3655 default: break; 3656 case X86ISD::SETCC: 3657 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3658 break; 3659 } 3660} 3661 3662std::vector<unsigned> X86TargetLowering:: 3663getRegClassForInlineAsmConstraint(const std::string &Constraint, 3664 MVT::ValueType VT) const { 3665 if (Constraint.size() == 1) { 3666 // FIXME: not handling fp-stack yet! 3667 // FIXME: not handling MMX registers yet ('y' constraint). 
3668 switch (Constraint[0]) { // GCC X86 Constraint Letters 3669 default: break; // Unknown constriant letter 3670 case 'r': // GENERAL_REGS 3671 case 'R': // LEGACY_REGS 3672 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3673 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3674 case 'l': // INDEX_REGS 3675 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3676 X86::ESI, X86::EDI, X86::EBP, 0); 3677 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3678 case 'Q': // Q_REGS 3679 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3680 case 'x': // SSE_REGS if SSE1 allowed 3681 if (Subtarget->hasSSE1()) 3682 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3683 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3684 0); 3685 return std::vector<unsigned>(); 3686 case 'Y': // SSE_REGS if SSE2 allowed 3687 if (Subtarget->hasSSE2()) 3688 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3689 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3690 0); 3691 return std::vector<unsigned>(); 3692 } 3693 } 3694 3695 return std::vector<unsigned>(); 3696} 3697 3698/// isLegalAddressImmediate - Return true if the integer value or 3699/// GlobalValue can be used as the offset of the target addressing mode. 3700bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3701 // X86 allows a sign-extended 32-bit immediate field. 3702 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3703} 3704 3705bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3706 if (Subtarget->isTargetDarwin()) { 3707 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3708 if (RModel == Reloc::Static) 3709 return true; 3710 else if (RModel == Reloc::DynamicNoPIC) 3711 return !DarwinGVRequiresExtraLoad(GV); 3712 else 3713 return false; 3714 } else 3715 return true; 3716} 3717 3718/// isShuffleMaskLegal - Targets can use this to indicate that they only 3719/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3720/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3721/// are assumed to be legal. 3722bool 3723X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3724 // Only do shuffles on 128-bit vector types for now. 3725 if (MVT::getSizeInBits(VT) == 64) return false; 3726 return (Mask.Val->getNumOperands() <= 4 || 3727 isSplatMask(Mask.Val) || 3728 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3729 X86::isUNPCKLMask(Mask.Val) || 3730 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3731 X86::isUNPCKHMask(Mask.Val)); 3732} 3733 3734bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 3735 MVT::ValueType EVT, 3736 SelectionDAG &DAG) const { 3737 unsigned NumElts = BVOps.size(); 3738 // Only do shuffles on 128-bit vector types for now. 3739 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 3740 if (NumElts == 2) return true; 3741 if (NumElts == 4) { 3742 return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) || 3743 isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps)); 3744 } 3745 return false; 3746} 3747
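// A minimal standalone sketch (not part of this file's interface) of the
// splat test that isShuffleMaskLegal above relies on: every defined mask
// entry must name the same source element.  Indices are plain ints here with
// -1 standing in for an undef entry; the in-tree isSplatMask works on the
// BUILD_VECTOR mask node itself and may treat corner cases (for example an
// all-undef mask) differently.
static bool isSplatMaskSketch(const int *Mask, unsigned NumElems) {
  int Elt = -1;
  for (unsigned i = 0; i != NumElems; ++i) {
    if (Mask[i] < 0)
      continue;                 // undef entries match anything
    if (Elt < 0)
      Elt = Mask[i];            // first defined entry picks the element
    else if (Mask[i] != Elt)
      return false;             // two different source elements -> not a splat
  }
  return Elt >= 0;              // this sketch rejects an all-undef mask
}
// e.g. {5,-1,5,5} -> true, {0,1,0,0} -> false.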