X86ISelLowering.cpp revision a083af14c8130e7cb1f4812b6d6fdcf2cc21882f
1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that X86 uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86.h" 16#include "X86InstrBuilder.h" 17#include "X86ISelLowering.h" 18#include "X86TargetMachine.h" 19#include "llvm/CallingConv.h" 20#include "llvm/Constants.h" 21#include "llvm/Function.h" 22#include "llvm/Intrinsics.h" 23#include "llvm/ADT/VectorExtras.h" 24#include "llvm/Analysis/ScalarEvolutionExpressions.h" 25#include "llvm/CodeGen/MachineFrameInfo.h" 26#include "llvm/CodeGen/MachineFunction.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/SelectionDAG.h" 29#include "llvm/CodeGen/SSARegMap.h" 30#include "llvm/Support/MathExtras.h" 31#include "llvm/Target/TargetOptions.h" 32using namespace llvm; 33 34// FIXME: temporary. 35#include "llvm/Support/CommandLine.h" 36static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden, 37 cl::desc("Enable fastcc on X86")); 38 39X86TargetLowering::X86TargetLowering(TargetMachine &TM) 40 : TargetLowering(TM) { 41 Subtarget = &TM.getSubtarget<X86Subtarget>(); 42 X86ScalarSSE = Subtarget->hasSSE2(); 43 44 // Set up the TargetLowering object. 45 46 // X86 is weird, it always uses i8 for shift amounts and setcc results. 47 setShiftAmountType(MVT::i8); 48 setSetCCResultType(MVT::i8); 49 setSetCCResultContents(ZeroOrOneSetCCResult); 50 setSchedulingPreference(SchedulingForRegPressure); 51 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 52 setStackPointerRegisterToSaveRestore(X86::ESP); 53 54 if (!Subtarget->isTargetDarwin()) 55 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 56 setUseUnderscoreSetJmpLongJmp(true); 57 58 // Add legal addressing mode scale values. 59 addLegalAddressScale(8); 60 addLegalAddressScale(4); 61 addLegalAddressScale(2); 62 // Enter the ones which require both scale + index last. These are more 63 // expensive. 64 addLegalAddressScale(9); 65 addLegalAddressScale(5); 66 addLegalAddressScale(3); 67 68 // Set up the register classes. 69 addRegisterClass(MVT::i8, X86::R8RegisterClass); 70 addRegisterClass(MVT::i16, X86::R16RegisterClass); 71 addRegisterClass(MVT::i32, X86::R32RegisterClass); 72 73 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 74 // operation. 75 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 76 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 77 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 78 79 if (X86ScalarSSE) 80 // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead. 81 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 82 else 83 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 84 85 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 86 // this operation. 87 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 88 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 89 // SSE has no i16 to fp conversion, only i32 90 if (X86ScalarSSE) 91 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 92 else { 93 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 94 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 95 } 96 97 // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64 98 // isn't legal. 99 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 100 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 101 102 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 103 // this operation. 104 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 105 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 106 107 if (X86ScalarSSE) { 108 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 109 } else { 110 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 111 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 112 } 113 114 // Handle FP_TO_UINT by promoting the destination to a larger signed 115 // conversion. 116 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 117 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 118 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 119 120 if (X86ScalarSSE && !Subtarget->hasSSE3()) 121 // Expand FP_TO_UINT into a select. 122 // FIXME: We would like to use a Custom expander here eventually to do 123 // the optimal thing for SSE vs. the default expansion in the legalizer. 124 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 125 else 126 // With SSE3 we can use fisttpll to convert to a signed i64. 127 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 128 129 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 130 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 131 132 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 133 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 134 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 135 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand); 137 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); 138 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 139 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 140 setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); 141 setOperationAction(ISD::FREM , MVT::f64 , Expand); 142 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 143 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 144 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 145 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 146 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 147 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 148 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 149 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 150 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 151 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 152 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 153 154 // These should be promoted to a larger select which is supported. 155 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 156 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 157 158 // X86 wants to expand cmov itself. 159 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 160 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 161 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 162 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 163 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 164 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 165 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 166 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 167 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 168 // X86 ret instruction may pop stack. 169 setOperationAction(ISD::RET , MVT::Other, Custom); 170 // Darwin ABI issue. 171 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 172 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 173 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 174 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 175 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 176 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 177 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 178 // X86 wants to expand memset / memcpy itself. 179 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 180 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 181 182 // We don't have line number support yet. 183 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 184 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 185 // FIXME - use subtarget debug flags 186 if (!Subtarget->isTargetDarwin()) 187 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); 188 189 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 190 setOperationAction(ISD::VASTART , MVT::Other, Custom); 191 192 // Use the default implementation. 193 setOperationAction(ISD::VAARG , MVT::Other, Expand); 194 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 195 setOperationAction(ISD::VAEND , MVT::Other, Expand); 196 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 197 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 198 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 199 200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 202 203 if (X86ScalarSSE) { 204 // Set up the FP register classes. 205 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 206 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 207 208 // SSE has no load+extend ops 209 setOperationAction(ISD::EXTLOAD, MVT::f32, Expand); 210 setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand); 211 212 // Use ANDPD to simulate FABS. 213 setOperationAction(ISD::FABS , MVT::f64, Custom); 214 setOperationAction(ISD::FABS , MVT::f32, Custom); 215 216 // Use XORP to simulate FNEG. 217 setOperationAction(ISD::FNEG , MVT::f64, Custom); 218 setOperationAction(ISD::FNEG , MVT::f32, Custom); 219 220 // We don't support sin/cos/fmod 221 setOperationAction(ISD::FSIN , MVT::f64, Expand); 222 setOperationAction(ISD::FCOS , MVT::f64, Expand); 223 setOperationAction(ISD::FREM , MVT::f64, Expand); 224 setOperationAction(ISD::FSIN , MVT::f32, Expand); 225 setOperationAction(ISD::FCOS , MVT::f32, Expand); 226 setOperationAction(ISD::FREM , MVT::f32, Expand); 227 228 // Expand FP immediates into loads from the stack, except for the special 229 // cases we handle. 230 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 231 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 232 addLegalFPImmediate(+0.0); // xorps / xorpd 233 } else { 234 // Set up the FP register classes. 235 addRegisterClass(MVT::f64, X86::RFPRegisterClass); 236 237 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 238 239 if (!UnsafeFPMath) { 240 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 241 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 242 } 243 244 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 245 addLegalFPImmediate(+0.0); // FLD0 246 addLegalFPImmediate(+1.0); // FLD1 247 addLegalFPImmediate(-0.0); // FLD0/FCHS 248 addLegalFPImmediate(-1.0); // FLD1/FCHS 249 } 250 251 // First set operation action for all vector types to expand. Then we 252 // will selectively turn on ones that can be effectively codegen'd. 253 for (unsigned VT = (unsigned)MVT::Vector + 1; 254 VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { 255 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 256 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 257 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 258 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 259 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 260 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 261 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 262 } 263 264 if (Subtarget->hasMMX()) { 265 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 266 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 267 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 268 269 // FIXME: add MMX packed arithmetics 270 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); 271 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); 272 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); 273 } 274 275 if (Subtarget->hasSSE1()) { 276 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 277 278 setOperationAction(ISD::AND, MVT::v4f32, Legal); 279 setOperationAction(ISD::OR, MVT::v4f32, Legal); 280 setOperationAction(ISD::XOR, MVT::v4f32, Legal); 281 setOperationAction(ISD::ADD, MVT::v4f32, Legal); 282 setOperationAction(ISD::SUB, MVT::v4f32, Legal); 283 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 284 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 285 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 286 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 287 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 288 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 289 } 290 291 if (Subtarget->hasSSE2()) { 292 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 293 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 294 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 295 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 296 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 297 298 setOperationAction(ISD::ADD, MVT::v2f64, Legal); 299 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 300 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 301 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 302 setOperationAction(ISD::SUB, MVT::v2f64, Legal); 303 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 304 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 305 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 306 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 307 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 308 309 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 310 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 311 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 312 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 313 // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. 314 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 315 316 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 317 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 318 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); 319 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); 320 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); 321 } 322 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 323 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 324 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 325 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 326 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 327 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 328 329 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 330 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 331 setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); 332 AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); 333 setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); 334 AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); 335 setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); 336 AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); 337 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 338 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 339 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 340 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 341 } 342 343 // Custom lower v2i64 and v2f64 selects. 344 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 345 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 346 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 347 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 348 } 349 350 // We want to custom lower some of our intrinsics. 351 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 352 353 computeRegisterProperties(); 354 355 // FIXME: These should be based on subtarget info. Plus, the values should 356 // be smaller when we are in optimizing for size mode. 357 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 358 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 359 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 360 allowUnalignedMemoryAccesses = true; // x86 supports it! 361} 362 363std::vector<SDOperand> 364X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 365 if (F.getCallingConv() == CallingConv::Fast && EnableFastCC) 366 return LowerFastCCArguments(F, DAG); 367 return LowerCCCArguments(F, DAG); 368} 369 370std::pair<SDOperand, SDOperand> 371X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, 372 bool isVarArg, unsigned CallingConv, 373 bool isTailCall, 374 SDOperand Callee, ArgListTy &Args, 375 SelectionDAG &DAG) { 376 assert((!isVarArg || CallingConv == CallingConv::C) && 377 "Only C takes varargs!"); 378 379 // If the callee is a GlobalAddress node (quite common, every direct call is) 380 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 381 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 382 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 383 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 384 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 385 386 if (CallingConv == CallingConv::Fast && EnableFastCC) 387 return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG); 388 return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG); 389} 390 391//===----------------------------------------------------------------------===// 392// C Calling Convention implementation 393//===----------------------------------------------------------------------===// 394 395std::vector<SDOperand> 396X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { 397 std::vector<SDOperand> ArgValues; 398 399 MachineFunction &MF = DAG.getMachineFunction(); 400 MachineFrameInfo *MFI = MF.getFrameInfo(); 401 402 // Add DAG nodes to load the arguments... On entry to a function on the X86, 403 // the stack frame looks like this: 404 // 405 // [ESP] -- return address 406 // [ESP + 4] -- first argument (leftmost lexically) 407 // [ESP + 8] -- second argument, if first argument is four bytes in size 408 // ... 409 // 410 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 411 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 412 MVT::ValueType ObjectVT = getValueType(I->getType()); 413 unsigned ArgIncrement = 4; 414 unsigned ObjSize; 415 switch (ObjectVT) { 416 default: assert(0 && "Unhandled argument type!"); 417 case MVT::i1: 418 case MVT::i8: ObjSize = 1; break; 419 case MVT::i16: ObjSize = 2; break; 420 case MVT::i32: ObjSize = 4; break; 421 case MVT::i64: ObjSize = ArgIncrement = 8; break; 422 case MVT::f32: ObjSize = 4; break; 423 case MVT::f64: ObjSize = ArgIncrement = 8; break; 424 } 425 // Create the frame index object for this incoming parameter... 426 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 427 428 // Create the SelectionDAG nodes corresponding to a load from this parameter 429 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 430 431 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 432 // dead loads. 433 SDOperand ArgValue; 434 if (!I->use_empty()) 435 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 436 DAG.getSrcValue(NULL)); 437 else { 438 if (MVT::isInteger(ObjectVT)) 439 ArgValue = DAG.getConstant(0, ObjectVT); 440 else 441 ArgValue = DAG.getConstantFP(0, ObjectVT); 442 } 443 ArgValues.push_back(ArgValue); 444 445 ArgOffset += ArgIncrement; // Move on to the next argument... 446 } 447 448 // If the function takes variable number of arguments, make a frame index for 449 // the start of the first vararg value... for expansion of llvm.va_start. 450 if (F.isVarArg()) 451 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 452 ReturnAddrIndex = 0; // No return address slot generated yet. 453 BytesToPopOnReturn = 0; // Callee pops nothing. 454 BytesCallerReserves = ArgOffset; 455 return ArgValues; 456} 457 458std::pair<SDOperand, SDOperand> 459X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, 460 bool isVarArg, bool isTailCall, 461 SDOperand Callee, ArgListTy &Args, 462 SelectionDAG &DAG) { 463 // Count how many bytes are to be pushed on the stack. 464 unsigned NumBytes = 0; 465 466 if (Args.empty()) { 467 // Save zero bytes. 468 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy())); 469 } else { 470 for (unsigned i = 0, e = Args.size(); i != e; ++i) 471 switch (getValueType(Args[i].second)) { 472 default: assert(0 && "Unknown value type!"); 473 case MVT::i1: 474 case MVT::i8: 475 case MVT::i16: 476 case MVT::i32: 477 case MVT::f32: 478 NumBytes += 4; 479 break; 480 case MVT::i64: 481 case MVT::f64: 482 NumBytes += 8; 483 break; 484 } 485 486 Chain = DAG.getCALLSEQ_START(Chain, 487 DAG.getConstant(NumBytes, getPointerTy())); 488 489 // Arguments go on the stack in reverse order, as specified by the ABI. 490 unsigned ArgOffset = 0; 491 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 492 std::vector<SDOperand> Stores; 493 494 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 495 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 496 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 497 498 switch (getValueType(Args[i].second)) { 499 default: assert(0 && "Unexpected ValueType for argument!"); 500 case MVT::i1: 501 case MVT::i8: 502 case MVT::i16: 503 // Promote the integer to 32 bits. If the input type is signed use a 504 // sign extend, otherwise use a zero extend. 505 if (Args[i].second->isSigned()) 506 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 507 else 508 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 509 510 // FALL THROUGH 511 case MVT::i32: 512 case MVT::f32: 513 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 514 Args[i].first, PtrOff, 515 DAG.getSrcValue(NULL))); 516 ArgOffset += 4; 517 break; 518 case MVT::i64: 519 case MVT::f64: 520 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 521 Args[i].first, PtrOff, 522 DAG.getSrcValue(NULL))); 523 ArgOffset += 8; 524 break; 525 } 526 } 527 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 528 } 529 530 std::vector<MVT::ValueType> RetVals; 531 MVT::ValueType RetTyVT = getValueType(RetTy); 532 RetVals.push_back(MVT::Other); 533 534 // The result values produced have to be legal. Promote the result. 535 switch (RetTyVT) { 536 case MVT::isVoid: break; 537 default: 538 RetVals.push_back(RetTyVT); 539 break; 540 case MVT::i1: 541 case MVT::i8: 542 case MVT::i16: 543 RetVals.push_back(MVT::i32); 544 break; 545 case MVT::f32: 546 if (X86ScalarSSE) 547 RetVals.push_back(MVT::f32); 548 else 549 RetVals.push_back(MVT::f64); 550 break; 551 case MVT::i64: 552 RetVals.push_back(MVT::i32); 553 RetVals.push_back(MVT::i32); 554 break; 555 } 556 557 std::vector<MVT::ValueType> NodeTys; 558 NodeTys.push_back(MVT::Other); // Returns a chain 559 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 560 std::vector<SDOperand> Ops; 561 Ops.push_back(Chain); 562 Ops.push_back(Callee); 563 564 // FIXME: Do not generate X86ISD::TAILCALL for now. 565 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 566 SDOperand InFlag = Chain.getValue(1); 567 568 NodeTys.clear(); 569 NodeTys.push_back(MVT::Other); // Returns a chain 570 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 571 Ops.clear(); 572 Ops.push_back(Chain); 573 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 574 Ops.push_back(DAG.getConstant(0, getPointerTy())); 575 Ops.push_back(InFlag); 576 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 577 InFlag = Chain.getValue(1); 578 579 SDOperand RetVal; 580 if (RetTyVT != MVT::isVoid) { 581 switch (RetTyVT) { 582 default: assert(0 && "Unknown value type to return!"); 583 case MVT::i1: 584 case MVT::i8: 585 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 586 Chain = RetVal.getValue(1); 587 if (RetTyVT == MVT::i1) 588 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 589 break; 590 case MVT::i16: 591 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 592 Chain = RetVal.getValue(1); 593 break; 594 case MVT::i32: 595 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 596 Chain = RetVal.getValue(1); 597 break; 598 case MVT::i64: { 599 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 600 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 601 Lo.getValue(2)); 602 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 603 Chain = Hi.getValue(1); 604 break; 605 } 606 case MVT::f32: 607 case MVT::f64: { 608 std::vector<MVT::ValueType> Tys; 609 Tys.push_back(MVT::f64); 610 Tys.push_back(MVT::Other); 611 Tys.push_back(MVT::Flag); 612 std::vector<SDOperand> Ops; 613 Ops.push_back(Chain); 614 Ops.push_back(InFlag); 615 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 616 Chain = RetVal.getValue(1); 617 InFlag = RetVal.getValue(2); 618 if (X86ScalarSSE) { 619 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 620 // shouldn't be necessary except that RFP cannot be live across 621 // multiple blocks. When stackifier is fixed, they can be uncoupled. 622 MachineFunction &MF = DAG.getMachineFunction(); 623 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 624 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 625 Tys.clear(); 626 Tys.push_back(MVT::Other); 627 Ops.clear(); 628 Ops.push_back(Chain); 629 Ops.push_back(RetVal); 630 Ops.push_back(StackSlot); 631 Ops.push_back(DAG.getValueType(RetTyVT)); 632 Ops.push_back(InFlag); 633 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 634 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 635 DAG.getSrcValue(NULL)); 636 Chain = RetVal.getValue(1); 637 } 638 639 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 640 // FIXME: we would really like to remember that this FP_ROUND 641 // operation is okay to eliminate if we allow excess FP precision. 642 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 643 break; 644 } 645 } 646 } 647 648 return std::make_pair(RetVal, Chain); 649} 650 651//===----------------------------------------------------------------------===// 652// Fast Calling Convention implementation 653//===----------------------------------------------------------------------===// 654// 655// The X86 'fast' calling convention passes up to two integer arguments in 656// registers (an appropriate portion of EAX/EDX), passes arguments in C order, 657// and requires that the callee pop its arguments off the stack (allowing proper 658// tail calls), and has the same return value conventions as C calling convs. 659// 660// This calling convention always arranges for the callee pop value to be 8n+4 661// bytes, which is needed for tail recursion elimination and stack alignment 662// reasons. 663// 664// Note that this can be enhanced in the future to pass fp vals in registers 665// (when we have a global fp allocator) and do other tricks. 666// 667 668/// AddLiveIn - This helper function adds the specified physical register to the 669/// MachineFunction as a live in value. It also creates a corresponding virtual 670/// register for it. 671static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 672 TargetRegisterClass *RC) { 673 assert(RC->contains(PReg) && "Not the correct regclass!"); 674 unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC); 675 MF.addLiveIn(PReg, VReg); 676 return VReg; 677} 678 679// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments 680// to pass in registers. 0 is none, 1 is is "use EAX", 2 is "use EAX and 681// EDX". Anything more is illegal. 682// 683// FIXME: The linscan register allocator currently has problem with 684// coalescing. At the time of this writing, whenever it decides to coalesce 685// a physreg with a virtreg, this increases the size of the physreg's live 686// range, and the live range cannot ever be reduced. This causes problems if 687// too many physregs are coaleced with virtregs, which can cause the register 688// allocator to wedge itself. 689// 690// This code triggers this problem more often if we pass args in registers, 691// so disable it until this is fixed. 692// 693// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings 694// about code being dead. 695// 696static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0; 697 698 699std::vector<SDOperand> 700X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) { 701 std::vector<SDOperand> ArgValues; 702 703 MachineFunction &MF = DAG.getMachineFunction(); 704 MachineFrameInfo *MFI = MF.getFrameInfo(); 705 706 // Add DAG nodes to load the arguments... On entry to a function the stack 707 // frame looks like this: 708 // 709 // [ESP] -- return address 710 // [ESP + 4] -- first nonreg argument (leftmost lexically) 711 // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size 712 // ... 713 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 714 715 // Keep track of the number of integer regs passed so far. This can be either 716 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 717 // used). 718 unsigned NumIntRegs = 0; 719 720 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 721 MVT::ValueType ObjectVT = getValueType(I->getType()); 722 unsigned ArgIncrement = 4; 723 unsigned ObjSize = 0; 724 SDOperand ArgValue; 725 726 switch (ObjectVT) { 727 default: assert(0 && "Unhandled argument type!"); 728 case MVT::i1: 729 case MVT::i8: 730 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 731 if (!I->use_empty()) { 732 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, 733 X86::R8RegisterClass); 734 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8); 735 DAG.setRoot(ArgValue.getValue(1)); 736 if (ObjectVT == MVT::i1) 737 // FIXME: Should insert a assertzext here. 738 ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue); 739 } 740 ++NumIntRegs; 741 break; 742 } 743 744 ObjSize = 1; 745 break; 746 case MVT::i16: 747 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 748 if (!I->use_empty()) { 749 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX, 750 X86::R16RegisterClass); 751 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16); 752 DAG.setRoot(ArgValue.getValue(1)); 753 } 754 ++NumIntRegs; 755 break; 756 } 757 ObjSize = 2; 758 break; 759 case MVT::i32: 760 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 761 if (!I->use_empty()) { 762 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 763 X86::R32RegisterClass); 764 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 765 DAG.setRoot(ArgValue.getValue(1)); 766 } 767 ++NumIntRegs; 768 break; 769 } 770 ObjSize = 4; 771 break; 772 case MVT::i64: 773 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 774 if (!I->use_empty()) { 775 unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass); 776 unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 777 778 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 779 SDOperand Hi = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32); 780 DAG.setRoot(Hi.getValue(1)); 781 782 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 783 } 784 NumIntRegs += 2; 785 break; 786 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 787 if (!I->use_empty()) { 788 unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 789 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 790 DAG.setRoot(Low.getValue(1)); 791 792 // Load the high part from memory. 793 // Create the frame index object for this incoming parameter... 794 int FI = MFI->CreateFixedObject(4, ArgOffset); 795 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 796 SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN, 797 DAG.getSrcValue(NULL)); 798 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 799 } 800 ArgOffset += 4; 801 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 802 break; 803 } 804 ObjSize = ArgIncrement = 8; 805 break; 806 case MVT::f32: ObjSize = 4; break; 807 case MVT::f64: ObjSize = ArgIncrement = 8; break; 808 } 809 810 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 811 // dead loads. 812 if (ObjSize && !I->use_empty()) { 813 // Create the frame index object for this incoming parameter... 814 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 815 816 // Create the SelectionDAG nodes corresponding to a load from this 817 // parameter. 818 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 819 820 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 821 DAG.getSrcValue(NULL)); 822 } else if (ArgValue.Val == 0) { 823 if (MVT::isInteger(ObjectVT)) 824 ArgValue = DAG.getConstant(0, ObjectVT); 825 else 826 ArgValue = DAG.getConstantFP(0, ObjectVT); 827 } 828 ArgValues.push_back(ArgValue); 829 830 if (ObjSize) 831 ArgOffset += ArgIncrement; // Move on to the next argument. 832 } 833 834 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 835 // arguments and the arguments after the retaddr has been pushed are aligned. 836 if ((ArgOffset & 7) == 0) 837 ArgOffset += 4; 838 839 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 840 ReturnAddrIndex = 0; // No return address slot generated yet. 841 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 842 BytesCallerReserves = 0; 843 844 // Finally, inform the code generator which regs we return values in. 845 switch (getValueType(F.getReturnType())) { 846 default: assert(0 && "Unknown type!"); 847 case MVT::isVoid: break; 848 case MVT::i1: 849 case MVT::i8: 850 case MVT::i16: 851 case MVT::i32: 852 MF.addLiveOut(X86::EAX); 853 break; 854 case MVT::i64: 855 MF.addLiveOut(X86::EAX); 856 MF.addLiveOut(X86::EDX); 857 break; 858 case MVT::f32: 859 case MVT::f64: 860 MF.addLiveOut(X86::ST0); 861 break; 862 } 863 return ArgValues; 864} 865 866std::pair<SDOperand, SDOperand> 867X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, 868 bool isTailCall, SDOperand Callee, 869 ArgListTy &Args, SelectionDAG &DAG) { 870 // Count how many bytes are to be pushed on the stack. 871 unsigned NumBytes = 0; 872 873 // Keep track of the number of integer regs passed so far. This can be either 874 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 875 // used). 876 unsigned NumIntRegs = 0; 877 878 for (unsigned i = 0, e = Args.size(); i != e; ++i) 879 switch (getValueType(Args[i].second)) { 880 default: assert(0 && "Unknown value type!"); 881 case MVT::i1: 882 case MVT::i8: 883 case MVT::i16: 884 case MVT::i32: 885 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 886 ++NumIntRegs; 887 break; 888 } 889 // fall through 890 case MVT::f32: 891 NumBytes += 4; 892 break; 893 case MVT::i64: 894 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 895 NumIntRegs += 2; 896 break; 897 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 898 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 899 NumBytes += 4; 900 break; 901 } 902 903 // fall through 904 case MVT::f64: 905 NumBytes += 8; 906 break; 907 } 908 909 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 910 // arguments and the arguments after the retaddr has been pushed are aligned. 911 if ((NumBytes & 7) == 0) 912 NumBytes += 4; 913 914 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 915 916 // Arguments go on the stack in reverse order, as specified by the ABI. 917 unsigned ArgOffset = 0; 918 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 919 NumIntRegs = 0; 920 std::vector<SDOperand> Stores; 921 std::vector<SDOperand> RegValuesToPass; 922 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 923 switch (getValueType(Args[i].second)) { 924 default: assert(0 && "Unexpected ValueType for argument!"); 925 case MVT::i1: 926 Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first); 927 // Fall through. 928 case MVT::i8: 929 case MVT::i16: 930 case MVT::i32: 931 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 932 RegValuesToPass.push_back(Args[i].first); 933 ++NumIntRegs; 934 break; 935 } 936 // Fall through 937 case MVT::f32: { 938 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 939 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 940 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 941 Args[i].first, PtrOff, 942 DAG.getSrcValue(NULL))); 943 ArgOffset += 4; 944 break; 945 } 946 case MVT::i64: 947 // Can pass (at least) part of it in regs? 948 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 949 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 950 Args[i].first, DAG.getConstant(1, MVT::i32)); 951 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 952 Args[i].first, DAG.getConstant(0, MVT::i32)); 953 RegValuesToPass.push_back(Lo); 954 ++NumIntRegs; 955 956 // Pass both parts in regs? 957 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 958 RegValuesToPass.push_back(Hi); 959 ++NumIntRegs; 960 } else { 961 // Pass the high part in memory. 962 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 963 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 964 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 965 Hi, PtrOff, DAG.getSrcValue(NULL))); 966 ArgOffset += 4; 967 } 968 break; 969 } 970 // Fall through 971 case MVT::f64: 972 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 973 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 974 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 975 Args[i].first, PtrOff, 976 DAG.getSrcValue(NULL))); 977 ArgOffset += 8; 978 break; 979 } 980 } 981 if (!Stores.empty()) 982 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 983 984 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 985 // arguments and the arguments after the retaddr has been pushed are aligned. 986 if ((ArgOffset & 7) == 0) 987 ArgOffset += 4; 988 989 std::vector<MVT::ValueType> RetVals; 990 MVT::ValueType RetTyVT = getValueType(RetTy); 991 992 RetVals.push_back(MVT::Other); 993 994 // The result values produced have to be legal. Promote the result. 995 switch (RetTyVT) { 996 case MVT::isVoid: break; 997 default: 998 RetVals.push_back(RetTyVT); 999 break; 1000 case MVT::i1: 1001 case MVT::i8: 1002 case MVT::i16: 1003 RetVals.push_back(MVT::i32); 1004 break; 1005 case MVT::f32: 1006 if (X86ScalarSSE) 1007 RetVals.push_back(MVT::f32); 1008 else 1009 RetVals.push_back(MVT::f64); 1010 break; 1011 case MVT::i64: 1012 RetVals.push_back(MVT::i32); 1013 RetVals.push_back(MVT::i32); 1014 break; 1015 } 1016 1017 // Build a sequence of copy-to-reg nodes chained together with token chain 1018 // and flag operands which copy the outgoing args into registers. 1019 SDOperand InFlag; 1020 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 1021 unsigned CCReg; 1022 SDOperand RegToPass = RegValuesToPass[i]; 1023 switch (RegToPass.getValueType()) { 1024 default: assert(0 && "Bad thing to pass in regs"); 1025 case MVT::i8: 1026 CCReg = (i == 0) ? X86::AL : X86::DL; 1027 break; 1028 case MVT::i16: 1029 CCReg = (i == 0) ? X86::AX : X86::DX; 1030 break; 1031 case MVT::i32: 1032 CCReg = (i == 0) ? X86::EAX : X86::EDX; 1033 break; 1034 } 1035 1036 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 1037 InFlag = Chain.getValue(1); 1038 } 1039 1040 std::vector<MVT::ValueType> NodeTys; 1041 NodeTys.push_back(MVT::Other); // Returns a chain 1042 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1043 std::vector<SDOperand> Ops; 1044 Ops.push_back(Chain); 1045 Ops.push_back(Callee); 1046 if (InFlag.Val) 1047 Ops.push_back(InFlag); 1048 1049 // FIXME: Do not generate X86ISD::TAILCALL for now. 1050 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 1051 InFlag = Chain.getValue(1); 1052 1053 NodeTys.clear(); 1054 NodeTys.push_back(MVT::Other); // Returns a chain 1055 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1056 Ops.clear(); 1057 Ops.push_back(Chain); 1058 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1059 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 1060 Ops.push_back(InFlag); 1061 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 1062 InFlag = Chain.getValue(1); 1063 1064 SDOperand RetVal; 1065 if (RetTyVT != MVT::isVoid) { 1066 switch (RetTyVT) { 1067 default: assert(0 && "Unknown value type to return!"); 1068 case MVT::i1: 1069 case MVT::i8: 1070 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 1071 Chain = RetVal.getValue(1); 1072 if (RetTyVT == MVT::i1) 1073 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 1074 break; 1075 case MVT::i16: 1076 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 1077 Chain = RetVal.getValue(1); 1078 break; 1079 case MVT::i32: 1080 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1081 Chain = RetVal.getValue(1); 1082 break; 1083 case MVT::i64: { 1084 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 1085 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 1086 Lo.getValue(2)); 1087 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 1088 Chain = Hi.getValue(1); 1089 break; 1090 } 1091 case MVT::f32: 1092 case MVT::f64: { 1093 std::vector<MVT::ValueType> Tys; 1094 Tys.push_back(MVT::f64); 1095 Tys.push_back(MVT::Other); 1096 Tys.push_back(MVT::Flag); 1097 std::vector<SDOperand> Ops; 1098 Ops.push_back(Chain); 1099 Ops.push_back(InFlag); 1100 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 1101 Chain = RetVal.getValue(1); 1102 InFlag = RetVal.getValue(2); 1103 if (X86ScalarSSE) { 1104 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 1105 // shouldn't be necessary except that RFP cannot be live across 1106 // multiple blocks. When stackifier is fixed, they can be uncoupled. 1107 MachineFunction &MF = DAG.getMachineFunction(); 1108 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1109 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1110 Tys.clear(); 1111 Tys.push_back(MVT::Other); 1112 Ops.clear(); 1113 Ops.push_back(Chain); 1114 Ops.push_back(RetVal); 1115 Ops.push_back(StackSlot); 1116 Ops.push_back(DAG.getValueType(RetTyVT)); 1117 Ops.push_back(InFlag); 1118 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 1119 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 1120 DAG.getSrcValue(NULL)); 1121 Chain = RetVal.getValue(1); 1122 } 1123 1124 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 1125 // FIXME: we would really like to remember that this FP_ROUND 1126 // operation is okay to eliminate if we allow excess FP precision. 1127 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1128 break; 1129 } 1130 } 1131 } 1132 1133 return std::make_pair(RetVal, Chain); 1134} 1135 1136SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1137 if (ReturnAddrIndex == 0) { 1138 // Set up a frame object for the return address. 1139 MachineFunction &MF = DAG.getMachineFunction(); 1140 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1141 } 1142 1143 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 1144} 1145 1146 1147 1148std::pair<SDOperand, SDOperand> X86TargetLowering:: 1149LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1150 SelectionDAG &DAG) { 1151 SDOperand Result; 1152 if (Depth) // Depths > 0 not supported yet! 1153 Result = DAG.getConstant(0, getPointerTy()); 1154 else { 1155 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1156 if (!isFrameAddress) 1157 // Just load the return address 1158 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 1159 DAG.getSrcValue(NULL)); 1160 else 1161 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 1162 DAG.getConstant(4, MVT::i32)); 1163 } 1164 return std::make_pair(Result, Chain); 1165} 1166 1167/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1168/// which corresponds to the condition code. 1169static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1170 switch (X86CC) { 1171 default: assert(0 && "Unknown X86 conditional code!"); 1172 case X86ISD::COND_A: return X86::JA; 1173 case X86ISD::COND_AE: return X86::JAE; 1174 case X86ISD::COND_B: return X86::JB; 1175 case X86ISD::COND_BE: return X86::JBE; 1176 case X86ISD::COND_E: return X86::JE; 1177 case X86ISD::COND_G: return X86::JG; 1178 case X86ISD::COND_GE: return X86::JGE; 1179 case X86ISD::COND_L: return X86::JL; 1180 case X86ISD::COND_LE: return X86::JLE; 1181 case X86ISD::COND_NE: return X86::JNE; 1182 case X86ISD::COND_NO: return X86::JNO; 1183 case X86ISD::COND_NP: return X86::JNP; 1184 case X86ISD::COND_NS: return X86::JNS; 1185 case X86ISD::COND_O: return X86::JO; 1186 case X86ISD::COND_P: return X86::JP; 1187 case X86ISD::COND_S: return X86::JS; 1188 } 1189} 1190 1191/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1192/// specific condition code. It returns a false if it cannot do a direct 1193/// translation. X86CC is the translated CondCode. Flip is set to true if the 1194/// the order of comparison operands should be flipped. 1195static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1196 unsigned &X86CC, bool &Flip) { 1197 Flip = false; 1198 X86CC = X86ISD::COND_INVALID; 1199 if (!isFP) { 1200 switch (SetCCOpcode) { 1201 default: break; 1202 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1203 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 1204 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 1205 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 1206 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 1207 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1208 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 1209 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 1210 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 1211 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 1212 } 1213 } else { 1214 // On a floating point condition, the flags are set as follows: 1215 // ZF PF CF op 1216 // 0 | 0 | 0 | X > Y 1217 // 0 | 0 | 1 | X < Y 1218 // 1 | 0 | 0 | X == Y 1219 // 1 | 1 | 1 | unordered 1220 switch (SetCCOpcode) { 1221 default: break; 1222 case ISD::SETUEQ: 1223 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1224 case ISD::SETOLT: Flip = true; // Fallthrough 1225 case ISD::SETOGT: 1226 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 1227 case ISD::SETOLE: Flip = true; // Fallthrough 1228 case ISD::SETOGE: 1229 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 1230 case ISD::SETUGT: Flip = true; // Fallthrough 1231 case ISD::SETULT: 1232 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 1233 case ISD::SETUGE: Flip = true; // Fallthrough 1234 case ISD::SETULE: 1235 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 1236 case ISD::SETONE: 1237 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1238 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 1239 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 1240 } 1241 } 1242 1243 return X86CC != X86ISD::COND_INVALID; 1244} 1245 1246static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 1247 bool &Flip) { 1248 return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 1249} 1250 1251/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1252/// code. Current x86 isa includes the following FP cmov instructions: 1253/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1254static bool hasFPCMov(unsigned X86CC) { 1255 switch (X86CC) { 1256 default: 1257 return false; 1258 case X86ISD::COND_B: 1259 case X86ISD::COND_BE: 1260 case X86ISD::COND_E: 1261 case X86ISD::COND_P: 1262 case X86ISD::COND_A: 1263 case X86ISD::COND_AE: 1264 case X86ISD::COND_NE: 1265 case X86ISD::COND_NP: 1266 return true; 1267 } 1268} 1269 1270MachineBasicBlock * 1271X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 1272 MachineBasicBlock *BB) { 1273 switch (MI->getOpcode()) { 1274 default: assert(false && "Unexpected instr type to insert"); 1275 case X86::CMOV_FR32: 1276 case X86::CMOV_FR64: 1277 case X86::CMOV_V4F32: 1278 case X86::CMOV_V2F64: 1279 case X86::CMOV_V2I64: { 1280 // To "insert" a SELECT_CC instruction, we actually have to insert the 1281 // diamond control-flow pattern. The incoming instruction knows the 1282 // destination vreg to set, the condition code register to branch on, the 1283 // true/false values to select between, and a branch opcode to use. 1284 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 1285 ilist<MachineBasicBlock>::iterator It = BB; 1286 ++It; 1287 1288 // thisMBB: 1289 // ... 1290 // TrueVal = ... 1291 // cmpTY ccX, r1, r2 1292 // bCC copy1MBB 1293 // fallthrough --> copy0MBB 1294 MachineBasicBlock *thisMBB = BB; 1295 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 1296 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 1297 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 1298 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 1299 MachineFunction *F = BB->getParent(); 1300 F->getBasicBlockList().insert(It, copy0MBB); 1301 F->getBasicBlockList().insert(It, sinkMBB); 1302 // Update machine-CFG edges by first adding all successors of the current 1303 // block to the new block which will contain the Phi node for the select. 1304 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 1305 e = BB->succ_end(); i != e; ++i) 1306 sinkMBB->addSuccessor(*i); 1307 // Next, remove all successors of the current block, and add the true 1308 // and fallthrough blocks as its successors. 1309 while(!BB->succ_empty()) 1310 BB->removeSuccessor(BB->succ_begin()); 1311 BB->addSuccessor(copy0MBB); 1312 BB->addSuccessor(sinkMBB); 1313 1314 // copy0MBB: 1315 // %FalseValue = ... 1316 // # fallthrough to sinkMBB 1317 BB = copy0MBB; 1318 1319 // Update machine-CFG edges 1320 BB->addSuccessor(sinkMBB); 1321 1322 // sinkMBB: 1323 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 1324 // ... 1325 BB = sinkMBB; 1326 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 1327 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 1328 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 1329 1330 delete MI; // The pseudo instruction is gone now. 1331 return BB; 1332 } 1333 1334 case X86::FP_TO_INT16_IN_MEM: 1335 case X86::FP_TO_INT32_IN_MEM: 1336 case X86::FP_TO_INT64_IN_MEM: { 1337 // Change the floating point control register to use "round towards zero" 1338 // mode when truncating to an integer value. 1339 MachineFunction *F = BB->getParent(); 1340 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 1341 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 1342 1343 // Load the old value of the high byte of the control word... 1344 unsigned OldCW = 1345 F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass); 1346 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx); 1347 1348 // Set the high part to be round to zero... 1349 addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F); 1350 1351 // Reload the modified control word now... 1352 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1353 1354 // Restore the memory image of control word to original value 1355 addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW); 1356 1357 // Get the X86 opcode to use. 1358 unsigned Opc; 1359 switch (MI->getOpcode()) { 1360 default: assert(0 && "illegal opcode!"); 1361 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 1362 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 1363 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 1364 } 1365 1366 X86AddressMode AM; 1367 MachineOperand &Op = MI->getOperand(0); 1368 if (Op.isRegister()) { 1369 AM.BaseType = X86AddressMode::RegBase; 1370 AM.Base.Reg = Op.getReg(); 1371 } else { 1372 AM.BaseType = X86AddressMode::FrameIndexBase; 1373 AM.Base.FrameIndex = Op.getFrameIndex(); 1374 } 1375 Op = MI->getOperand(1); 1376 if (Op.isImmediate()) 1377 AM.Scale = Op.getImmedValue(); 1378 Op = MI->getOperand(2); 1379 if (Op.isImmediate()) 1380 AM.IndexReg = Op.getImmedValue(); 1381 Op = MI->getOperand(3); 1382 if (Op.isGlobalAddress()) { 1383 AM.GV = Op.getGlobal(); 1384 } else { 1385 AM.Disp = Op.getImmedValue(); 1386 } 1387 addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg()); 1388 1389 // Reload the original control word now. 1390 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 1391 1392 delete MI; // The pseudo instruction is gone now. 1393 return BB; 1394 } 1395 } 1396} 1397 1398 1399//===----------------------------------------------------------------------===// 1400// X86 Custom Lowering Hooks 1401//===----------------------------------------------------------------------===// 1402 1403/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 1404/// load. For Darwin, external and weak symbols are indirect, loading the value 1405/// at address GV rather then the value of GV itself. This means that the 1406/// GlobalAddress must be in the base or index register of the address, not the 1407/// GV offset field. 1408static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 1409 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 1410 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 1411} 1412 1413/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1414/// true if Op is undef or if its value falls within the specified range (L, H]. 1415static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1416 if (Op.getOpcode() == ISD::UNDEF) 1417 return true; 1418 1419 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1420 return (Val >= Low && Val < Hi); 1421} 1422 1423/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1424/// true if Op is undef or if its value equal to the specified value. 1425static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1426 if (Op.getOpcode() == ISD::UNDEF) 1427 return true; 1428 return cast<ConstantSDNode>(Op)->getValue() == Val; 1429} 1430 1431/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1432/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1433bool X86::isPSHUFDMask(SDNode *N) { 1434 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1435 1436 if (N->getNumOperands() != 4) 1437 return false; 1438 1439 // Check if the value doesn't reference the second vector. 1440 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1441 SDOperand Arg = N->getOperand(i); 1442 if (Arg.getOpcode() == ISD::UNDEF) continue; 1443 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1444 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1445 return false; 1446 } 1447 1448 return true; 1449} 1450 1451/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1452/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1453bool X86::isPSHUFHWMask(SDNode *N) { 1454 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1455 1456 if (N->getNumOperands() != 8) 1457 return false; 1458 1459 // Lower quadword copied in order. 1460 for (unsigned i = 0; i != 4; ++i) { 1461 SDOperand Arg = N->getOperand(i); 1462 if (Arg.getOpcode() == ISD::UNDEF) continue; 1463 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1464 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1465 return false; 1466 } 1467 1468 // Upper quadword shuffled. 1469 for (unsigned i = 4; i != 8; ++i) { 1470 SDOperand Arg = N->getOperand(i); 1471 if (Arg.getOpcode() == ISD::UNDEF) continue; 1472 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1473 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1474 if (Val < 4 || Val > 7) 1475 return false; 1476 } 1477 1478 return true; 1479} 1480 1481/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1482/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1483bool X86::isPSHUFLWMask(SDNode *N) { 1484 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1485 1486 if (N->getNumOperands() != 8) 1487 return false; 1488 1489 // Upper quadword copied in order. 1490 for (unsigned i = 4; i != 8; ++i) 1491 if (!isUndefOrEqual(N->getOperand(i), i)) 1492 return false; 1493 1494 // Lower quadword shuffled. 1495 for (unsigned i = 0; i != 4; ++i) 1496 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1497 return false; 1498 1499 return true; 1500} 1501 1502/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1503/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1504static bool isSHUFPMask(std::vector<SDOperand> &N) { 1505 unsigned NumElems = N.size(); 1506 if (NumElems != 2 && NumElems != 4) return false; 1507 1508 unsigned Half = NumElems / 2; 1509 for (unsigned i = 0; i < Half; ++i) 1510 if (!isUndefOrInRange(N[i], 0, NumElems)) 1511 return false; 1512 for (unsigned i = Half; i < NumElems; ++i) 1513 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 1514 return false; 1515 1516 return true; 1517} 1518 1519bool X86::isSHUFPMask(SDNode *N) { 1520 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1521 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1522 return ::isSHUFPMask(Ops); 1523} 1524 1525/// isCommutedSHUFP - Returns true if the shuffle mask is except 1526/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1527/// half elements to come from vector 1 (which would equal the dest.) and 1528/// the upper half to come from vector 2. 1529static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 1530 unsigned NumElems = Ops.size(); 1531 if (NumElems != 2 && NumElems != 4) return false; 1532 1533 unsigned Half = NumElems / 2; 1534 for (unsigned i = 0; i < Half; ++i) 1535 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 1536 return false; 1537 for (unsigned i = Half; i < NumElems; ++i) 1538 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 1539 return false; 1540 return true; 1541} 1542 1543static bool isCommutedSHUFP(SDNode *N) { 1544 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1545 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1546 return isCommutedSHUFP(Ops); 1547} 1548 1549/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1550/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1551bool X86::isMOVHLPSMask(SDNode *N) { 1552 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1553 1554 if (N->getNumOperands() != 4) 1555 return false; 1556 1557 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1558 return isUndefOrEqual(N->getOperand(0), 6) && 1559 isUndefOrEqual(N->getOperand(1), 7) && 1560 isUndefOrEqual(N->getOperand(2), 2) && 1561 isUndefOrEqual(N->getOperand(3), 3); 1562} 1563 1564/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1565/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1566bool X86::isMOVLPMask(SDNode *N) { 1567 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1568 1569 unsigned NumElems = N->getNumOperands(); 1570 if (NumElems != 2 && NumElems != 4) 1571 return false; 1572 1573 for (unsigned i = 0; i < NumElems/2; ++i) 1574 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1575 return false; 1576 1577 for (unsigned i = NumElems/2; i < NumElems; ++i) 1578 if (!isUndefOrEqual(N->getOperand(i), i)) 1579 return false; 1580 1581 return true; 1582} 1583 1584/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1585/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1586/// and MOVLHPS. 1587bool X86::isMOVHPMask(SDNode *N) { 1588 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1589 1590 unsigned NumElems = N->getNumOperands(); 1591 if (NumElems != 2 && NumElems != 4) 1592 return false; 1593 1594 for (unsigned i = 0; i < NumElems/2; ++i) 1595 if (!isUndefOrEqual(N->getOperand(i), i)) 1596 return false; 1597 1598 for (unsigned i = 0; i < NumElems/2; ++i) { 1599 SDOperand Arg = N->getOperand(i + NumElems/2); 1600 if (!isUndefOrEqual(Arg, i + NumElems)) 1601 return false; 1602 } 1603 1604 return true; 1605} 1606 1607/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1608/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1609bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1610 unsigned NumElems = N.size(); 1611 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1612 return false; 1613 1614 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1615 SDOperand BitI = N[i]; 1616 SDOperand BitI1 = N[i+1]; 1617 if (!isUndefOrEqual(BitI, j)) 1618 return false; 1619 if (V2IsSplat) { 1620 if (isUndefOrEqual(BitI1, NumElems)) 1621 return false; 1622 } else { 1623 if (!isUndefOrEqual(BitI1, j + NumElems)) 1624 return false; 1625 } 1626 } 1627 1628 return true; 1629} 1630 1631bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1632 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1633 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1634 return ::isUNPCKLMask(Ops, V2IsSplat); 1635} 1636 1637/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1638/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1639bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 1640 unsigned NumElems = N.size(); 1641 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1642 return false; 1643 1644 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1645 SDOperand BitI = N[i]; 1646 SDOperand BitI1 = N[i+1]; 1647 if (!isUndefOrEqual(BitI, j + NumElems/2)) 1648 return false; 1649 if (V2IsSplat) { 1650 if (isUndefOrEqual(BitI1, NumElems)) 1651 return false; 1652 } else { 1653 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 1654 return false; 1655 } 1656 } 1657 1658 return true; 1659} 1660 1661bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1662 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1663 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1664 return ::isUNPCKHMask(Ops, V2IsSplat); 1665} 1666 1667/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1668/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1669/// <0, 0, 1, 1> 1670bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1671 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1672 1673 unsigned NumElems = N->getNumOperands(); 1674 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 1675 return false; 1676 1677 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1678 SDOperand BitI = N->getOperand(i); 1679 SDOperand BitI1 = N->getOperand(i+1); 1680 1681 if (!isUndefOrEqual(BitI, j)) 1682 return false; 1683 if (!isUndefOrEqual(BitI1, j)) 1684 return false; 1685 } 1686 1687 return true; 1688} 1689 1690/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1691/// specifies a shuffle of elements that is suitable for input to MOVSS, 1692/// MOVSD, and MOVD, i.e. setting the lowest element. 1693static bool isMOVLMask(std::vector<SDOperand> &N) { 1694 unsigned NumElems = N.size(); 1695 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1696 return false; 1697 1698 if (!isUndefOrEqual(N[0], NumElems)) 1699 return false; 1700 1701 for (unsigned i = 1; i < NumElems; ++i) { 1702 SDOperand Arg = N[i]; 1703 if (!isUndefOrEqual(Arg, i)) 1704 return false; 1705 } 1706 1707 return true; 1708} 1709 1710bool X86::isMOVLMask(SDNode *N) { 1711 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1712 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1713 return ::isMOVLMask(Ops); 1714} 1715 1716/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 1717/// of what x86 movss want. X86 movs requires the lowest element to be lowest 1718/// element of vector 2 and the other elements to come from vector 1 in order. 1719static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) { 1720 unsigned NumElems = Ops.size(); 1721 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1722 return false; 1723 1724 if (!isUndefOrEqual(Ops[0], 0)) 1725 return false; 1726 1727 for (unsigned i = 1; i < NumElems; ++i) { 1728 SDOperand Arg = Ops[i]; 1729 if (V2IsSplat) { 1730 if (!isUndefOrEqual(Arg, NumElems)) 1731 return false; 1732 } else { 1733 if (!isUndefOrEqual(Arg, i+NumElems)) 1734 return false; 1735 } 1736 } 1737 1738 return true; 1739} 1740 1741static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) { 1742 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1743 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 1744 return isCommutedMOVL(Ops, V2IsSplat); 1745} 1746 1747/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1748/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1749bool X86::isMOVSHDUPMask(SDNode *N) { 1750 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1751 1752 if (N->getNumOperands() != 4) 1753 return false; 1754 1755 // Expect 1, 1, 3, 3 1756 for (unsigned i = 0; i < 2; ++i) { 1757 SDOperand Arg = N->getOperand(i); 1758 if (Arg.getOpcode() == ISD::UNDEF) continue; 1759 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1760 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1761 if (Val != 1) return false; 1762 } 1763 1764 bool HasHi = false; 1765 for (unsigned i = 2; i < 4; ++i) { 1766 SDOperand Arg = N->getOperand(i); 1767 if (Arg.getOpcode() == ISD::UNDEF) continue; 1768 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1769 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1770 if (Val != 3) return false; 1771 HasHi = true; 1772 } 1773 1774 // Don't use movshdup if it can be done with a shufps. 1775 return HasHi; 1776} 1777 1778/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1779/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 1780bool X86::isMOVSLDUPMask(SDNode *N) { 1781 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1782 1783 if (N->getNumOperands() != 4) 1784 return false; 1785 1786 // Expect 0, 0, 2, 2 1787 for (unsigned i = 0; i < 2; ++i) { 1788 SDOperand Arg = N->getOperand(i); 1789 if (Arg.getOpcode() == ISD::UNDEF) continue; 1790 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1791 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1792 if (Val != 0) return false; 1793 } 1794 1795 bool HasHi = false; 1796 for (unsigned i = 2; i < 4; ++i) { 1797 SDOperand Arg = N->getOperand(i); 1798 if (Arg.getOpcode() == ISD::UNDEF) continue; 1799 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1800 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1801 if (Val != 2) return false; 1802 HasHi = true; 1803 } 1804 1805 // Don't use movshdup if it can be done with a shufps. 1806 return HasHi; 1807} 1808 1809/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1810/// a splat of a single element. 1811static bool isSplatMask(SDNode *N) { 1812 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1813 1814 // This is a splat operation if each element of the permute is the same, and 1815 // if the value doesn't reference the second vector. 1816 unsigned NumElems = N->getNumOperands(); 1817 SDOperand ElementBase; 1818 unsigned i = 0; 1819 for (; i != NumElems; ++i) { 1820 SDOperand Elt = N->getOperand(i); 1821 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) { 1822 ElementBase = Elt; 1823 break; 1824 } 1825 } 1826 1827 if (!ElementBase.Val) 1828 return false; 1829 1830 for (; i != NumElems; ++i) { 1831 SDOperand Arg = N->getOperand(i); 1832 if (Arg.getOpcode() == ISD::UNDEF) continue; 1833 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1834 if (Arg != ElementBase) return false; 1835 } 1836 1837 // Make sure it is a splat of the first vector operand. 1838 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 1839} 1840 1841/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 1842/// a splat of a single element and it's a 2 or 4 element mask. 1843bool X86::isSplatMask(SDNode *N) { 1844 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1845 1846 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 1847 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 1848 return false; 1849 return ::isSplatMask(N); 1850} 1851 1852/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 1853/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 1854/// instructions. 1855unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 1856 unsigned NumOperands = N->getNumOperands(); 1857 unsigned Shift = (NumOperands == 4) ? 2 : 1; 1858 unsigned Mask = 0; 1859 for (unsigned i = 0; i < NumOperands; ++i) { 1860 unsigned Val = 0; 1861 SDOperand Arg = N->getOperand(NumOperands-i-1); 1862 if (Arg.getOpcode() != ISD::UNDEF) 1863 Val = cast<ConstantSDNode>(Arg)->getValue(); 1864 if (Val >= NumOperands) Val -= NumOperands; 1865 Mask |= Val; 1866 if (i != NumOperands - 1) 1867 Mask <<= Shift; 1868 } 1869 1870 return Mask; 1871} 1872 1873/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 1874/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 1875/// instructions. 1876unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 1877 unsigned Mask = 0; 1878 // 8 nodes, but we only care about the last 4. 1879 for (unsigned i = 7; i >= 4; --i) { 1880 unsigned Val = 0; 1881 SDOperand Arg = N->getOperand(i); 1882 if (Arg.getOpcode() != ISD::UNDEF) 1883 Val = cast<ConstantSDNode>(Arg)->getValue(); 1884 Mask |= (Val - 4); 1885 if (i != 4) 1886 Mask <<= 2; 1887 } 1888 1889 return Mask; 1890} 1891 1892/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 1893/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 1894/// instructions. 1895unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 1896 unsigned Mask = 0; 1897 // 8 nodes, but we only care about the first 4. 1898 for (int i = 3; i >= 0; --i) { 1899 unsigned Val = 0; 1900 SDOperand Arg = N->getOperand(i); 1901 if (Arg.getOpcode() != ISD::UNDEF) 1902 Val = cast<ConstantSDNode>(Arg)->getValue(); 1903 Mask |= Val; 1904 if (i != 0) 1905 Mask <<= 2; 1906 } 1907 1908 return Mask; 1909} 1910 1911/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 1912/// specifies a 8 element shuffle that can be broken into a pair of 1913/// PSHUFHW and PSHUFLW. 1914static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 1915 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1916 1917 if (N->getNumOperands() != 8) 1918 return false; 1919 1920 // Lower quadword shuffled. 1921 for (unsigned i = 0; i != 4; ++i) { 1922 SDOperand Arg = N->getOperand(i); 1923 if (Arg.getOpcode() == ISD::UNDEF) continue; 1924 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1925 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1926 if (Val > 4) 1927 return false; 1928 } 1929 1930 // Upper quadword shuffled. 1931 for (unsigned i = 4; i != 8; ++i) { 1932 SDOperand Arg = N->getOperand(i); 1933 if (Arg.getOpcode() == ISD::UNDEF) continue; 1934 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1935 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1936 if (Val < 4 || Val > 7) 1937 return false; 1938 } 1939 1940 return true; 1941} 1942 1943/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 1944/// values in ther permute mask. 1945static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 1946 SDOperand V1 = Op.getOperand(0); 1947 SDOperand V2 = Op.getOperand(1); 1948 SDOperand Mask = Op.getOperand(2); 1949 MVT::ValueType VT = Op.getValueType(); 1950 MVT::ValueType MaskVT = Mask.getValueType(); 1951 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 1952 unsigned NumElems = Mask.getNumOperands(); 1953 std::vector<SDOperand> MaskVec; 1954 1955 for (unsigned i = 0; i != NumElems; ++i) { 1956 SDOperand Arg = Mask.getOperand(i); 1957 if (Arg.getOpcode() == ISD::UNDEF) { 1958 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 1959 continue; 1960 } 1961 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1962 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1963 if (Val < NumElems) 1964 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 1965 else 1966 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 1967 } 1968 1969 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 1970 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 1971} 1972 1973/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 1974/// match movhlps. The lower half elements should come from upper half of 1975/// V1 (and in order), and the upper half elements should come from the upper 1976/// half of V2 (and in order). 1977static bool ShouldXformToMOVHLPS(SDNode *Mask) { 1978 unsigned NumElems = Mask->getNumOperands(); 1979 if (NumElems != 4) 1980 return false; 1981 for (unsigned i = 0, e = 2; i != e; ++i) 1982 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 1983 return false; 1984 for (unsigned i = 2; i != 4; ++i) 1985 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 1986 return false; 1987 return true; 1988} 1989 1990/// isScalarLoadToVector - Returns true if the node is a scalar load that 1991/// is promoted to a vector. 1992static inline bool isScalarLoadToVector(SDNode *N) { 1993 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 1994 N = N->getOperand(0).Val; 1995 return (N->getOpcode() == ISD::LOAD); 1996 } 1997 return false; 1998} 1999 2000/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2001/// match movlp{s|d}. The lower half elements should come from lower half of 2002/// V1 (and in order), and the upper half elements should come from the upper 2003/// half of V2 (and in order). And since V1 will become the source of the 2004/// MOVLP, it must be either a vector load or a scalar load to vector. 2005static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) { 2006 if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1)) 2007 return false; 2008 2009 unsigned NumElems = Mask->getNumOperands(); 2010 if (NumElems != 2 && NumElems != 4) 2011 return false; 2012 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2013 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2014 return false; 2015 for (unsigned i = NumElems/2; i != NumElems; ++i) 2016 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2017 return false; 2018 return true; 2019} 2020 2021/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2022/// all the same. 2023static bool isSplatVector(SDNode *N) { 2024 if (N->getOpcode() != ISD::BUILD_VECTOR) 2025 return false; 2026 2027 SDOperand SplatValue = N->getOperand(0); 2028 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2029 if (N->getOperand(i) != SplatValue) 2030 return false; 2031 return true; 2032} 2033 2034/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2035/// that point to V2 points to its first element. 2036static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2037 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2038 2039 bool Changed = false; 2040 std::vector<SDOperand> MaskVec; 2041 unsigned NumElems = Mask.getNumOperands(); 2042 for (unsigned i = 0; i != NumElems; ++i) { 2043 SDOperand Arg = Mask.getOperand(i); 2044 if (Arg.getOpcode() != ISD::UNDEF) { 2045 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2046 if (Val > NumElems) { 2047 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2048 Changed = true; 2049 } 2050 } 2051 MaskVec.push_back(Arg); 2052 } 2053 2054 if (Changed) 2055 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); 2056 return Mask; 2057} 2058 2059/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2060/// operation of specified width. 2061static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2062 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2063 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2064 2065 std::vector<SDOperand> MaskVec; 2066 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2067 for (unsigned i = 1; i != NumElems; ++i) 2068 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2069 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2070} 2071 2072/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2073/// of specified width. 2074static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2075 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2076 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2077 std::vector<SDOperand> MaskVec; 2078 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2079 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2080 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2081 } 2082 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2083} 2084 2085/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2086/// of specified width. 2087static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2088 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2089 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2090 unsigned Half = NumElems/2; 2091 std::vector<SDOperand> MaskVec; 2092 for (unsigned i = 0; i != Half; ++i) { 2093 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2094 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2095 } 2096 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2097} 2098 2099/// getZeroVector - Returns a vector of specified type with all zero elements. 2100/// 2101static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2102 assert(MVT::isVector(VT) && "Expected a vector type"); 2103 unsigned NumElems = getVectorNumElements(VT); 2104 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2105 bool isFP = MVT::isFloatingPoint(EVT); 2106 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2107 std::vector<SDOperand> ZeroVec(NumElems, Zero); 2108 return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec); 2109} 2110 2111/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2112/// 2113static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2114 SDOperand V1 = Op.getOperand(0); 2115 SDOperand Mask = Op.getOperand(2); 2116 MVT::ValueType VT = Op.getValueType(); 2117 unsigned NumElems = Mask.getNumOperands(); 2118 Mask = getUnpacklMask(NumElems, DAG); 2119 while (NumElems != 4) { 2120 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2121 NumElems >>= 1; 2122 } 2123 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2124 2125 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2126 Mask = getZeroVector(MaskVT, DAG); 2127 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2128 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2129 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2130} 2131 2132/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2133/// constant +0.0. 2134static inline bool isZeroNode(SDOperand Elt) { 2135 return ((isa<ConstantSDNode>(Elt) && 2136 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2137 (isa<ConstantFPSDNode>(Elt) && 2138 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2139} 2140 2141/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2142/// vector and zero or undef vector. 2143static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2144 unsigned NumElems, unsigned Idx, 2145 bool isZero, SelectionDAG &DAG) { 2146 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2147 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2148 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2149 SDOperand Zero = DAG.getConstant(0, EVT); 2150 std::vector<SDOperand> MaskVec(NumElems, Zero); 2151 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2152 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 2153 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2154} 2155 2156/// LowerOperation - Provide custom lowering hooks for some operations. 2157/// 2158SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 2159 switch (Op.getOpcode()) { 2160 default: assert(0 && "Should not custom lower this!"); 2161 case ISD::SHL_PARTS: 2162 case ISD::SRA_PARTS: 2163 case ISD::SRL_PARTS: { 2164 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2165 "Not an i64 shift!"); 2166 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 2167 SDOperand ShOpLo = Op.getOperand(0); 2168 SDOperand ShOpHi = Op.getOperand(1); 2169 SDOperand ShAmt = Op.getOperand(2); 2170 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 2171 DAG.getConstant(31, MVT::i8)) 2172 : DAG.getConstant(0, MVT::i32); 2173 2174 SDOperand Tmp2, Tmp3; 2175 if (Op.getOpcode() == ISD::SHL_PARTS) { 2176 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 2177 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 2178 } else { 2179 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 2180 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 2181 } 2182 2183 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 2184 ShAmt, DAG.getConstant(32, MVT::i8)); 2185 2186 SDOperand Hi, Lo; 2187 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2188 2189 std::vector<MVT::ValueType> Tys; 2190 Tys.push_back(MVT::i32); 2191 Tys.push_back(MVT::Flag); 2192 std::vector<SDOperand> Ops; 2193 if (Op.getOpcode() == ISD::SHL_PARTS) { 2194 Ops.push_back(Tmp2); 2195 Ops.push_back(Tmp3); 2196 Ops.push_back(CC); 2197 Ops.push_back(InFlag); 2198 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2199 InFlag = Hi.getValue(1); 2200 2201 Ops.clear(); 2202 Ops.push_back(Tmp3); 2203 Ops.push_back(Tmp1); 2204 Ops.push_back(CC); 2205 Ops.push_back(InFlag); 2206 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2207 } else { 2208 Ops.push_back(Tmp2); 2209 Ops.push_back(Tmp3); 2210 Ops.push_back(CC); 2211 Ops.push_back(InFlag); 2212 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2213 InFlag = Lo.getValue(1); 2214 2215 Ops.clear(); 2216 Ops.push_back(Tmp3); 2217 Ops.push_back(Tmp1); 2218 Ops.push_back(CC); 2219 Ops.push_back(InFlag); 2220 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 2221 } 2222 2223 Tys.clear(); 2224 Tys.push_back(MVT::i32); 2225 Tys.push_back(MVT::i32); 2226 Ops.clear(); 2227 Ops.push_back(Lo); 2228 Ops.push_back(Hi); 2229 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2230 } 2231 case ISD::SINT_TO_FP: { 2232 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 2233 Op.getOperand(0).getValueType() >= MVT::i16 && 2234 "Unknown SINT_TO_FP to lower!"); 2235 2236 SDOperand Result; 2237 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 2238 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 2239 MachineFunction &MF = DAG.getMachineFunction(); 2240 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2241 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2242 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 2243 DAG.getEntryNode(), Op.getOperand(0), 2244 StackSlot, DAG.getSrcValue(NULL)); 2245 2246 // Build the FILD 2247 std::vector<MVT::ValueType> Tys; 2248 Tys.push_back(MVT::f64); 2249 Tys.push_back(MVT::Other); 2250 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 2251 std::vector<SDOperand> Ops; 2252 Ops.push_back(Chain); 2253 Ops.push_back(StackSlot); 2254 Ops.push_back(DAG.getValueType(SrcVT)); 2255 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 2256 Tys, Ops); 2257 2258 if (X86ScalarSSE) { 2259 Chain = Result.getValue(1); 2260 SDOperand InFlag = Result.getValue(2); 2261 2262 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 2263 // shouldn't be necessary except that RFP cannot be live across 2264 // multiple blocks. When stackifier is fixed, they can be uncoupled. 2265 MachineFunction &MF = DAG.getMachineFunction(); 2266 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2267 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2268 std::vector<MVT::ValueType> Tys; 2269 Tys.push_back(MVT::Other); 2270 std::vector<SDOperand> Ops; 2271 Ops.push_back(Chain); 2272 Ops.push_back(Result); 2273 Ops.push_back(StackSlot); 2274 Ops.push_back(DAG.getValueType(Op.getValueType())); 2275 Ops.push_back(InFlag); 2276 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 2277 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 2278 DAG.getSrcValue(NULL)); 2279 } 2280 2281 return Result; 2282 } 2283 case ISD::FP_TO_SINT: { 2284 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 2285 "Unknown FP_TO_SINT to lower!"); 2286 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 2287 // stack slot. 2288 MachineFunction &MF = DAG.getMachineFunction(); 2289 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 2290 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2291 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2292 2293 unsigned Opc; 2294 switch (Op.getValueType()) { 2295 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 2296 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 2297 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 2298 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 2299 } 2300 2301 SDOperand Chain = DAG.getEntryNode(); 2302 SDOperand Value = Op.getOperand(0); 2303 if (X86ScalarSSE) { 2304 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 2305 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 2306 DAG.getSrcValue(0)); 2307 std::vector<MVT::ValueType> Tys; 2308 Tys.push_back(MVT::f64); 2309 Tys.push_back(MVT::Other); 2310 std::vector<SDOperand> Ops; 2311 Ops.push_back(Chain); 2312 Ops.push_back(StackSlot); 2313 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 2314 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 2315 Chain = Value.getValue(1); 2316 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 2317 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 2318 } 2319 2320 // Build the FP_TO_INT*_IN_MEM 2321 std::vector<SDOperand> Ops; 2322 Ops.push_back(Chain); 2323 Ops.push_back(Value); 2324 Ops.push_back(StackSlot); 2325 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 2326 2327 // Load the result. 2328 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 2329 DAG.getSrcValue(NULL)); 2330 } 2331 case ISD::READCYCLECOUNTER: { 2332 std::vector<MVT::ValueType> Tys; 2333 Tys.push_back(MVT::Other); 2334 Tys.push_back(MVT::Flag); 2335 std::vector<SDOperand> Ops; 2336 Ops.push_back(Op.getOperand(0)); 2337 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 2338 Ops.clear(); 2339 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 2340 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 2341 MVT::i32, Ops[0].getValue(2))); 2342 Ops.push_back(Ops[1].getValue(1)); 2343 Tys[0] = Tys[1] = MVT::i32; 2344 Tys.push_back(MVT::Other); 2345 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 2346 } 2347 case ISD::FABS: { 2348 MVT::ValueType VT = Op.getValueType(); 2349 const Type *OpNTy = MVT::getTypeForValueType(VT); 2350 std::vector<Constant*> CV; 2351 if (VT == MVT::f64) { 2352 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 2353 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2354 } else { 2355 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 2356 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2357 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2358 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2359 } 2360 Constant *CS = ConstantStruct::get(CV); 2361 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2362 SDOperand Mask 2363 = DAG.getNode(X86ISD::LOAD_PACK, 2364 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2365 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 2366 } 2367 case ISD::FNEG: { 2368 MVT::ValueType VT = Op.getValueType(); 2369 const Type *OpNTy = MVT::getTypeForValueType(VT); 2370 std::vector<Constant*> CV; 2371 if (VT == MVT::f64) { 2372 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 2373 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2374 } else { 2375 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 2376 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2377 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2378 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 2379 } 2380 Constant *CS = ConstantStruct::get(CV); 2381 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 2382 SDOperand Mask 2383 = DAG.getNode(X86ISD::LOAD_PACK, 2384 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 2385 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 2386 } 2387 case ISD::SETCC: { 2388 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 2389 SDOperand Cond; 2390 SDOperand CC = Op.getOperand(2); 2391 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2392 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 2393 bool Flip; 2394 unsigned X86CC; 2395 if (translateX86CC(CC, isFP, X86CC, Flip)) { 2396 if (Flip) 2397 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2398 Op.getOperand(1), Op.getOperand(0)); 2399 else 2400 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2401 Op.getOperand(0), Op.getOperand(1)); 2402 return DAG.getNode(X86ISD::SETCC, MVT::i8, 2403 DAG.getConstant(X86CC, MVT::i8), Cond); 2404 } else { 2405 assert(isFP && "Illegal integer SetCC!"); 2406 2407 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 2408 Op.getOperand(0), Op.getOperand(1)); 2409 std::vector<MVT::ValueType> Tys; 2410 std::vector<SDOperand> Ops; 2411 switch (SetCCOpcode) { 2412 default: assert(false && "Illegal floating point SetCC!"); 2413 case ISD::SETOEQ: { // !PF & ZF 2414 Tys.push_back(MVT::i8); 2415 Tys.push_back(MVT::Flag); 2416 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 2417 Ops.push_back(Cond); 2418 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2419 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2420 DAG.getConstant(X86ISD::COND_E, MVT::i8), 2421 Tmp1.getValue(1)); 2422 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 2423 } 2424 case ISD::SETUNE: { // PF | !ZF 2425 Tys.push_back(MVT::i8); 2426 Tys.push_back(MVT::Flag); 2427 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 2428 Ops.push_back(Cond); 2429 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2430 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 2431 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 2432 Tmp1.getValue(1)); 2433 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 2434 } 2435 } 2436 } 2437 } 2438 case ISD::SELECT: { 2439 MVT::ValueType VT = Op.getValueType(); 2440 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 2441 bool addTest = false; 2442 SDOperand Op0 = Op.getOperand(0); 2443 SDOperand Cond, CC; 2444 if (Op0.getOpcode() == ISD::SETCC) 2445 Op0 = LowerOperation(Op0, DAG); 2446 2447 if (Op0.getOpcode() == X86ISD::SETCC) { 2448 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2449 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2450 // have another use it will be eliminated. 2451 // If the X86ISD::SETCC has more than one use, then it's probably better 2452 // to use a test instead of duplicating the X86ISD::CMP (for register 2453 // pressure reason). 2454 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 2455 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2456 CmpOpc == X86ISD::UCOMI) { 2457 if (!Op0.hasOneUse()) { 2458 std::vector<MVT::ValueType> Tys; 2459 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 2460 Tys.push_back(Op0.Val->getValueType(i)); 2461 std::vector<SDOperand> Ops; 2462 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 2463 Ops.push_back(Op0.getOperand(i)); 2464 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2465 } 2466 2467 CC = Op0.getOperand(0); 2468 Cond = Op0.getOperand(1); 2469 // Make a copy as flag result cannot be used by more than one. 2470 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2471 Cond.getOperand(0), Cond.getOperand(1)); 2472 addTest = 2473 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 2474 } else 2475 addTest = true; 2476 } else 2477 addTest = true; 2478 2479 if (addTest) { 2480 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2481 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 2482 } 2483 2484 std::vector<MVT::ValueType> Tys; 2485 Tys.push_back(Op.getValueType()); 2486 Tys.push_back(MVT::Flag); 2487 std::vector<SDOperand> Ops; 2488 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 2489 // condition is true. 2490 Ops.push_back(Op.getOperand(2)); 2491 Ops.push_back(Op.getOperand(1)); 2492 Ops.push_back(CC); 2493 Ops.push_back(Cond); 2494 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 2495 } 2496 case ISD::BRCOND: { 2497 bool addTest = false; 2498 SDOperand Cond = Op.getOperand(1); 2499 SDOperand Dest = Op.getOperand(2); 2500 SDOperand CC; 2501 if (Cond.getOpcode() == ISD::SETCC) 2502 Cond = LowerOperation(Cond, DAG); 2503 2504 if (Cond.getOpcode() == X86ISD::SETCC) { 2505 // If condition flag is set by a X86ISD::CMP, then make a copy of it 2506 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 2507 // have another use it will be eliminated. 2508 // If the X86ISD::SETCC has more than one use, then it's probably better 2509 // to use a test instead of duplicating the X86ISD::CMP (for register 2510 // pressure reason). 2511 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 2512 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 2513 CmpOpc == X86ISD::UCOMI) { 2514 if (!Cond.hasOneUse()) { 2515 std::vector<MVT::ValueType> Tys; 2516 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 2517 Tys.push_back(Cond.Val->getValueType(i)); 2518 std::vector<SDOperand> Ops; 2519 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 2520 Ops.push_back(Cond.getOperand(i)); 2521 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 2522 } 2523 2524 CC = Cond.getOperand(0); 2525 Cond = Cond.getOperand(1); 2526 // Make a copy as flag result cannot be used by more than one. 2527 Cond = DAG.getNode(CmpOpc, MVT::Flag, 2528 Cond.getOperand(0), Cond.getOperand(1)); 2529 } else 2530 addTest = true; 2531 } else 2532 addTest = true; 2533 2534 if (addTest) { 2535 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 2536 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 2537 } 2538 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 2539 Op.getOperand(0), Op.getOperand(2), CC, Cond); 2540 } 2541 case ISD::MEMSET: { 2542 SDOperand InFlag(0, 0); 2543 SDOperand Chain = Op.getOperand(0); 2544 unsigned Align = 2545 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2546 if (Align == 0) Align = 1; 2547 2548 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2549 // If not DWORD aligned, call memset if size is less than the threshold. 2550 // It knows how to align to the right boundary first. 2551 if ((Align & 3) != 0 || 2552 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2553 MVT::ValueType IntPtr = getPointerTy(); 2554 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2555 std::vector<std::pair<SDOperand, const Type*> > Args; 2556 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2557 // Extend the ubyte argument to be an int value for the call. 2558 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 2559 Args.push_back(std::make_pair(Val, IntPtrTy)); 2560 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2561 std::pair<SDOperand,SDOperand> CallResult = 2562 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2563 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 2564 return CallResult.second; 2565 } 2566 2567 MVT::ValueType AVT; 2568 SDOperand Count; 2569 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2570 unsigned BytesLeft = 0; 2571 bool TwoRepStos = false; 2572 if (ValC) { 2573 unsigned ValReg; 2574 unsigned Val = ValC->getValue() & 255; 2575 2576 // If the value is a constant, then we can potentially use larger sets. 2577 switch (Align & 3) { 2578 case 2: // WORD aligned 2579 AVT = MVT::i16; 2580 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2581 BytesLeft = I->getValue() % 2; 2582 Val = (Val << 8) | Val; 2583 ValReg = X86::AX; 2584 break; 2585 case 0: // DWORD aligned 2586 AVT = MVT::i32; 2587 if (I) { 2588 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2589 BytesLeft = I->getValue() % 4; 2590 } else { 2591 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2592 DAG.getConstant(2, MVT::i8)); 2593 TwoRepStos = true; 2594 } 2595 Val = (Val << 8) | Val; 2596 Val = (Val << 16) | Val; 2597 ValReg = X86::EAX; 2598 break; 2599 default: // Byte aligned 2600 AVT = MVT::i8; 2601 Count = Op.getOperand(3); 2602 ValReg = X86::AL; 2603 break; 2604 } 2605 2606 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 2607 InFlag); 2608 InFlag = Chain.getValue(1); 2609 } else { 2610 AVT = MVT::i8; 2611 Count = Op.getOperand(3); 2612 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 2613 InFlag = Chain.getValue(1); 2614 } 2615 2616 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2617 InFlag = Chain.getValue(1); 2618 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2619 InFlag = Chain.getValue(1); 2620 2621 std::vector<MVT::ValueType> Tys; 2622 Tys.push_back(MVT::Other); 2623 Tys.push_back(MVT::Flag); 2624 std::vector<SDOperand> Ops; 2625 Ops.push_back(Chain); 2626 Ops.push_back(DAG.getValueType(AVT)); 2627 Ops.push_back(InFlag); 2628 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2629 2630 if (TwoRepStos) { 2631 InFlag = Chain.getValue(1); 2632 Count = Op.getOperand(3); 2633 MVT::ValueType CVT = Count.getValueType(); 2634 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2635 DAG.getConstant(3, CVT)); 2636 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2637 InFlag = Chain.getValue(1); 2638 Tys.clear(); 2639 Tys.push_back(MVT::Other); 2640 Tys.push_back(MVT::Flag); 2641 Ops.clear(); 2642 Ops.push_back(Chain); 2643 Ops.push_back(DAG.getValueType(MVT::i8)); 2644 Ops.push_back(InFlag); 2645 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 2646 } else if (BytesLeft) { 2647 // Issue stores for the last 1 - 3 bytes. 2648 SDOperand Value; 2649 unsigned Val = ValC->getValue() & 255; 2650 unsigned Offset = I->getValue() - BytesLeft; 2651 SDOperand DstAddr = Op.getOperand(1); 2652 MVT::ValueType AddrVT = DstAddr.getValueType(); 2653 if (BytesLeft >= 2) { 2654 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 2655 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2656 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2657 DAG.getConstant(Offset, AddrVT)), 2658 DAG.getSrcValue(NULL)); 2659 BytesLeft -= 2; 2660 Offset += 2; 2661 } 2662 2663 if (BytesLeft == 1) { 2664 Value = DAG.getConstant(Val, MVT::i8); 2665 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2666 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 2667 DAG.getConstant(Offset, AddrVT)), 2668 DAG.getSrcValue(NULL)); 2669 } 2670 } 2671 2672 return Chain; 2673 } 2674 case ISD::MEMCPY: { 2675 SDOperand Chain = Op.getOperand(0); 2676 unsigned Align = 2677 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 2678 if (Align == 0) Align = 1; 2679 2680 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 2681 // If not DWORD aligned, call memcpy if size is less than the threshold. 2682 // It knows how to align to the right boundary first. 2683 if ((Align & 3) != 0 || 2684 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 2685 MVT::ValueType IntPtr = getPointerTy(); 2686 const Type *IntPtrTy = getTargetData().getIntPtrType(); 2687 std::vector<std::pair<SDOperand, const Type*> > Args; 2688 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 2689 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 2690 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 2691 std::pair<SDOperand,SDOperand> CallResult = 2692 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 2693 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 2694 return CallResult.second; 2695 } 2696 2697 MVT::ValueType AVT; 2698 SDOperand Count; 2699 unsigned BytesLeft = 0; 2700 bool TwoRepMovs = false; 2701 switch (Align & 3) { 2702 case 2: // WORD aligned 2703 AVT = MVT::i16; 2704 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 2705 BytesLeft = I->getValue() % 2; 2706 break; 2707 case 0: // DWORD aligned 2708 AVT = MVT::i32; 2709 if (I) { 2710 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 2711 BytesLeft = I->getValue() % 4; 2712 } else { 2713 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 2714 DAG.getConstant(2, MVT::i8)); 2715 TwoRepMovs = true; 2716 } 2717 break; 2718 default: // Byte aligned 2719 AVT = MVT::i8; 2720 Count = Op.getOperand(3); 2721 break; 2722 } 2723 2724 SDOperand InFlag(0, 0); 2725 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 2726 InFlag = Chain.getValue(1); 2727 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 2728 InFlag = Chain.getValue(1); 2729 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 2730 InFlag = Chain.getValue(1); 2731 2732 std::vector<MVT::ValueType> Tys; 2733 Tys.push_back(MVT::Other); 2734 Tys.push_back(MVT::Flag); 2735 std::vector<SDOperand> Ops; 2736 Ops.push_back(Chain); 2737 Ops.push_back(DAG.getValueType(AVT)); 2738 Ops.push_back(InFlag); 2739 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2740 2741 if (TwoRepMovs) { 2742 InFlag = Chain.getValue(1); 2743 Count = Op.getOperand(3); 2744 MVT::ValueType CVT = Count.getValueType(); 2745 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 2746 DAG.getConstant(3, CVT)); 2747 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 2748 InFlag = Chain.getValue(1); 2749 Tys.clear(); 2750 Tys.push_back(MVT::Other); 2751 Tys.push_back(MVT::Flag); 2752 Ops.clear(); 2753 Ops.push_back(Chain); 2754 Ops.push_back(DAG.getValueType(MVT::i8)); 2755 Ops.push_back(InFlag); 2756 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 2757 } else if (BytesLeft) { 2758 // Issue loads and stores for the last 1 - 3 bytes. 2759 unsigned Offset = I->getValue() - BytesLeft; 2760 SDOperand DstAddr = Op.getOperand(1); 2761 MVT::ValueType DstVT = DstAddr.getValueType(); 2762 SDOperand SrcAddr = Op.getOperand(2); 2763 MVT::ValueType SrcVT = SrcAddr.getValueType(); 2764 SDOperand Value; 2765 if (BytesLeft >= 2) { 2766 Value = DAG.getLoad(MVT::i16, Chain, 2767 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2768 DAG.getConstant(Offset, SrcVT)), 2769 DAG.getSrcValue(NULL)); 2770 Chain = Value.getValue(1); 2771 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2772 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2773 DAG.getConstant(Offset, DstVT)), 2774 DAG.getSrcValue(NULL)); 2775 BytesLeft -= 2; 2776 Offset += 2; 2777 } 2778 2779 if (BytesLeft == 1) { 2780 Value = DAG.getLoad(MVT::i8, Chain, 2781 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 2782 DAG.getConstant(Offset, SrcVT)), 2783 DAG.getSrcValue(NULL)); 2784 Chain = Value.getValue(1); 2785 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2786 DAG.getNode(ISD::ADD, DstVT, DstAddr, 2787 DAG.getConstant(Offset, DstVT)), 2788 DAG.getSrcValue(NULL)); 2789 } 2790 } 2791 2792 return Chain; 2793 } 2794 2795 // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their 2796 // target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 2797 // one of the above mentioned nodes. It has to be wrapped because otherwise 2798 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2799 // be used to form addressing mode. These wrapped nodes will be selected 2800 // into MOV32ri. 2801 case ISD::ConstantPool: { 2802 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2803 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2804 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 2805 CP->getAlignment())); 2806 if (Subtarget->isTargetDarwin()) { 2807 // With PIC, the address is actually $g + Offset. 2808 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2809 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2810 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2811 } 2812 2813 return Result; 2814 } 2815 case ISD::GlobalAddress: { 2816 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2817 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2818 DAG.getTargetGlobalAddress(GV, getPointerTy())); 2819 if (Subtarget->isTargetDarwin()) { 2820 // With PIC, the address is actually $g + Offset. 2821 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2822 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2823 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2824 2825 // For Darwin, external and weak symbols are indirect, so we want to load 2826 // the value at address GV, not the value of GV itself. This means that 2827 // the GlobalAddress must be in the base or index register of the address, 2828 // not the GV offset field. 2829 if (getTargetMachine().getRelocationModel() != Reloc::Static && 2830 DarwinGVRequiresExtraLoad(GV)) 2831 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 2832 Result, DAG.getSrcValue(NULL)); 2833 } 2834 2835 return Result; 2836 } 2837 case ISD::ExternalSymbol: { 2838 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2839 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 2840 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 2841 if (Subtarget->isTargetDarwin()) { 2842 // With PIC, the address is actually $g + Offset. 2843 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 2844 Result = DAG.getNode(ISD::ADD, getPointerTy(), 2845 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 2846 } 2847 2848 return Result; 2849 } 2850 case ISD::VASTART: { 2851 // vastart just stores the address of the VarArgsFrameIndex slot into the 2852 // memory location argument. 2853 // FIXME: Replace MVT::i32 with PointerTy 2854 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 2855 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 2856 Op.getOperand(1), Op.getOperand(2)); 2857 } 2858 case ISD::RET: { 2859 SDOperand Copy; 2860 2861 switch(Op.getNumOperands()) { 2862 default: 2863 assert(0 && "Do not know how to return this many arguments!"); 2864 abort(); 2865 case 1: // ret void. 2866 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 2867 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 2868 case 2: { 2869 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 2870 2871 if (MVT::isVector(ArgVT)) { 2872 // Integer or FP vector result -> XMM0. 2873 if (DAG.getMachineFunction().liveout_empty()) 2874 DAG.getMachineFunction().addLiveOut(X86::XMM0); 2875 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 2876 SDOperand()); 2877 } else if (MVT::isInteger(ArgVT)) { 2878 // Integer result -> EAX 2879 if (DAG.getMachineFunction().liveout_empty()) 2880 DAG.getMachineFunction().addLiveOut(X86::EAX); 2881 2882 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 2883 SDOperand()); 2884 } else if (!X86ScalarSSE) { 2885 // FP return with fp-stack value. 2886 if (DAG.getMachineFunction().liveout_empty()) 2887 DAG.getMachineFunction().addLiveOut(X86::ST0); 2888 2889 std::vector<MVT::ValueType> Tys; 2890 Tys.push_back(MVT::Other); 2891 Tys.push_back(MVT::Flag); 2892 std::vector<SDOperand> Ops; 2893 Ops.push_back(Op.getOperand(0)); 2894 Ops.push_back(Op.getOperand(1)); 2895 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2896 } else { 2897 // FP return with ScalarSSE (return on fp-stack). 2898 if (DAG.getMachineFunction().liveout_empty()) 2899 DAG.getMachineFunction().addLiveOut(X86::ST0); 2900 2901 SDOperand MemLoc; 2902 SDOperand Chain = Op.getOperand(0); 2903 SDOperand Value = Op.getOperand(1); 2904 2905 if (Value.getOpcode() == ISD::LOAD && 2906 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 2907 Chain = Value.getOperand(0); 2908 MemLoc = Value.getOperand(1); 2909 } else { 2910 // Spill the value to memory and reload it into top of stack. 2911 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 2912 MachineFunction &MF = DAG.getMachineFunction(); 2913 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 2914 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 2915 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 2916 Value, MemLoc, DAG.getSrcValue(0)); 2917 } 2918 std::vector<MVT::ValueType> Tys; 2919 Tys.push_back(MVT::f64); 2920 Tys.push_back(MVT::Other); 2921 std::vector<SDOperand> Ops; 2922 Ops.push_back(Chain); 2923 Ops.push_back(MemLoc); 2924 Ops.push_back(DAG.getValueType(ArgVT)); 2925 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 2926 Tys.clear(); 2927 Tys.push_back(MVT::Other); 2928 Tys.push_back(MVT::Flag); 2929 Ops.clear(); 2930 Ops.push_back(Copy.getValue(1)); 2931 Ops.push_back(Copy); 2932 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 2933 } 2934 break; 2935 } 2936 case 3: 2937 if (DAG.getMachineFunction().liveout_empty()) { 2938 DAG.getMachineFunction().addLiveOut(X86::EAX); 2939 DAG.getMachineFunction().addLiveOut(X86::EDX); 2940 } 2941 2942 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 2943 SDOperand()); 2944 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 2945 break; 2946 } 2947 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 2948 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 2949 Copy.getValue(1)); 2950 } 2951 case ISD::SCALAR_TO_VECTOR: { 2952 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 2953 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 2954 } 2955 case ISD::VECTOR_SHUFFLE: { 2956 SDOperand V1 = Op.getOperand(0); 2957 SDOperand V2 = Op.getOperand(1); 2958 SDOperand PermMask = Op.getOperand(2); 2959 MVT::ValueType VT = Op.getValueType(); 2960 unsigned NumElems = PermMask.getNumOperands(); 2961 2962 if (isSplatMask(PermMask.Val)) { 2963 if (NumElems <= 4) return Op; 2964 // Promote it to a v4i32 splat. 2965 return PromoteSplat(Op, DAG); 2966 } 2967 2968 if (X86::isMOVLMask(PermMask.Val) || 2969 X86::isMOVSHDUPMask(PermMask.Val) || 2970 X86::isMOVSLDUPMask(PermMask.Val) || 2971 X86::isMOVHLPSMask(PermMask.Val) || 2972 X86::isMOVHPMask(PermMask.Val) || 2973 X86::isMOVLPMask(PermMask.Val)) 2974 return Op; 2975 2976 if (ShouldXformToMOVHLPS(PermMask.Val) || 2977 ShouldXformToMOVLP(V1.Val, PermMask.Val)) 2978 return CommuteVectorShuffle(Op, DAG); 2979 2980 bool V1IsSplat = isSplatVector(V1.Val); 2981 bool V2IsSplat = isSplatVector(V2.Val); 2982 if (V1IsSplat && !V2IsSplat) { 2983 Op = CommuteVectorShuffle(Op, DAG); 2984 V1 = Op.getOperand(0); 2985 V2 = Op.getOperand(1); 2986 PermMask = Op.getOperand(2); 2987 V2IsSplat = true; 2988 } 2989 2990 if (isCommutedMOVL(PermMask.Val, V2IsSplat)) { 2991 Op = CommuteVectorShuffle(Op, DAG); 2992 V1 = Op.getOperand(0); 2993 V2 = Op.getOperand(1); 2994 PermMask = Op.getOperand(2); 2995 if (V2IsSplat) { 2996 // V2 is a splat, so the mask may be malformed. That is, it may point 2997 // to any V2 element. The instruction selectior won't like this. Get 2998 // a corrected mask and commute to form a proper MOVS{S|D}. 2999 SDOperand NewMask = getMOVLMask(NumElems, DAG); 3000 if (NewMask.Val != PermMask.Val) 3001 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3002 } 3003 return Op; 3004 } 3005 3006 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 3007 X86::isUNPCKLMask(PermMask.Val) || 3008 X86::isUNPCKHMask(PermMask.Val)) 3009 return Op; 3010 3011 if (V2IsSplat) { 3012 // Normalize mask so all entries that point to V2 points to its first 3013 // element then try to match unpck{h|l} again. If match, return a 3014 // new vector_shuffle with the corrected mask. 3015 SDOperand NewMask = NormalizeMask(PermMask, DAG); 3016 if (NewMask.Val != PermMask.Val) { 3017 if (X86::isUNPCKLMask(PermMask.Val, true)) { 3018 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 3019 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3020 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 3021 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 3022 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3023 } 3024 } 3025 } 3026 3027 // Normalize the node to match x86 shuffle ops if needed 3028 if (V2.getOpcode() != ISD::UNDEF) 3029 if (isCommutedSHUFP(PermMask.Val)) { 3030 Op = CommuteVectorShuffle(Op, DAG); 3031 V1 = Op.getOperand(0); 3032 V2 = Op.getOperand(1); 3033 PermMask = Op.getOperand(2); 3034 } 3035 3036 // If VT is integer, try PSHUF* first, then SHUFP*. 3037 if (MVT::isInteger(VT)) { 3038 if (X86::isPSHUFDMask(PermMask.Val) || 3039 X86::isPSHUFHWMask(PermMask.Val) || 3040 X86::isPSHUFLWMask(PermMask.Val)) { 3041 if (V2.getOpcode() != ISD::UNDEF) 3042 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3043 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3044 return Op; 3045 } 3046 3047 if (X86::isSHUFPMask(PermMask.Val)) 3048 return Op; 3049 3050 // Handle v8i16 shuffle high / low shuffle node pair. 3051 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 3052 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3053 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3054 std::vector<SDOperand> MaskVec; 3055 for (unsigned i = 0; i != 4; ++i) 3056 MaskVec.push_back(PermMask.getOperand(i)); 3057 for (unsigned i = 4; i != 8; ++i) 3058 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3059 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3060 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3061 MaskVec.clear(); 3062 for (unsigned i = 0; i != 4; ++i) 3063 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3064 for (unsigned i = 4; i != 8; ++i) 3065 MaskVec.push_back(PermMask.getOperand(i)); 3066 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3067 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3068 } 3069 } else { 3070 // Floating point cases in the other order. 3071 if (X86::isSHUFPMask(PermMask.Val)) 3072 return Op; 3073 if (X86::isPSHUFDMask(PermMask.Val) || 3074 X86::isPSHUFHWMask(PermMask.Val) || 3075 X86::isPSHUFLWMask(PermMask.Val)) { 3076 if (V2.getOpcode() != ISD::UNDEF) 3077 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3078 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3079 return Op; 3080 } 3081 } 3082 3083 if (NumElems == 4) { 3084 // Break it into (shuffle shuffle_hi, shuffle_lo). 3085 MVT::ValueType MaskVT = PermMask.getValueType(); 3086 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3087 std::map<unsigned, std::pair<int, int> > Locs; 3088 std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3089 std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3090 std::vector<SDOperand> *MaskPtr = &LoMask; 3091 unsigned MaskIdx = 0; 3092 unsigned LoIdx = 0; 3093 unsigned HiIdx = NumElems/2; 3094 for (unsigned i = 0; i != NumElems; ++i) { 3095 if (i == NumElems/2) { 3096 MaskPtr = &HiMask; 3097 MaskIdx = 1; 3098 LoIdx = 0; 3099 HiIdx = NumElems/2; 3100 } 3101 SDOperand Elt = PermMask.getOperand(i); 3102 if (Elt.getOpcode() == ISD::UNDEF) { 3103 Locs[i] = std::make_pair(-1, -1); 3104 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3105 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3106 (*MaskPtr)[LoIdx] = Elt; 3107 LoIdx++; 3108 } else { 3109 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3110 (*MaskPtr)[HiIdx] = Elt; 3111 HiIdx++; 3112 } 3113 } 3114 3115 SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3116 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask)); 3117 SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3118 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask)); 3119 std::vector<SDOperand> MaskOps; 3120 for (unsigned i = 0; i != NumElems; ++i) { 3121 if (Locs[i].first == -1) { 3122 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3123 } else { 3124 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3125 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3126 } 3127 } 3128 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3129 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps)); 3130 } 3131 3132 return SDOperand(); 3133 } 3134 case ISD::BUILD_VECTOR: { 3135 // All one's are handled with pcmpeqd. 3136 if (ISD::isBuildVectorAllOnes(Op.Val)) 3137 return Op; 3138 3139 unsigned NumElems = Op.getNumOperands(); 3140 unsigned Half = NumElems/2; 3141 MVT::ValueType VT = Op.getValueType(); 3142 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 3143 unsigned NumZero = 0; 3144 unsigned NonZeros = 0; 3145 std::set<SDOperand> Values; 3146 for (unsigned i = 0; i < NumElems; ++i) { 3147 SDOperand Elt = Op.getOperand(i); 3148 Values.insert(Elt); 3149 if (isZeroNode(Elt)) 3150 NumZero++; 3151 else if (Elt.getOpcode() != ISD::UNDEF) 3152 NonZeros |= (1 << i); 3153 } 3154 3155 unsigned NumNonZero = CountPopulation_32(NonZeros); 3156 if (NumNonZero == 0) 3157 return Op; 3158 3159 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3160 if (Values.size() == 1) 3161 return SDOperand(); 3162 3163 // If element VT is >= 32 bits, turn it into a number of shuffles. 3164 if (NumNonZero == 1) { 3165 unsigned Idx = CountTrailingZeros_32(NonZeros); 3166 SDOperand Item = Op.getOperand(Idx); 3167 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3168 if (Idx == 0) 3169 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 3170 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 3171 NumZero > 0, DAG); 3172 3173 if (MVT::getSizeInBits(EVT) >= 32) { 3174 // Turn it into a shuffle of zero and zero-extended scalar to vector. 3175 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 3176 DAG); 3177 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3178 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3179 std::vector<SDOperand> MaskVec; 3180 for (unsigned i = 0; i < NumElems; i++) 3181 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3182 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3183 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3184 DAG.getNode(ISD::UNDEF, VT), Mask); 3185 } 3186 } 3187 3188 // If element VT is < 32 bits, convert it to inserts into a zero vector. 3189 if (MVT::getSizeInBits(EVT) <= 16) { 3190 if (NumNonZero <= Half) { 3191 SDOperand V(0, 0); 3192 3193 for (unsigned i = 0; i < NumNonZero; ++i) { 3194 unsigned Idx = CountTrailingZeros_32(NonZeros); 3195 NonZeros ^= (1 << Idx); 3196 SDOperand Item = Op.getOperand(Idx); 3197 if (i == 0) { 3198 if (NumZero) 3199 V = getZeroVector(MVT::v8i16, DAG); 3200 else 3201 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3202 } 3203 if (EVT == MVT::i8) { 3204 Item = DAG.getNode(ISD::ANY_EXTEND, MVT::i16, Item); 3205 if ((Idx % 2) != 0) 3206 Item = DAG.getNode(ISD::SHL, MVT::i16, 3207 Item, DAG.getConstant(8, MVT::i8)); 3208 Idx /= 2; 3209 } 3210 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Item, 3211 DAG.getConstant(Idx, MVT::i32)); 3212 } 3213 3214 if (EVT == MVT::i8) 3215 V = DAG.getNode(ISD::BIT_CONVERT, VT, V); 3216 return V; 3217 } 3218 } 3219 3220 std::vector<SDOperand> V(NumElems); 3221 if (NumElems == 4 && NumZero > 0) { 3222 for (unsigned i = 0; i < 4; ++i) { 3223 bool isZero = !(NonZeros & (1 << i)); 3224 if (isZero) 3225 V[i] = getZeroVector(VT, DAG); 3226 else 3227 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3228 } 3229 3230 for (unsigned i = 0; i < 2; ++i) { 3231 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3232 default: break; 3233 case 0: 3234 V[i] = V[i*2]; // Must be a zero vector. 3235 break; 3236 case 1: 3237 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3238 getMOVLMask(NumElems, DAG)); 3239 break; 3240 case 2: 3241 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3242 getMOVLMask(NumElems, DAG)); 3243 break; 3244 case 3: 3245 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3246 getUnpacklMask(NumElems, DAG)); 3247 break; 3248 } 3249 } 3250 3251 // Take advantage of the fact R32 to VR128 scalar_to_vector (i.e. movd) 3252 // clears the upper bits. 3253 // FIXME: we can do the same for v4f32 case when we know both parts of 3254 // the lower half come from scalar_to_vector (loadf32). We should do 3255 // that in post legalizer dag combiner with target specific hooks. 3256 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 3257 return V[0]; 3258 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3259 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 3260 std::vector<SDOperand> MaskVec; 3261 bool Reverse = (NonZeros & 0x3) == 2; 3262 for (unsigned i = 0; i < 2; ++i) 3263 if (Reverse) 3264 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3265 else 3266 MaskVec.push_back(DAG.getConstant(i, EVT)); 3267 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3268 for (unsigned i = 0; i < 2; ++i) 3269 if (Reverse) 3270 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3271 else 3272 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3273 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 3274 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3275 } 3276 3277 if (Values.size() > 2) { 3278 // Expand into a number of unpckl*. 3279 // e.g. for v4f32 3280 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3281 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3282 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3283 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 3284 for (unsigned i = 0; i < NumElems; ++i) 3285 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3286 NumElems >>= 1; 3287 while (NumElems != 0) { 3288 for (unsigned i = 0; i < NumElems; ++i) 3289 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3290 UnpckMask); 3291 NumElems >>= 1; 3292 } 3293 return V[0]; 3294 } 3295 3296 return SDOperand(); 3297 } 3298 case ISD::EXTRACT_VECTOR_ELT: { 3299 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3300 return SDOperand(); 3301 3302 MVT::ValueType VT = Op.getValueType(); 3303 // TODO: handle v16i8. 3304 if (MVT::getSizeInBits(VT) == 16) { 3305 // Transform it so it match pextrw which produces a 32-bit result. 3306 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 3307 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 3308 Op.getOperand(0), Op.getOperand(1)); 3309 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 3310 DAG.getValueType(VT)); 3311 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3312 } else if (MVT::getSizeInBits(VT) == 32) { 3313 SDOperand Vec = Op.getOperand(0); 3314 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3315 if (Idx == 0) 3316 return Op; 3317 3318 // SHUFPS the element to the lowest double word, then movss. 3319 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3320 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 3321 MVT::getVectorBaseType(MaskVT)); 3322 std::vector<SDOperand> IdxVec; 3323 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 3324 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3325 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3326 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3327 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 3328 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3329 Vec, Vec, Mask); 3330 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3331 DAG.getConstant(0, MVT::i32)); 3332 } else if (MVT::getSizeInBits(VT) == 64) { 3333 SDOperand Vec = Op.getOperand(0); 3334 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3335 if (Idx == 0) 3336 return Op; 3337 3338 // UNPCKHPD the element to the lowest double word, then movsd. 3339 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 3340 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 3341 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3342 std::vector<SDOperand> IdxVec; 3343 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 3344 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3345 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 3346 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3347 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3348 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3349 DAG.getConstant(0, MVT::i32)); 3350 } 3351 3352 return SDOperand(); 3353 } 3354 case ISD::INSERT_VECTOR_ELT: { 3355 // Transform it so it match pinsrw which expects a 16-bit value in a R32 3356 // as its second argument. 3357 MVT::ValueType VT = Op.getValueType(); 3358 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 3359 SDOperand N0 = Op.getOperand(0); 3360 SDOperand N1 = Op.getOperand(1); 3361 SDOperand N2 = Op.getOperand(2); 3362 if (MVT::getSizeInBits(BaseVT) == 16) { 3363 if (N1.getValueType() != MVT::i32) 3364 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3365 if (N2.getValueType() != MVT::i32) 3366 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 3367 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3368 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3369 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3370 if (Idx == 0) { 3371 // Use a movss. 3372 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3373 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3374 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3375 std::vector<SDOperand> MaskVec; 3376 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3377 for (unsigned i = 1; i <= 3; ++i) 3378 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3379 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3380 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec)); 3381 } else { 3382 // Use two pinsrw instructions to insert a 32 bit value. 3383 Idx <<= 1; 3384 if (MVT::isFloatingPoint(N1.getValueType())) { 3385 if (N1.getOpcode() == ISD::LOAD) { 3386 // Just load directly from f32mem to R32. 3387 N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), 3388 N1.getOperand(2)); 3389 } else { 3390 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3391 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3392 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3393 DAG.getConstant(0, MVT::i32)); 3394 } 3395 } 3396 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3397 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3398 DAG.getConstant(Idx, MVT::i32)); 3399 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3400 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3401 DAG.getConstant(Idx+1, MVT::i32)); 3402 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3403 } 3404 } 3405 3406 return SDOperand(); 3407 } 3408 case ISD::INTRINSIC_WO_CHAIN: { 3409 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 3410 switch (IntNo) { 3411 default: return SDOperand(); // Don't custom lower most intrinsics. 3412 // Comparison intrinsics. 3413 case Intrinsic::x86_sse_comieq_ss: 3414 case Intrinsic::x86_sse_comilt_ss: 3415 case Intrinsic::x86_sse_comile_ss: 3416 case Intrinsic::x86_sse_comigt_ss: 3417 case Intrinsic::x86_sse_comige_ss: 3418 case Intrinsic::x86_sse_comineq_ss: 3419 case Intrinsic::x86_sse_ucomieq_ss: 3420 case Intrinsic::x86_sse_ucomilt_ss: 3421 case Intrinsic::x86_sse_ucomile_ss: 3422 case Intrinsic::x86_sse_ucomigt_ss: 3423 case Intrinsic::x86_sse_ucomige_ss: 3424 case Intrinsic::x86_sse_ucomineq_ss: 3425 case Intrinsic::x86_sse2_comieq_sd: 3426 case Intrinsic::x86_sse2_comilt_sd: 3427 case Intrinsic::x86_sse2_comile_sd: 3428 case Intrinsic::x86_sse2_comigt_sd: 3429 case Intrinsic::x86_sse2_comige_sd: 3430 case Intrinsic::x86_sse2_comineq_sd: 3431 case Intrinsic::x86_sse2_ucomieq_sd: 3432 case Intrinsic::x86_sse2_ucomilt_sd: 3433 case Intrinsic::x86_sse2_ucomile_sd: 3434 case Intrinsic::x86_sse2_ucomigt_sd: 3435 case Intrinsic::x86_sse2_ucomige_sd: 3436 case Intrinsic::x86_sse2_ucomineq_sd: { 3437 unsigned Opc = 0; 3438 ISD::CondCode CC = ISD::SETCC_INVALID; 3439 switch (IntNo) { 3440 default: break; 3441 case Intrinsic::x86_sse_comieq_ss: 3442 case Intrinsic::x86_sse2_comieq_sd: 3443 Opc = X86ISD::COMI; 3444 CC = ISD::SETEQ; 3445 break; 3446 case Intrinsic::x86_sse_comilt_ss: 3447 case Intrinsic::x86_sse2_comilt_sd: 3448 Opc = X86ISD::COMI; 3449 CC = ISD::SETLT; 3450 break; 3451 case Intrinsic::x86_sse_comile_ss: 3452 case Intrinsic::x86_sse2_comile_sd: 3453 Opc = X86ISD::COMI; 3454 CC = ISD::SETLE; 3455 break; 3456 case Intrinsic::x86_sse_comigt_ss: 3457 case Intrinsic::x86_sse2_comigt_sd: 3458 Opc = X86ISD::COMI; 3459 CC = ISD::SETGT; 3460 break; 3461 case Intrinsic::x86_sse_comige_ss: 3462 case Intrinsic::x86_sse2_comige_sd: 3463 Opc = X86ISD::COMI; 3464 CC = ISD::SETGE; 3465 break; 3466 case Intrinsic::x86_sse_comineq_ss: 3467 case Intrinsic::x86_sse2_comineq_sd: 3468 Opc = X86ISD::COMI; 3469 CC = ISD::SETNE; 3470 break; 3471 case Intrinsic::x86_sse_ucomieq_ss: 3472 case Intrinsic::x86_sse2_ucomieq_sd: 3473 Opc = X86ISD::UCOMI; 3474 CC = ISD::SETEQ; 3475 break; 3476 case Intrinsic::x86_sse_ucomilt_ss: 3477 case Intrinsic::x86_sse2_ucomilt_sd: 3478 Opc = X86ISD::UCOMI; 3479 CC = ISD::SETLT; 3480 break; 3481 case Intrinsic::x86_sse_ucomile_ss: 3482 case Intrinsic::x86_sse2_ucomile_sd: 3483 Opc = X86ISD::UCOMI; 3484 CC = ISD::SETLE; 3485 break; 3486 case Intrinsic::x86_sse_ucomigt_ss: 3487 case Intrinsic::x86_sse2_ucomigt_sd: 3488 Opc = X86ISD::UCOMI; 3489 CC = ISD::SETGT; 3490 break; 3491 case Intrinsic::x86_sse_ucomige_ss: 3492 case Intrinsic::x86_sse2_ucomige_sd: 3493 Opc = X86ISD::UCOMI; 3494 CC = ISD::SETGE; 3495 break; 3496 case Intrinsic::x86_sse_ucomineq_ss: 3497 case Intrinsic::x86_sse2_ucomineq_sd: 3498 Opc = X86ISD::UCOMI; 3499 CC = ISD::SETNE; 3500 break; 3501 } 3502 bool Flip; 3503 unsigned X86CC; 3504 translateX86CC(CC, true, X86CC, Flip); 3505 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 3506 Op.getOperand(Flip?1:2)); 3507 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 3508 DAG.getConstant(X86CC, MVT::i8), Cond); 3509 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 3510 } 3511 } 3512 } 3513 } 3514} 3515 3516const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 3517 switch (Opcode) { 3518 default: return NULL; 3519 case X86ISD::SHLD: return "X86ISD::SHLD"; 3520 case X86ISD::SHRD: return "X86ISD::SHRD"; 3521 case X86ISD::FAND: return "X86ISD::FAND"; 3522 case X86ISD::FXOR: return "X86ISD::FXOR"; 3523 case X86ISD::FILD: return "X86ISD::FILD"; 3524 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 3525 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 3526 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 3527 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 3528 case X86ISD::FLD: return "X86ISD::FLD"; 3529 case X86ISD::FST: return "X86ISD::FST"; 3530 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 3531 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 3532 case X86ISD::CALL: return "X86ISD::CALL"; 3533 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 3534 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 3535 case X86ISD::CMP: return "X86ISD::CMP"; 3536 case X86ISD::TEST: return "X86ISD::TEST"; 3537 case X86ISD::COMI: return "X86ISD::COMI"; 3538 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 3539 case X86ISD::SETCC: return "X86ISD::SETCC"; 3540 case X86ISD::CMOV: return "X86ISD::CMOV"; 3541 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 3542 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 3543 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 3544 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 3545 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 3546 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 3547 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 3548 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 3549 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 3550 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 3551 } 3552} 3553 3554void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3555 uint64_t Mask, 3556 uint64_t &KnownZero, 3557 uint64_t &KnownOne, 3558 unsigned Depth) const { 3559 unsigned Opc = Op.getOpcode(); 3560 assert((Opc >= ISD::BUILTIN_OP_END || 3561 Opc == ISD::INTRINSIC_WO_CHAIN || 3562 Opc == ISD::INTRINSIC_W_CHAIN || 3563 Opc == ISD::INTRINSIC_VOID) && 3564 "Should use MaskedValueIsZero if you don't know whether Op" 3565 " is a target node!"); 3566 3567 KnownZero = KnownOne = 0; // Don't know anything. 3568 switch (Opc) { 3569 default: break; 3570 case X86ISD::SETCC: 3571 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 3572 break; 3573 } 3574} 3575 3576std::vector<unsigned> X86TargetLowering:: 3577getRegClassForInlineAsmConstraint(const std::string &Constraint, 3578 MVT::ValueType VT) const { 3579 if (Constraint.size() == 1) { 3580 // FIXME: not handling fp-stack yet! 3581 // FIXME: not handling MMX registers yet ('y' constraint). 3582 switch (Constraint[0]) { // GCC X86 Constraint Letters 3583 default: break; // Unknown constriant letter 3584 case 'r': // GENERAL_REGS 3585 case 'R': // LEGACY_REGS 3586 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3587 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 3588 case 'l': // INDEX_REGS 3589 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 3590 X86::ESI, X86::EDI, X86::EBP, 0); 3591 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 3592 case 'Q': // Q_REGS 3593 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 3594 case 'x': // SSE_REGS if SSE1 allowed 3595 if (Subtarget->hasSSE1()) 3596 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3597 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3598 0); 3599 return std::vector<unsigned>(); 3600 case 'Y': // SSE_REGS if SSE2 allowed 3601 if (Subtarget->hasSSE2()) 3602 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3603 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 3604 0); 3605 return std::vector<unsigned>(); 3606 } 3607 } 3608 3609 return std::vector<unsigned>(); 3610} 3611 3612/// isLegalAddressImmediate - Return true if the integer value or 3613/// GlobalValue can be used as the offset of the target addressing mode. 3614bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 3615 // X86 allows a sign-extended 32-bit immediate field. 3616 return (V > -(1LL << 32) && V < (1LL << 32)-1); 3617} 3618 3619bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 3620 if (Subtarget->isTargetDarwin()) { 3621 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 3622 if (RModel == Reloc::Static) 3623 return true; 3624 else if (RModel == Reloc::DynamicNoPIC) 3625 return !DarwinGVRequiresExtraLoad(GV); 3626 else 3627 return false; 3628 } else 3629 return true; 3630} 3631 3632/// isShuffleMaskLegal - Targets can use this to indicate that they only 3633/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3634/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3635/// are assumed to be legal. 3636bool 3637X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 3638 // Only do shuffles on 128-bit vector types for now. 3639 if (MVT::getSizeInBits(VT) == 64) return false; 3640 return (Mask.Val->getNumOperands() <= 4 || 3641 isSplatMask(Mask.Val) || 3642 isPSHUFHW_PSHUFLWMask(Mask.Val) || 3643 X86::isUNPCKLMask(Mask.Val) || 3644 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 3645 X86::isUNPCKHMask(Mask.Val)); 3646} 3647 3648bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 3649 MVT::ValueType EVT, 3650 SelectionDAG &DAG) const { 3651 unsigned NumElts = BVOps.size(); 3652 // Only do shuffles on 128-bit vector types for now. 3653 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 3654 if (NumElts == 2) return true; 3655 if (NumElts == 4) { 3656 return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) || 3657 isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps)); 3658 } 3659 return false; 3660} 3661