X86ISelLowering.cpp revision bfd68a785810fcc4dfc82f583c4f2bea192d59f4
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  }

  setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
  setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);

  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);

  setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
    setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  setOperationAction(ISD::SELECT , MVT::i8 , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC , MVT::i64 , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET , MVT::Other, Custom);
  // Darwin ABI issue.
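  // (These address nodes are custom lowered so that, for example, Darwin PIC
  // code can reference globals and constant pool entries through the picbase
  // and non-lazy pointer stubs rather than as plain absolute addresses.)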
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
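    // (Without SSE2, scalar FP falls back to the x87 floating point stack;
    // note that only f64 is registered as a legal type on this path.)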
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND, MVT::v4f32, Legal);
    setOperationAction(ISD::OR, MVT::v4f32, Legal);
    setOperationAction(ISD::XOR, MVT::v4f32, Legal);
    setOperationAction(ISD::ADD, MVT::v4f32, Legal);
    setOperationAction(ISD::SUB, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v2f64, Legal);
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2f64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in an XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = 0;          // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // If this is a struct return on Darwin/X86, the callee pops the hidden struct
  // pointer.
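  // (i.e. under CallingConv::CSRet on Darwin the callee itself pops the 4-byte
  // hidden sret pointer, which is why BytesToPopOnReturn is set to 4 below.)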
510 if (MF.getFunction()->getCallingConv() == CallingConv::CSRet && 511 Subtarget->isTargetDarwin()) 512 BytesToPopOnReturn = 4; 513 514 // Return the new list of results. 515 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 516 Op.Val->value_end()); 517 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 518} 519 520 521SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) { 522 SDOperand Chain = Op.getOperand(0); 523 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 524 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 525 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 526 SDOperand Callee = Op.getOperand(4); 527 MVT::ValueType RetVT= Op.Val->getValueType(0); 528 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 529 530 // Keep track of the number of XMM regs passed so far. 531 unsigned NumXMMRegs = 0; 532 static const unsigned XMMArgRegs[] = { 533 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 534 }; 535 536 // Count how many bytes are to be pushed on the stack. 537 unsigned NumBytes = 0; 538 for (unsigned i = 0; i != NumOps; ++i) { 539 SDOperand Arg = Op.getOperand(5+2*i); 540 541 switch (Arg.getValueType()) { 542 default: assert(0 && "Unexpected ValueType for argument!"); 543 case MVT::i8: 544 case MVT::i16: 545 case MVT::i32: 546 case MVT::f32: 547 NumBytes += 4; 548 break; 549 case MVT::i64: 550 case MVT::f64: 551 NumBytes += 8; 552 break; 553 case MVT::v16i8: 554 case MVT::v8i16: 555 case MVT::v4i32: 556 case MVT::v2i64: 557 case MVT::v4f32: 558 case MVT::v2f64: 559 if (NumXMMRegs < 4) 560 ++NumXMMRegs; 561 else { 562 // XMM arguments have to be aligned on 16-byte boundary. 563 NumBytes = ((NumBytes + 15) / 16) * 16; 564 NumBytes += 16; 565 } 566 break; 567 } 568 } 569 570 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 571 572 // Arguments go on the stack in reverse order, as specified by the ABI. 573 unsigned ArgOffset = 0; 574 NumXMMRegs = 0; 575 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 576 std::vector<SDOperand> MemOpChains; 577 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); 578 for (unsigned i = 0; i != NumOps; ++i) { 579 SDOperand Arg = Op.getOperand(5+2*i); 580 581 switch (Arg.getValueType()) { 582 default: assert(0 && "Unexpected ValueType for argument!"); 583 case MVT::i8: 584 case MVT::i16: { 585 // Promote the integer to 32 bits. If the input type is signed use a 586 // sign extend, otherwise use a zero extend. 587 unsigned ExtOp = 588 dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ? 
589 ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 590 Arg = DAG.getNode(ExtOp, MVT::i32, Arg); 591 } 592 // Fallthrough 593 594 case MVT::i32: 595 case MVT::f32: { 596 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 597 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 598 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 599 Arg, PtrOff, DAG.getSrcValue(NULL))); 600 ArgOffset += 4; 601 break; 602 } 603 case MVT::i64: 604 case MVT::f64: { 605 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 606 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 607 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 608 Arg, PtrOff, DAG.getSrcValue(NULL))); 609 ArgOffset += 8; 610 break; 611 } 612 case MVT::v16i8: 613 case MVT::v8i16: 614 case MVT::v4i32: 615 case MVT::v2i64: 616 case MVT::v4f32: 617 case MVT::v2f64: 618 if (NumXMMRegs < 4) { 619 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 620 NumXMMRegs++; 621 } else { 622 // XMM arguments have to be aligned on 16-byte boundary. 623 ArgOffset = ((ArgOffset + 15) / 16) * 16; 624 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 625 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 626 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 627 Arg, PtrOff, DAG.getSrcValue(NULL))); 628 ArgOffset += 16; 629 } 630 } 631 } 632 633 if (!MemOpChains.empty()) 634 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 635 &MemOpChains[0], MemOpChains.size()); 636 637 // Build a sequence of copy-to-reg nodes chained together with token chain 638 // and flag operands which copy the outgoing args into registers. 639 SDOperand InFlag; 640 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 641 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 642 InFlag); 643 InFlag = Chain.getValue(1); 644 } 645 646 // If the callee is a GlobalAddress node (quite common, every direct call is) 647 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 648 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 649 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 650 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 651 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 652 653 std::vector<MVT::ValueType> NodeTys; 654 NodeTys.push_back(MVT::Other); // Returns a chain 655 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 656 std::vector<SDOperand> Ops; 657 Ops.push_back(Chain); 658 Ops.push_back(Callee); 659 660 // Add argument registers to the end of the list so that they are known live 661 // into the call. 662 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 663 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 664 RegsToPass[i].second.getValueType())); 665 666 if (InFlag.Val) 667 Ops.push_back(InFlag); 668 669 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 670 NodeTys, &Ops[0], Ops.size()); 671 InFlag = Chain.getValue(1); 672 673 // Create the CALLSEQ_END node. 674 unsigned NumBytesForCalleeToPush = 0; 675 676 // If this is is a call to a struct-return function on Darwin/X86, the callee 677 // pops the hidden struct pointer, so we have to push it back. 
678 if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin()) 679 NumBytesForCalleeToPush = 4; 680 681 NodeTys.clear(); 682 NodeTys.push_back(MVT::Other); // Returns a chain 683 if (RetVT != MVT::Other) 684 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 685 Ops.clear(); 686 Ops.push_back(Chain); 687 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 688 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); 689 Ops.push_back(InFlag); 690 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 691 if (RetVT != MVT::Other) 692 InFlag = Chain.getValue(1); 693 694 std::vector<SDOperand> ResultVals; 695 NodeTys.clear(); 696 switch (RetVT) { 697 default: assert(0 && "Unknown value type to return!"); 698 case MVT::Other: break; 699 case MVT::i8: 700 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 701 ResultVals.push_back(Chain.getValue(0)); 702 NodeTys.push_back(MVT::i8); 703 break; 704 case MVT::i16: 705 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 706 ResultVals.push_back(Chain.getValue(0)); 707 NodeTys.push_back(MVT::i16); 708 break; 709 case MVT::i32: 710 if (Op.Val->getValueType(1) == MVT::i32) { 711 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 712 ResultVals.push_back(Chain.getValue(0)); 713 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32, 714 Chain.getValue(2)).getValue(1); 715 ResultVals.push_back(Chain.getValue(0)); 716 NodeTys.push_back(MVT::i32); 717 } else { 718 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 719 ResultVals.push_back(Chain.getValue(0)); 720 } 721 NodeTys.push_back(MVT::i32); 722 break; 723 case MVT::v16i8: 724 case MVT::v8i16: 725 case MVT::v4i32: 726 case MVT::v2i64: 727 case MVT::v4f32: 728 case MVT::v2f64: 729 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); 730 ResultVals.push_back(Chain.getValue(0)); 731 NodeTys.push_back(RetVT); 732 break; 733 case MVT::f32: 734 case MVT::f64: { 735 std::vector<MVT::ValueType> Tys; 736 Tys.push_back(MVT::f64); 737 Tys.push_back(MVT::Other); 738 Tys.push_back(MVT::Flag); 739 std::vector<SDOperand> Ops; 740 Ops.push_back(Chain); 741 Ops.push_back(InFlag); 742 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, 743 &Ops[0], Ops.size()); 744 Chain = RetVal.getValue(1); 745 InFlag = RetVal.getValue(2); 746 if (X86ScalarSSE) { 747 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 748 // shouldn't be necessary except that RFP cannot be live across 749 // multiple blocks. When stackifier is fixed, they can be uncoupled. 750 MachineFunction &MF = DAG.getMachineFunction(); 751 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 752 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 753 Tys.clear(); 754 Tys.push_back(MVT::Other); 755 Ops.clear(); 756 Ops.push_back(Chain); 757 Ops.push_back(RetVal); 758 Ops.push_back(StackSlot); 759 Ops.push_back(DAG.getValueType(RetVT)); 760 Ops.push_back(InFlag); 761 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 762 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, 763 DAG.getSrcValue(NULL)); 764 Chain = RetVal.getValue(1); 765 } 766 767 if (RetVT == MVT::f32 && !X86ScalarSSE) 768 // FIXME: we would really like to remember that this FP_ROUND 769 // operation is okay to eliminate if we allow excess FP precision. 
770 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 771 ResultVals.push_back(RetVal); 772 NodeTys.push_back(RetVT); 773 break; 774 } 775 } 776 777 // If the function returns void, just return the chain. 778 if (ResultVals.empty()) 779 return Chain; 780 781 // Otherwise, merge everything together with a MERGE_VALUES node. 782 NodeTys.push_back(MVT::Other); 783 ResultVals.push_back(Chain); 784 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, 785 &ResultVals[0], ResultVals.size()); 786 return Res.getValue(Op.ResNo); 787} 788 789 790//===----------------------------------------------------------------------===// 791// X86-64 C Calling Convention implementation 792//===----------------------------------------------------------------------===// 793 794/// HowToPassX86_64CCCArgument - Returns how an formal argument of the specified 795/// type should be passed. If it is through stack, returns the size of the stack 796/// slot; if it is through integer or XMM register, returns the number of 797/// integer or XMM registers are needed. 798static void 799HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT, 800 unsigned NumIntRegs, unsigned NumXMMRegs, 801 unsigned &ObjSize, unsigned &ObjIntRegs, 802 unsigned &ObjXMMRegs) { 803 ObjSize = 0; 804 ObjIntRegs = 0; 805 ObjXMMRegs = 0; 806 807 switch (ObjectVT) { 808 default: assert(0 && "Unhandled argument type!"); 809 case MVT::i8: 810 case MVT::i16: 811 case MVT::i32: 812 case MVT::i64: 813 if (NumIntRegs < 6) 814 ObjIntRegs = 1; 815 else { 816 switch (ObjectVT) { 817 default: break; 818 case MVT::i8: ObjSize = 1; break; 819 case MVT::i16: ObjSize = 2; break; 820 case MVT::i32: ObjSize = 4; break; 821 case MVT::i64: ObjSize = 8; break; 822 } 823 } 824 break; 825 case MVT::f32: 826 case MVT::f64: 827 case MVT::v16i8: 828 case MVT::v8i16: 829 case MVT::v4i32: 830 case MVT::v2i64: 831 case MVT::v4f32: 832 case MVT::v2f64: 833 if (NumXMMRegs < 8) 834 ObjXMMRegs = 1; 835 else { 836 switch (ObjectVT) { 837 default: break; 838 case MVT::f32: ObjSize = 4; break; 839 case MVT::f64: ObjSize = 8; break; 840 case MVT::v16i8: 841 case MVT::v8i16: 842 case MVT::v4i32: 843 case MVT::v2i64: 844 case MVT::v4f32: 845 case MVT::v2f64: ObjSize = 16; break; 846 } 847 break; 848 } 849 } 850} 851 852SDOperand 853X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { 854 unsigned NumArgs = Op.Val->getNumValues() - 1; 855 MachineFunction &MF = DAG.getMachineFunction(); 856 MachineFrameInfo *MFI = MF.getFrameInfo(); 857 SDOperand Root = Op.getOperand(0); 858 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 859 std::vector<SDOperand> ArgValues; 860 861 // Add DAG nodes to load the arguments... On entry to a function on the X86, 862 // the stack frame looks like this: 863 // 864 // [RSP] -- return address 865 // [RSP + 8] -- first nonreg argument (leftmost lexically) 866 // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size 867 // ... 868 // 869 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 870 unsigned NumIntRegs = 0; // Int regs used for parameter passing. 871 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 
872 873 static const unsigned GPR8ArgRegs[] = { 874 X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B 875 }; 876 static const unsigned GPR16ArgRegs[] = { 877 X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W 878 }; 879 static const unsigned GPR32ArgRegs[] = { 880 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D 881 }; 882 static const unsigned GPR64ArgRegs[] = { 883 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 884 }; 885 static const unsigned XMMArgRegs[] = { 886 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 887 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 888 }; 889 890 for (unsigned i = 0; i < NumArgs; ++i) { 891 MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); 892 unsigned ArgIncrement = 8; 893 unsigned ObjSize = 0; 894 unsigned ObjIntRegs = 0; 895 unsigned ObjXMMRegs = 0; 896 897 // FIXME: __int128 and long double support? 898 HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs, 899 ObjSize, ObjIntRegs, ObjXMMRegs); 900 if (ObjSize > 8) 901 ArgIncrement = ObjSize; 902 903 unsigned Reg = 0; 904 SDOperand ArgValue; 905 if (ObjIntRegs || ObjXMMRegs) { 906 switch (ObjectVT) { 907 default: assert(0 && "Unhandled argument type!"); 908 case MVT::i8: 909 case MVT::i16: 910 case MVT::i32: 911 case MVT::i64: { 912 TargetRegisterClass *RC = NULL; 913 switch (ObjectVT) { 914 default: break; 915 case MVT::i8: 916 RC = X86::GR8RegisterClass; 917 Reg = GPR8ArgRegs[NumIntRegs]; 918 break; 919 case MVT::i16: 920 RC = X86::GR16RegisterClass; 921 Reg = GPR16ArgRegs[NumIntRegs]; 922 break; 923 case MVT::i32: 924 RC = X86::GR32RegisterClass; 925 Reg = GPR32ArgRegs[NumIntRegs]; 926 break; 927 case MVT::i64: 928 RC = X86::GR64RegisterClass; 929 Reg = GPR64ArgRegs[NumIntRegs]; 930 break; 931 } 932 Reg = AddLiveIn(MF, Reg, RC); 933 ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); 934 break; 935 } 936 case MVT::f32: 937 case MVT::f64: 938 case MVT::v16i8: 939 case MVT::v8i16: 940 case MVT::v4i32: 941 case MVT::v2i64: 942 case MVT::v4f32: 943 case MVT::v2f64: { 944 TargetRegisterClass *RC= (ObjectVT == MVT::f32) ? 945 X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ? 946 X86::FR64RegisterClass : X86::VR128RegisterClass); 947 Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC); 948 ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); 949 break; 950 } 951 } 952 NumIntRegs += ObjIntRegs; 953 NumXMMRegs += ObjXMMRegs; 954 } else if (ObjSize) { 955 // XMM arguments have to be aligned on 16-byte boundary. 956 if (ObjSize == 16) 957 ArgOffset = ((ArgOffset + 15) / 16) * 16; 958 // Create the SelectionDAG nodes corresponding to a load from this 959 // parameter. 960 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 961 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 962 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, 963 DAG.getSrcValue(NULL)); 964 ArgOffset += ArgIncrement; // Move on to the next argument. 965 } 966 967 ArgValues.push_back(ArgValue); 968 } 969 970 // If the function takes variable number of arguments, make a frame index for 971 // the start of the first vararg value... for expansion of llvm.va_start. 972 if (isVarArg) { 973 // For X86-64, if there are vararg parameters that are passed via 974 // registers, then we must store them to their spots on the stack so they 975 // may be loaded by deferencing the result of va_next. 
976 VarArgsGPOffset = NumIntRegs * 8; 977 VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16; 978 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 979 RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16); 980 981 // Store the integer parameter registers. 982 std::vector<SDOperand> MemOps; 983 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 984 SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 985 DAG.getConstant(VarArgsGPOffset, getPointerTy())); 986 for (; NumIntRegs != 6; ++NumIntRegs) { 987 unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], 988 X86::GR64RegisterClass); 989 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); 990 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1), 991 Val, FIN, DAG.getSrcValue(NULL)); 992 MemOps.push_back(Store); 993 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 994 DAG.getConstant(8, getPointerTy())); 995 } 996 997 // Now store the XMM (fp + vector) parameter registers. 998 FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 999 DAG.getConstant(VarArgsFPOffset, getPointerTy())); 1000 for (; NumXMMRegs != 8; ++NumXMMRegs) { 1001 unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 1002 X86::VR128RegisterClass); 1003 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); 1004 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1), 1005 Val, FIN, DAG.getSrcValue(NULL)); 1006 MemOps.push_back(Store); 1007 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1008 DAG.getConstant(16, getPointerTy())); 1009 } 1010 if (!MemOps.empty()) 1011 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, 1012 &MemOps[0], MemOps.size()); 1013 } 1014 1015 ArgValues.push_back(Root); 1016 1017 ReturnAddrIndex = 0; // No return address slot generated yet. 1018 BytesToPopOnReturn = 0; // Callee pops nothing. 1019 BytesCallerReserves = ArgOffset; 1020 1021 // Return the new list of results. 1022 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 1023 Op.Val->value_end()); 1024 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 1025} 1026 1027SDOperand 1028X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) { 1029 SDOperand Chain = Op.getOperand(0); 1030 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 1031 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1032 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1033 SDOperand Callee = Op.getOperand(4); 1034 MVT::ValueType RetVT= Op.Val->getValueType(0); 1035 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 1036 1037 // Count how many bytes are to be pushed on the stack. 1038 unsigned NumBytes = 0; 1039 unsigned NumIntRegs = 0; // Int regs used for parameter passing. 1040 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 
1041 1042 static const unsigned GPR8ArgRegs[] = { 1043 X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B 1044 }; 1045 static const unsigned GPR16ArgRegs[] = { 1046 X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W 1047 }; 1048 static const unsigned GPR32ArgRegs[] = { 1049 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D 1050 }; 1051 static const unsigned GPR64ArgRegs[] = { 1052 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 1053 }; 1054 static const unsigned XMMArgRegs[] = { 1055 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1056 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1057 }; 1058 1059 for (unsigned i = 0; i != NumOps; ++i) { 1060 SDOperand Arg = Op.getOperand(5+2*i); 1061 MVT::ValueType ArgVT = Arg.getValueType(); 1062 1063 switch (ArgVT) { 1064 default: assert(0 && "Unknown value type!"); 1065 case MVT::i8: 1066 case MVT::i16: 1067 case MVT::i32: 1068 case MVT::i64: 1069 if (NumIntRegs < 6) 1070 ++NumIntRegs; 1071 else 1072 NumBytes += 8; 1073 break; 1074 case MVT::f32: 1075 case MVT::f64: 1076 case MVT::v16i8: 1077 case MVT::v8i16: 1078 case MVT::v4i32: 1079 case MVT::v2i64: 1080 case MVT::v4f32: 1081 case MVT::v2f64: 1082 if (NumXMMRegs < 8) 1083 NumXMMRegs++; 1084 else if (ArgVT == MVT::f32 || ArgVT == MVT::f64) 1085 NumBytes += 8; 1086 else { 1087 // XMM arguments have to be aligned on 16-byte boundary. 1088 NumBytes = ((NumBytes + 15) / 16) * 16; 1089 NumBytes += 16; 1090 } 1091 break; 1092 } 1093 } 1094 1095 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1096 1097 // Arguments go on the stack in reverse order, as specified by the ABI. 1098 unsigned ArgOffset = 0; 1099 NumIntRegs = 0; 1100 NumXMMRegs = 0; 1101 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 1102 std::vector<SDOperand> MemOpChains; 1103 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); 1104 for (unsigned i = 0; i != NumOps; ++i) { 1105 SDOperand Arg = Op.getOperand(5+2*i); 1106 MVT::ValueType ArgVT = Arg.getValueType(); 1107 1108 switch (ArgVT) { 1109 default: assert(0 && "Unexpected ValueType for argument!"); 1110 case MVT::i8: 1111 case MVT::i16: 1112 case MVT::i32: 1113 case MVT::i64: 1114 if (NumIntRegs < 6) { 1115 unsigned Reg = 0; 1116 switch (ArgVT) { 1117 default: break; 1118 case MVT::i8: Reg = GPR8ArgRegs[NumIntRegs]; break; 1119 case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break; 1120 case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break; 1121 case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break; 1122 } 1123 RegsToPass.push_back(std::make_pair(Reg, Arg)); 1124 ++NumIntRegs; 1125 } else { 1126 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1127 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1128 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1129 Arg, PtrOff, DAG.getSrcValue(NULL))); 1130 ArgOffset += 8; 1131 } 1132 break; 1133 case MVT::f32: 1134 case MVT::f64: 1135 case MVT::v16i8: 1136 case MVT::v8i16: 1137 case MVT::v4i32: 1138 case MVT::v2i64: 1139 case MVT::v4f32: 1140 case MVT::v2f64: 1141 if (NumXMMRegs < 8) { 1142 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 1143 NumXMMRegs++; 1144 } else { 1145 if (ArgVT != MVT::f32 && ArgVT != MVT::f64) { 1146 // XMM arguments have to be aligned on 16-byte boundary. 
1147 ArgOffset = ((ArgOffset + 15) / 16) * 16; 1148 } 1149 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1150 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1151 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1152 Arg, PtrOff, DAG.getSrcValue(NULL))); 1153 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) 1154 ArgOffset += 8; 1155 else 1156 ArgOffset += 16; 1157 } 1158 } 1159 } 1160 1161 if (!MemOpChains.empty()) 1162 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1163 &MemOpChains[0], MemOpChains.size()); 1164 1165 // Build a sequence of copy-to-reg nodes chained together with token chain 1166 // and flag operands which copy the outgoing args into registers. 1167 SDOperand InFlag; 1168 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1169 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1170 InFlag); 1171 InFlag = Chain.getValue(1); 1172 } 1173 1174 if (isVarArg) { 1175 // From AMD64 ABI document: 1176 // For calls that may call functions that use varargs or stdargs 1177 // (prototype-less calls or calls to functions containing ellipsis (...) in 1178 // the declaration) %al is used as hidden argument to specify the number 1179 // of SSE registers used. The contents of %al do not need to match exactly 1180 // the number of registers, but must be an ubound on the number of SSE 1181 // registers used and is in the range 0 - 8 inclusive. 1182 Chain = DAG.getCopyToReg(Chain, X86::AL, 1183 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1184 InFlag = Chain.getValue(1); 1185 } 1186 1187 // If the callee is a GlobalAddress node (quite common, every direct call is) 1188 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1189 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1190 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1191 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1192 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1193 1194 std::vector<MVT::ValueType> NodeTys; 1195 NodeTys.push_back(MVT::Other); // Returns a chain 1196 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1197 std::vector<SDOperand> Ops; 1198 Ops.push_back(Chain); 1199 Ops.push_back(Callee); 1200 1201 // Add argument registers to the end of the list so that they are known live 1202 // into the call. 1203 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1204 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1205 RegsToPass[i].second.getValueType())); 1206 1207 if (InFlag.Val) 1208 Ops.push_back(InFlag); 1209 1210 // FIXME: Do not generate X86ISD::TAILCALL for now. 1211 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1212 NodeTys, &Ops[0], Ops.size()); 1213 InFlag = Chain.getValue(1); 1214 1215 NodeTys.clear(); 1216 NodeTys.push_back(MVT::Other); // Returns a chain 1217 if (RetVT != MVT::Other) 1218 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 
1219 Ops.clear(); 1220 Ops.push_back(Chain); 1221 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1222 Ops.push_back(DAG.getConstant(0, getPointerTy())); 1223 Ops.push_back(InFlag); 1224 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1225 if (RetVT != MVT::Other) 1226 InFlag = Chain.getValue(1); 1227 1228 std::vector<SDOperand> ResultVals; 1229 NodeTys.clear(); 1230 switch (RetVT) { 1231 default: assert(0 && "Unknown value type to return!"); 1232 case MVT::Other: break; 1233 case MVT::i8: 1234 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 1235 ResultVals.push_back(Chain.getValue(0)); 1236 NodeTys.push_back(MVT::i8); 1237 break; 1238 case MVT::i16: 1239 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 1240 ResultVals.push_back(Chain.getValue(0)); 1241 NodeTys.push_back(MVT::i16); 1242 break; 1243 case MVT::i32: 1244 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 1245 ResultVals.push_back(Chain.getValue(0)); 1246 NodeTys.push_back(MVT::i32); 1247 break; 1248 case MVT::i64: 1249 if (Op.Val->getValueType(1) == MVT::i64) { 1250 // FIXME: __int128 support? 1251 Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1); 1252 ResultVals.push_back(Chain.getValue(0)); 1253 Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64, 1254 Chain.getValue(2)).getValue(1); 1255 ResultVals.push_back(Chain.getValue(0)); 1256 NodeTys.push_back(MVT::i64); 1257 } else { 1258 Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1); 1259 ResultVals.push_back(Chain.getValue(0)); 1260 } 1261 NodeTys.push_back(MVT::i64); 1262 break; 1263 case MVT::f32: 1264 case MVT::f64: 1265 case MVT::v16i8: 1266 case MVT::v8i16: 1267 case MVT::v4i32: 1268 case MVT::v2i64: 1269 case MVT::v4f32: 1270 case MVT::v2f64: 1271 // FIXME: long double support? 1272 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); 1273 ResultVals.push_back(Chain.getValue(0)); 1274 NodeTys.push_back(RetVT); 1275 break; 1276 } 1277 1278 // If the function returns void, just return the chain. 1279 if (ResultVals.empty()) 1280 return Chain; 1281 1282 // Otherwise, merge everything together with a MERGE_VALUES node. 1283 NodeTys.push_back(MVT::Other); 1284 ResultVals.push_back(Chain); 1285 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, 1286 &ResultVals[0], ResultVals.size()); 1287 return Res.getValue(Op.ResNo); 1288} 1289 1290//===----------------------------------------------------------------------===// 1291// Fast Calling Convention implementation 1292//===----------------------------------------------------------------------===// 1293// 1294// The X86 'fast' calling convention passes up to two integer arguments in 1295// registers (an appropriate portion of EAX/EDX), passes arguments in C order, 1296// and requires that the callee pop its arguments off the stack (allowing proper 1297// tail calls), and has the same return value conventions as C calling convs. 1298// 1299// This calling convention always arranges for the callee pop value to be 8n+4 1300// bytes, which is needed for tail recursion elimination and stack alignment 1301// reasons. 1302// 1303// Note that this can be enhanced in the future to pass fp vals in registers 1304// (when we have a global fp allocator) and do other tricks. 1305// 1306 1307/// HowToPassFastCCArgument - Returns how an formal argument of the specified 1308/// type should be passed. 
If it is through stack, returns the size of the stack 1309/// slot; if it is through integer or XMM register, returns the number of 1310/// integer or XMM registers are needed. 1311static void 1312HowToPassFastCCArgument(MVT::ValueType ObjectVT, 1313 unsigned NumIntRegs, unsigned NumXMMRegs, 1314 unsigned &ObjSize, unsigned &ObjIntRegs, 1315 unsigned &ObjXMMRegs) { 1316 ObjSize = 0; 1317 ObjIntRegs = 0; 1318 ObjXMMRegs = 0; 1319 1320 switch (ObjectVT) { 1321 default: assert(0 && "Unhandled argument type!"); 1322 case MVT::i8: 1323#if FASTCC_NUM_INT_ARGS_INREGS > 0 1324 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 1325 ObjIntRegs = 1; 1326 else 1327#endif 1328 ObjSize = 1; 1329 break; 1330 case MVT::i16: 1331#if FASTCC_NUM_INT_ARGS_INREGS > 0 1332 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 1333 ObjIntRegs = 1; 1334 else 1335#endif 1336 ObjSize = 2; 1337 break; 1338 case MVT::i32: 1339#if FASTCC_NUM_INT_ARGS_INREGS > 0 1340 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 1341 ObjIntRegs = 1; 1342 else 1343#endif 1344 ObjSize = 4; 1345 break; 1346 case MVT::i64: 1347#if FASTCC_NUM_INT_ARGS_INREGS > 0 1348 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 1349 ObjIntRegs = 2; 1350 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 1351 ObjIntRegs = 1; 1352 ObjSize = 4; 1353 } else 1354#endif 1355 ObjSize = 8; 1356 case MVT::f32: 1357 ObjSize = 4; 1358 break; 1359 case MVT::f64: 1360 ObjSize = 8; 1361 break; 1362 case MVT::v16i8: 1363 case MVT::v8i16: 1364 case MVT::v4i32: 1365 case MVT::v2i64: 1366 case MVT::v4f32: 1367 case MVT::v2f64: 1368 if (NumXMMRegs < 4) 1369 ObjXMMRegs = 1; 1370 else 1371 ObjSize = 16; 1372 break; 1373 } 1374} 1375 1376SDOperand 1377X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 1378 unsigned NumArgs = Op.Val->getNumValues()-1; 1379 MachineFunction &MF = DAG.getMachineFunction(); 1380 MachineFrameInfo *MFI = MF.getFrameInfo(); 1381 SDOperand Root = Op.getOperand(0); 1382 std::vector<SDOperand> ArgValues; 1383 1384 // Add DAG nodes to load the arguments... On entry to a function the stack 1385 // frame looks like this: 1386 // 1387 // [ESP] -- return address 1388 // [ESP + 4] -- first nonreg argument (leftmost lexically) 1389 // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size 1390 // ... 1391 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 1392 1393 // Keep track of the number of integer regs passed so far. This can be either 1394 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 1395 // used). 1396 unsigned NumIntRegs = 0; 1397 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 1398 1399 static const unsigned XMMArgRegs[] = { 1400 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 1401 }; 1402 1403 for (unsigned i = 0; i < NumArgs; ++i) { 1404 MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); 1405 unsigned ArgIncrement = 4; 1406 unsigned ObjSize = 0; 1407 unsigned ObjIntRegs = 0; 1408 unsigned ObjXMMRegs = 0; 1409 1410 HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs, 1411 ObjSize, ObjIntRegs, ObjXMMRegs); 1412 if (ObjSize > 4) 1413 ArgIncrement = ObjSize; 1414 1415 unsigned Reg = 0; 1416 SDOperand ArgValue; 1417 if (ObjIntRegs || ObjXMMRegs) { 1418 switch (ObjectVT) { 1419 default: assert(0 && "Unhandled argument type!"); 1420 case MVT::i8: 1421 Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, 1422 X86::GR8RegisterClass); 1423 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8); 1424 break; 1425 case MVT::i16: 1426 Reg = AddLiveIn(MF, NumIntRegs ? 
X86::DX : X86::AX, 1427 X86::GR16RegisterClass); 1428 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16); 1429 break; 1430 case MVT::i32: 1431 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 1432 X86::GR32RegisterClass); 1433 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32); 1434 break; 1435 case MVT::i64: 1436 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 1437 X86::GR32RegisterClass); 1438 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32); 1439 if (ObjIntRegs == 2) { 1440 Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass); 1441 SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32); 1442 ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2); 1443 } 1444 break; 1445 case MVT::v16i8: 1446 case MVT::v8i16: 1447 case MVT::v4i32: 1448 case MVT::v2i64: 1449 case MVT::v4f32: 1450 case MVT::v2f64: 1451 Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass); 1452 ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); 1453 break; 1454 } 1455 NumIntRegs += ObjIntRegs; 1456 NumXMMRegs += ObjXMMRegs; 1457 } 1458 1459 if (ObjSize) { 1460 // XMM arguments have to be aligned on 16-byte boundary. 1461 if (ObjSize == 16) 1462 ArgOffset = ((ArgOffset + 15) / 16) * 16; 1463 // Create the SelectionDAG nodes corresponding to a load from this 1464 // parameter. 1465 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 1466 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 1467 if (ObjectVT == MVT::i64 && ObjIntRegs) { 1468 SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, 1469 DAG.getSrcValue(NULL)); 1470 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2); 1471 } else 1472 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, 1473 DAG.getSrcValue(NULL)); 1474 ArgOffset += ArgIncrement; // Move on to the next argument. 1475 } 1476 1477 ArgValues.push_back(ArgValue); 1478 } 1479 1480 ArgValues.push_back(Root); 1481 1482 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1483 // arguments and the arguments after the retaddr has been pushed are aligned. 1484 if ((ArgOffset & 7) == 0) 1485 ArgOffset += 4; 1486 1487 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 1488 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 1489 ReturnAddrIndex = 0; // No return address slot generated yet. 1490 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 1491 BytesCallerReserves = 0; 1492 1493 // Finally, inform the code generator which regs we return values in. 1494 switch (getValueType(MF.getFunction()->getReturnType())) { 1495 default: assert(0 && "Unknown type!"); 1496 case MVT::isVoid: break; 1497 case MVT::i8: 1498 case MVT::i16: 1499 case MVT::i32: 1500 MF.addLiveOut(X86::EAX); 1501 break; 1502 case MVT::i64: 1503 MF.addLiveOut(X86::EAX); 1504 MF.addLiveOut(X86::EDX); 1505 break; 1506 case MVT::f32: 1507 case MVT::f64: 1508 MF.addLiveOut(X86::ST0); 1509 break; 1510 case MVT::v16i8: 1511 case MVT::v8i16: 1512 case MVT::v4i32: 1513 case MVT::v2i64: 1514 case MVT::v4f32: 1515 case MVT::v2f64: 1516 MF.addLiveOut(X86::XMM0); 1517 break; 1518 } 1519 1520 // Return the new list of results. 
1521 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 1522 Op.Val->value_end()); 1523 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 1524} 1525 1526SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG){ 1527 SDOperand Chain = Op.getOperand(0); 1528 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 1529 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1530 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1531 SDOperand Callee = Op.getOperand(4); 1532 MVT::ValueType RetVT= Op.Val->getValueType(0); 1533 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 1534 1535 // Count how many bytes are to be pushed on the stack. 1536 unsigned NumBytes = 0; 1537 1538 // Keep track of the number of integer regs passed so far. This can be either 1539 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 1540 // used). 1541 unsigned NumIntRegs = 0; 1542 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 1543 1544 static const unsigned GPRArgRegs[][2] = { 1545 { X86::AL, X86::DL }, 1546 { X86::AX, X86::DX }, 1547 { X86::EAX, X86::EDX } 1548 }; 1549 static const unsigned XMMArgRegs[] = { 1550 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 1551 }; 1552 1553 for (unsigned i = 0; i != NumOps; ++i) { 1554 SDOperand Arg = Op.getOperand(5+2*i); 1555 1556 switch (Arg.getValueType()) { 1557 default: assert(0 && "Unknown value type!"); 1558 case MVT::i8: 1559 case MVT::i16: 1560 case MVT::i32: 1561#if FASTCC_NUM_INT_ARGS_INREGS > 0 1562 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1563 ++NumIntRegs; 1564 break; 1565 } 1566#endif 1567 // Fall through 1568 case MVT::f32: 1569 NumBytes += 4; 1570 break; 1571 case MVT::f64: 1572 NumBytes += 8; 1573 break; 1574 case MVT::v16i8: 1575 case MVT::v8i16: 1576 case MVT::v4i32: 1577 case MVT::v2i64: 1578 case MVT::v4f32: 1579 case MVT::v2f64: 1580 if (NumXMMRegs < 4) 1581 NumXMMRegs++; 1582 else { 1583 // XMM arguments have to be aligned on 16-byte boundary. 1584 NumBytes = ((NumBytes + 15) / 16) * 16; 1585 NumBytes += 16; 1586 } 1587 break; 1588 } 1589 } 1590 1591 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1592 // arguments and the arguments after the retaddr has been pushed are aligned. 1593 if ((NumBytes & 7) == 0) 1594 NumBytes += 4; 1595 1596 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1597 1598 // Arguments go on the stack in reverse order, as specified by the ABI. 
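  // Illustrative sketch, assuming FASTCC_NUM_INT_ARGS_INREGS is 2: for a call
  // f(int a, int b, double d), the loop below assigns a to EAX and b to EDX,
  // stores d at [ESP], and the 8n+4 adjustment above already bumped NumBytes
  // from 8 to 12 so the argument area stays aligned once the return address
  // is pushed.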
1599 unsigned ArgOffset = 0; 1600 NumIntRegs = 0; 1601 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 1602 std::vector<SDOperand> MemOpChains; 1603 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); 1604 for (unsigned i = 0; i != NumOps; ++i) { 1605 SDOperand Arg = Op.getOperand(5+2*i); 1606 1607 switch (Arg.getValueType()) { 1608 default: assert(0 && "Unexpected ValueType for argument!"); 1609 case MVT::i8: 1610 case MVT::i16: 1611 case MVT::i32: 1612#if FASTCC_NUM_INT_ARGS_INREGS > 0 1613 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1614 RegsToPass.push_back( 1615 std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs], 1616 Arg)); 1617 ++NumIntRegs; 1618 break; 1619 } 1620#endif 1621 // Fall through 1622 case MVT::f32: { 1623 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1624 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1625 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1626 Arg, PtrOff, DAG.getSrcValue(NULL))); 1627 ArgOffset += 4; 1628 break; 1629 } 1630 case MVT::f64: { 1631 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1632 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1633 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1634 Arg, PtrOff, DAG.getSrcValue(NULL))); 1635 ArgOffset += 8; 1636 break; 1637 } 1638 case MVT::v16i8: 1639 case MVT::v8i16: 1640 case MVT::v4i32: 1641 case MVT::v2i64: 1642 case MVT::v4f32: 1643 case MVT::v2f64: 1644 if (NumXMMRegs < 4) { 1645 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 1646 NumXMMRegs++; 1647 } else { 1648 // XMM arguments have to be aligned on 16-byte boundary. 1649 ArgOffset = ((ArgOffset + 15) / 16) * 16; 1650 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1651 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1652 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1653 Arg, PtrOff, DAG.getSrcValue(NULL))); 1654 ArgOffset += 16; 1655 } 1656 } 1657 } 1658 1659 if (!MemOpChains.empty()) 1660 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1661 &MemOpChains[0], MemOpChains.size()); 1662 1663 // Build a sequence of copy-to-reg nodes chained together with token chain 1664 // and flag operands which copy the outgoing args into registers. 1665 SDOperand InFlag; 1666 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1667 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1668 InFlag); 1669 InFlag = Chain.getValue(1); 1670 } 1671 1672 // If the callee is a GlobalAddress node (quite common, every direct call is) 1673 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1674 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1675 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1676 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1677 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1678 1679 std::vector<MVT::ValueType> NodeTys; 1680 NodeTys.push_back(MVT::Other); // Returns a chain 1681 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1682 std::vector<SDOperand> Ops; 1683 Ops.push_back(Chain); 1684 Ops.push_back(Callee); 1685 1686 // Add argument registers to the end of the list so that they are known live 1687 // into the call. 
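  // The resulting call node has the shape
  //   (X86ISD::CALL chain, callee, argreg0, argreg1, ..., inflag)
  // so every argument register is visibly live into the call.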
1688 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1689 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1690 RegsToPass[i].second.getValueType())); 1691 1692 if (InFlag.Val) 1693 Ops.push_back(InFlag); 1694 1695 // FIXME: Do not generate X86ISD::TAILCALL for now. 1696 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1697 NodeTys, &Ops[0], Ops.size()); 1698 InFlag = Chain.getValue(1); 1699 1700 NodeTys.clear(); 1701 NodeTys.push_back(MVT::Other); // Returns a chain 1702 if (RetVT != MVT::Other) 1703 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1704 Ops.clear(); 1705 Ops.push_back(Chain); 1706 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1707 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1708 Ops.push_back(InFlag); 1709 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1710 if (RetVT != MVT::Other) 1711 InFlag = Chain.getValue(1); 1712 1713 std::vector<SDOperand> ResultVals; 1714 NodeTys.clear(); 1715 switch (RetVT) { 1716 default: assert(0 && "Unknown value type to return!"); 1717 case MVT::Other: break; 1718 case MVT::i8: 1719 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 1720 ResultVals.push_back(Chain.getValue(0)); 1721 NodeTys.push_back(MVT::i8); 1722 break; 1723 case MVT::i16: 1724 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 1725 ResultVals.push_back(Chain.getValue(0)); 1726 NodeTys.push_back(MVT::i16); 1727 break; 1728 case MVT::i32: 1729 if (Op.Val->getValueType(1) == MVT::i32) { 1730 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 1731 ResultVals.push_back(Chain.getValue(0)); 1732 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32, 1733 Chain.getValue(2)).getValue(1); 1734 ResultVals.push_back(Chain.getValue(0)); 1735 NodeTys.push_back(MVT::i32); 1736 } else { 1737 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 1738 ResultVals.push_back(Chain.getValue(0)); 1739 } 1740 NodeTys.push_back(MVT::i32); 1741 break; 1742 case MVT::v16i8: 1743 case MVT::v8i16: 1744 case MVT::v4i32: 1745 case MVT::v2i64: 1746 case MVT::v4f32: 1747 case MVT::v2f64: 1748 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); 1749 ResultVals.push_back(Chain.getValue(0)); 1750 NodeTys.push_back(RetVT); 1751 break; 1752 case MVT::f32: 1753 case MVT::f64: { 1754 std::vector<MVT::ValueType> Tys; 1755 Tys.push_back(MVT::f64); 1756 Tys.push_back(MVT::Other); 1757 Tys.push_back(MVT::Flag); 1758 std::vector<SDOperand> Ops; 1759 Ops.push_back(Chain); 1760 Ops.push_back(InFlag); 1761 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, 1762 &Ops[0], Ops.size()); 1763 Chain = RetVal.getValue(1); 1764 InFlag = RetVal.getValue(2); 1765 if (X86ScalarSSE) { 1766 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 1767 // shouldn't be necessary except that RFP cannot be live across 1768 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
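      // The sequence built below is roughly: FP_GET_RESULT (value in ST0),
      // an FST into an 8-byte stack slot, then a load of RetVT from that
      // slot, which moves the x87 return value into an SSE register via
      // memory.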
1769 MachineFunction &MF = DAG.getMachineFunction(); 1770 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1771 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1772 Tys.clear(); 1773 Tys.push_back(MVT::Other); 1774 Ops.clear(); 1775 Ops.push_back(Chain); 1776 Ops.push_back(RetVal); 1777 Ops.push_back(StackSlot); 1778 Ops.push_back(DAG.getValueType(RetVT)); 1779 Ops.push_back(InFlag); 1780 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 1781 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, 1782 DAG.getSrcValue(NULL)); 1783 Chain = RetVal.getValue(1); 1784 } 1785 1786 if (RetVT == MVT::f32 && !X86ScalarSSE) 1787 // FIXME: we would really like to remember that this FP_ROUND 1788 // operation is okay to eliminate if we allow excess FP precision. 1789 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1790 ResultVals.push_back(RetVal); 1791 NodeTys.push_back(RetVT); 1792 break; 1793 } 1794 } 1795 1796 1797 // If the function returns void, just return the chain. 1798 if (ResultVals.empty()) 1799 return Chain; 1800 1801 // Otherwise, merge everything together with a MERGE_VALUES node. 1802 NodeTys.push_back(MVT::Other); 1803 ResultVals.push_back(Chain); 1804 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, 1805 &ResultVals[0], ResultVals.size()); 1806 return Res.getValue(Op.ResNo); 1807} 1808 1809SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1810 if (ReturnAddrIndex == 0) { 1811 // Set up a frame object for the return address. 1812 MachineFunction &MF = DAG.getMachineFunction(); 1813 if (Subtarget->is64Bit()) 1814 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 1815 else 1816 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1817 } 1818 1819 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1820} 1821 1822 1823 1824std::pair<SDOperand, SDOperand> X86TargetLowering:: 1825LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1826 SelectionDAG &DAG) { 1827 SDOperand Result; 1828 if (Depth) // Depths > 0 not supported yet! 1829 Result = DAG.getConstant(0, getPointerTy()); 1830 else { 1831 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1832 if (!isFrameAddress) 1833 // Just load the return address 1834 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, 1835 DAG.getSrcValue(NULL)); 1836 else 1837 Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 1838 DAG.getConstant(4, getPointerTy())); 1839 } 1840 return std::make_pair(Result, Chain); 1841} 1842 1843/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1844/// which corresponds to the condition code. 
1845static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1846 switch (X86CC) { 1847 default: assert(0 && "Unknown X86 conditional code!"); 1848 case X86ISD::COND_A: return X86::JA; 1849 case X86ISD::COND_AE: return X86::JAE; 1850 case X86ISD::COND_B: return X86::JB; 1851 case X86ISD::COND_BE: return X86::JBE; 1852 case X86ISD::COND_E: return X86::JE; 1853 case X86ISD::COND_G: return X86::JG; 1854 case X86ISD::COND_GE: return X86::JGE; 1855 case X86ISD::COND_L: return X86::JL; 1856 case X86ISD::COND_LE: return X86::JLE; 1857 case X86ISD::COND_NE: return X86::JNE; 1858 case X86ISD::COND_NO: return X86::JNO; 1859 case X86ISD::COND_NP: return X86::JNP; 1860 case X86ISD::COND_NS: return X86::JNS; 1861 case X86ISD::COND_O: return X86::JO; 1862 case X86ISD::COND_P: return X86::JP; 1863 case X86ISD::COND_S: return X86::JS; 1864 } 1865} 1866 1867/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1868/// specific condition code. It returns a false if it cannot do a direct 1869/// translation. X86CC is the translated CondCode. LHS/RHS are modified as 1870/// needed. 1871static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1872 unsigned &X86CC, SDOperand &LHS, SDOperand &RHS, 1873 SelectionDAG &DAG) { 1874 X86CC = X86ISD::COND_INVALID; 1875 if (!isFP) { 1876 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1877 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 1878 // X > -1 -> X == 0, jump !sign. 1879 RHS = DAG.getConstant(0, RHS.getValueType()); 1880 X86CC = X86ISD::COND_NS; 1881 return true; 1882 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 1883 // X < 0 -> X == 0, jump on sign. 1884 X86CC = X86ISD::COND_S; 1885 return true; 1886 } 1887 } 1888 1889 switch (SetCCOpcode) { 1890 default: break; 1891 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1892 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 1893 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 1894 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 1895 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 1896 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1897 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 1898 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 1899 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 1900 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 1901 } 1902 } else { 1903 // On a floating point condition, the flags are set as follows: 1904 // ZF PF CF op 1905 // 0 | 0 | 0 | X > Y 1906 // 0 | 0 | 1 | X < Y 1907 // 1 | 0 | 0 | X == Y 1908 // 1 | 1 | 1 | unordered 1909 bool Flip = false; 1910 switch (SetCCOpcode) { 1911 default: break; 1912 case ISD::SETUEQ: 1913 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1914 case ISD::SETOLT: Flip = true; // Fallthrough 1915 case ISD::SETOGT: 1916 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 1917 case ISD::SETOLE: Flip = true; // Fallthrough 1918 case ISD::SETOGE: 1919 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 1920 case ISD::SETUGT: Flip = true; // Fallthrough 1921 case ISD::SETULT: 1922 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 1923 case ISD::SETUGE: Flip = true; // Fallthrough 1924 case ISD::SETULE: 1925 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 1926 case ISD::SETONE: 1927 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1928 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 1929 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 1930 } 1931 if (Flip) 1932 std::swap(LHS, RHS); 1933 } 1934 1935 return X86CC != X86ISD::COND_INVALID; 1936} 1937 1938/// hasFPCMov - is there a floating point 
cmov for the specific X86 condition 1939/// code. The current x86 ISA includes the following FP cmov instructions: 1940/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1941static bool hasFPCMov(unsigned X86CC) { 1942 switch (X86CC) { 1943 default: 1944 return false; 1945 case X86ISD::COND_B: 1946 case X86ISD::COND_BE: 1947 case X86ISD::COND_E: 1948 case X86ISD::COND_P: 1949 case X86ISD::COND_A: 1950 case X86ISD::COND_AE: 1951 case X86ISD::COND_NE: 1952 case X86ISD::COND_NP: 1953 return true; 1954 } 1955} 1956
1957/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 1958/// load. For Darwin, external and weak symbols are indirect, loading the value 1959/// at address GV rather than the value of GV itself. This means that the 1960/// GlobalAddress must be in the base or index register of the address, not the 1961/// GV offset field. 1962static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 1963 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 1964 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 1965} 1966
1967/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1968/// true if Op is undef or if its value falls within the specified range [Low, Hi). 1969static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1970 if (Op.getOpcode() == ISD::UNDEF) 1971 return true; 1972 1973 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1974 return (Val >= Low && Val < Hi); 1975} 1976
1977/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1978/// true if Op is undef or if its value is equal to the specified value. 1979static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1980 if (Op.getOpcode() == ISD::UNDEF) 1981 return true; 1982 return cast<ConstantSDNode>(Op)->getValue() == Val; 1983} 1984
1985/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1986/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1987bool X86::isPSHUFDMask(SDNode *N) { 1988 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1989 1990 if (N->getNumOperands() != 4) 1991 return false; 1992 1993 // Check that the mask does not reference the second vector. 1994 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1995 SDOperand Arg = N->getOperand(i); 1996 if (Arg.getOpcode() == ISD::UNDEF) continue; 1997 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1998 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1999 return false; 2000 } 2001 2002 return true; 2003} 2004
2005/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 2006/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 2007bool X86::isPSHUFHWMask(SDNode *N) { 2008 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2009 2010 if (N->getNumOperands() != 8) 2011 return false; 2012 2013 // Lower quadword copied in order. 2014 for (unsigned i = 0; i != 4; ++i) { 2015 SDOperand Arg = N->getOperand(i); 2016 if (Arg.getOpcode() == ISD::UNDEF) continue; 2017 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2018 if (cast<ConstantSDNode>(Arg)->getValue() != i) 2019 return false; 2020 } 2021 2022 // Upper quadword shuffled.
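  // e.g. <0, 1, 2, 3, 7, 6, 5, 4> is a valid PSHUFHW mask: the low quadword
  // is an identity copy and every high element stays within 4..7.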
2023 for (unsigned i = 4; i != 8; ++i) { 2024 SDOperand Arg = N->getOperand(i); 2025 if (Arg.getOpcode() == ISD::UNDEF) continue; 2026 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2027 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2028 if (Val < 4 || Val > 7) 2029 return false; 2030 } 2031 2032 return true; 2033} 2034 2035/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 2036/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 2037bool X86::isPSHUFLWMask(SDNode *N) { 2038 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2039 2040 if (N->getNumOperands() != 8) 2041 return false; 2042 2043 // Upper quadword copied in order. 2044 for (unsigned i = 4; i != 8; ++i) 2045 if (!isUndefOrEqual(N->getOperand(i), i)) 2046 return false; 2047 2048 // Lower quadword shuffled. 2049 for (unsigned i = 0; i != 4; ++i) 2050 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2051 return false; 2052 2053 return true; 2054} 2055 2056/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2057/// specifies a shuffle of elements that is suitable for input to SHUFP*. 2058static bool isSHUFPMask(std::vector<SDOperand> &N) { 2059 unsigned NumElems = N.size(); 2060 if (NumElems != 2 && NumElems != 4) return false; 2061 2062 unsigned Half = NumElems / 2; 2063 for (unsigned i = 0; i < Half; ++i) 2064 if (!isUndefOrInRange(N[i], 0, NumElems)) 2065 return false; 2066 for (unsigned i = Half; i < NumElems; ++i) 2067 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 2068 return false; 2069 2070 return true; 2071} 2072 2073bool X86::isSHUFPMask(SDNode *N) { 2074 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2075 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2076 return ::isSHUFPMask(Ops); 2077} 2078 2079/// isCommutedSHUFP - Returns true if the shuffle mask is except 2080/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 2081/// half elements to come from vector 1 (which would equal the dest.) and 2082/// the upper half to come from vector 2. 2083static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 2084 unsigned NumElems = Ops.size(); 2085 if (NumElems != 2 && NumElems != 4) return false; 2086 2087 unsigned Half = NumElems / 2; 2088 for (unsigned i = 0; i < Half; ++i) 2089 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 2090 return false; 2091 for (unsigned i = Half; i < NumElems; ++i) 2092 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 2093 return false; 2094 return true; 2095} 2096 2097static bool isCommutedSHUFP(SDNode *N) { 2098 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2099 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2100 return isCommutedSHUFP(Ops); 2101} 2102 2103/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2104/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2105bool X86::isMOVHLPSMask(SDNode *N) { 2106 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2107 2108 if (N->getNumOperands() != 4) 2109 return false; 2110 2111 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2112 return isUndefOrEqual(N->getOperand(0), 6) && 2113 isUndefOrEqual(N->getOperand(1), 7) && 2114 isUndefOrEqual(N->getOperand(2), 2) && 2115 isUndefOrEqual(N->getOperand(3), 3); 2116} 2117 2118/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2119/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 
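/// For v4f32 the expected mask is <4, 5, 2, 3>: the low half of the result
/// comes from V2 and the high half is passed through from V1.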
2120bool X86::isMOVLPMask(SDNode *N) { 2121 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2122 2123 unsigned NumElems = N->getNumOperands(); 2124 if (NumElems != 2 && NumElems != 4) 2125 return false; 2126 2127 for (unsigned i = 0; i < NumElems/2; ++i) 2128 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 2129 return false; 2130 2131 for (unsigned i = NumElems/2; i < NumElems; ++i) 2132 if (!isUndefOrEqual(N->getOperand(i), i)) 2133 return false; 2134 2135 return true; 2136} 2137 2138/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 2139/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 2140/// and MOVLHPS. 2141bool X86::isMOVHPMask(SDNode *N) { 2142 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2143 2144 unsigned NumElems = N->getNumOperands(); 2145 if (NumElems != 2 && NumElems != 4) 2146 return false; 2147 2148 for (unsigned i = 0; i < NumElems/2; ++i) 2149 if (!isUndefOrEqual(N->getOperand(i), i)) 2150 return false; 2151 2152 for (unsigned i = 0; i < NumElems/2; ++i) { 2153 SDOperand Arg = N->getOperand(i + NumElems/2); 2154 if (!isUndefOrEqual(Arg, i + NumElems)) 2155 return false; 2156 } 2157 2158 return true; 2159} 2160 2161/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 2162/// specifies a shuffle of elements that is suitable for input to UNPCKL. 2163bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2164 unsigned NumElems = N.size(); 2165 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2166 return false; 2167 2168 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2169 SDOperand BitI = N[i]; 2170 SDOperand BitI1 = N[i+1]; 2171 if (!isUndefOrEqual(BitI, j)) 2172 return false; 2173 if (V2IsSplat) { 2174 if (isUndefOrEqual(BitI1, NumElems)) 2175 return false; 2176 } else { 2177 if (!isUndefOrEqual(BitI1, j + NumElems)) 2178 return false; 2179 } 2180 } 2181 2182 return true; 2183} 2184 2185bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 2186 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2187 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2188 return ::isUNPCKLMask(Ops, V2IsSplat); 2189} 2190 2191/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 2192/// specifies a shuffle of elements that is suitable for input to UNPCKH. 2193bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2194 unsigned NumElems = N.size(); 2195 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2196 return false; 2197 2198 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2199 SDOperand BitI = N[i]; 2200 SDOperand BitI1 = N[i+1]; 2201 if (!isUndefOrEqual(BitI, j + NumElems/2)) 2202 return false; 2203 if (V2IsSplat) { 2204 if (isUndefOrEqual(BitI1, NumElems)) 2205 return false; 2206 } else { 2207 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 2208 return false; 2209 } 2210 } 2211 2212 return true; 2213} 2214 2215bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 2216 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2217 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2218 return ::isUNPCKHMask(Ops, V2IsSplat); 2219} 2220 2221/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 2222/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, 2223/// <0, 0, 1, 1> 2224bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 2225 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2226 2227 unsigned NumElems = N->getNumOperands(); 2228 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 2229 return false; 2230 2231 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2232 SDOperand BitI = N->getOperand(i); 2233 SDOperand BitI1 = N->getOperand(i+1); 2234 2235 if (!isUndefOrEqual(BitI, j)) 2236 return false; 2237 if (!isUndefOrEqual(BitI1, j)) 2238 return false; 2239 } 2240 2241 return true; 2242} 2243 2244/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 2245/// specifies a shuffle of elements that is suitable for input to MOVSS, 2246/// MOVSD, and MOVD, i.e. setting the lowest element. 2247static bool isMOVLMask(std::vector<SDOperand> &N) { 2248 unsigned NumElems = N.size(); 2249 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2250 return false; 2251 2252 if (!isUndefOrEqual(N[0], NumElems)) 2253 return false; 2254 2255 for (unsigned i = 1; i < NumElems; ++i) { 2256 SDOperand Arg = N[i]; 2257 if (!isUndefOrEqual(Arg, i)) 2258 return false; 2259 } 2260 2261 return true; 2262} 2263 2264bool X86::isMOVLMask(SDNode *N) { 2265 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2266 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2267 return ::isMOVLMask(Ops); 2268} 2269 2270/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 2271/// of what x86 movss want. X86 movs requires the lowest element to be lowest 2272/// element of vector 2 and the other elements to come from vector 1 in order. 2273static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false, 2274 bool V2IsUndef = false) { 2275 unsigned NumElems = Ops.size(); 2276 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2277 return false; 2278 2279 if (!isUndefOrEqual(Ops[0], 0)) 2280 return false; 2281 2282 for (unsigned i = 1; i < NumElems; ++i) { 2283 SDOperand Arg = Ops[i]; 2284 if (!(isUndefOrEqual(Arg, i+NumElems) || 2285 (V2IsUndef && isUndefOrInRange(Arg, NumElems, NumElems*2)) || 2286 (V2IsSplat && isUndefOrEqual(Arg, NumElems)))) 2287 return false; 2288 } 2289 2290 return true; 2291} 2292 2293static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 2294 bool V2IsUndef = false) { 2295 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2296 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2297 return isCommutedMOVL(Ops, V2IsSplat, V2IsUndef); 2298} 2299 2300/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2301/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 
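/// MOVSHDUP duplicates the odd source elements, so the only mask accepted
/// here is (an undef-tolerant form of) <1, 1, 3, 3>.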
2302bool X86::isMOVSHDUPMask(SDNode *N) { 2303 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2304 2305 if (N->getNumOperands() != 4) 2306 return false; 2307 2308 // Expect 1, 1, 3, 3 2309 for (unsigned i = 0; i < 2; ++i) { 2310 SDOperand Arg = N->getOperand(i); 2311 if (Arg.getOpcode() == ISD::UNDEF) continue; 2312 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2313 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2314 if (Val != 1) return false; 2315 } 2316 2317 bool HasHi = false; 2318 for (unsigned i = 2; i < 4; ++i) { 2319 SDOperand Arg = N->getOperand(i); 2320 if (Arg.getOpcode() == ISD::UNDEF) continue; 2321 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2322 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2323 if (Val != 3) return false; 2324 HasHi = true; 2325 } 2326 2327 // Don't use movshdup if it can be done with a shufps. 2328 return HasHi; 2329} 2330 2331/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2332/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 2333bool X86::isMOVSLDUPMask(SDNode *N) { 2334 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2335 2336 if (N->getNumOperands() != 4) 2337 return false; 2338 2339 // Expect 0, 0, 2, 2 2340 for (unsigned i = 0; i < 2; ++i) { 2341 SDOperand Arg = N->getOperand(i); 2342 if (Arg.getOpcode() == ISD::UNDEF) continue; 2343 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2344 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2345 if (Val != 0) return false; 2346 } 2347 2348 bool HasHi = false; 2349 for (unsigned i = 2; i < 4; ++i) { 2350 SDOperand Arg = N->getOperand(i); 2351 if (Arg.getOpcode() == ISD::UNDEF) continue; 2352 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2353 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2354 if (Val != 2) return false; 2355 HasHi = true; 2356 } 2357 2358 // Don't use movshdup if it can be done with a shufps. 2359 return HasHi; 2360} 2361 2362/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2363/// a splat of a single element. 2364static bool isSplatMask(SDNode *N) { 2365 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2366 2367 // This is a splat operation if each element of the permute is the same, and 2368 // if the value doesn't reference the second vector. 2369 unsigned NumElems = N->getNumOperands(); 2370 SDOperand ElementBase; 2371 unsigned i = 0; 2372 for (; i != NumElems; ++i) { 2373 SDOperand Elt = N->getOperand(i); 2374 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) { 2375 ElementBase = Elt; 2376 break; 2377 } 2378 } 2379 2380 if (!ElementBase.Val) 2381 return false; 2382 2383 for (; i != NumElems; ++i) { 2384 SDOperand Arg = N->getOperand(i); 2385 if (Arg.getOpcode() == ISD::UNDEF) continue; 2386 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2387 if (Arg != ElementBase) return false; 2388 } 2389 2390 // Make sure it is a splat of the first vector operand. 2391 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 2392} 2393 2394/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2395/// a splat of a single element and it's a 2 or 4 element mask. 2396bool X86::isSplatMask(SDNode *N) { 2397 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2398 2399 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 
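  // Wider element counts (v8i16, v16i8) are instead promoted to a v4i32 splat
  // by PromoteSplat when the shuffle is lowered.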
2400 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2401 return false; 2402 return ::isSplatMask(N); 2403} 2404
2405/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2406/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2407/// instructions. 2408unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2409 unsigned NumOperands = N->getNumOperands(); 2410 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2411 unsigned Mask = 0; 2412 for (unsigned i = 0; i < NumOperands; ++i) { 2413 unsigned Val = 0; 2414 SDOperand Arg = N->getOperand(NumOperands-i-1); 2415 if (Arg.getOpcode() != ISD::UNDEF) 2416 Val = cast<ConstantSDNode>(Arg)->getValue(); 2417 if (Val >= NumOperands) Val -= NumOperands; 2418 Mask |= Val; 2419 if (i != NumOperands - 1) 2420 Mask <<= Shift; 2421 } 2422 2423 return Mask; 2424} 2425
2426/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2427/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2428/// instructions. 2429unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2430 unsigned Mask = 0; 2431 // 8 nodes, but we only care about the last 4. 2432 for (unsigned i = 7; i >= 4; --i) { 2433 unsigned Val = 0; 2434 SDOperand Arg = N->getOperand(i); 2435 if (Arg.getOpcode() != ISD::UNDEF) 2436 Val = cast<ConstantSDNode>(Arg)->getValue(); 2437 Mask |= (Val - 4); 2438 if (i != 4) 2439 Mask <<= 2; 2440 } 2441 2442 return Mask; 2443} 2444
2445/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2446/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2447/// instructions. 2448unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2449 unsigned Mask = 0; 2450 // 8 nodes, but we only care about the first 4. 2451 for (int i = 3; i >= 0; --i) { 2452 unsigned Val = 0; 2453 SDOperand Arg = N->getOperand(i); 2454 if (Arg.getOpcode() != ISD::UNDEF) 2455 Val = cast<ConstantSDNode>(Arg)->getValue(); 2456 Mask |= Val; 2457 if (i != 0) 2458 Mask <<= 2; 2459 } 2460 2461 return Mask; 2462} 2463
2464/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2465/// specifies an 8-element shuffle that can be broken into a pair of 2466/// PSHUFHW and PSHUFLW. 2467static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2468 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2469 2470 if (N->getNumOperands() != 8) 2471 return false; 2472 2473 // Lower quadword shuffled. 2474 for (unsigned i = 0; i != 4; ++i) { 2475 SDOperand Arg = N->getOperand(i); 2476 if (Arg.getOpcode() == ISD::UNDEF) continue; 2477 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2478 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2479 if (Val >= 4) 2480 return false; 2481 } 2482 2483 // Upper quadword shuffled. 2484 for (unsigned i = 4; i != 8; ++i) { 2485 SDOperand Arg = N->getOperand(i); 2486 if (Arg.getOpcode() == ISD::UNDEF) continue; 2487 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2488 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2489 if (Val < 4 || Val > 7) 2490 return false; 2491 } 2492 2493 return true; 2494} 2495
2496/// CommuteVectorShuffle - Swap vector_shuffle operands as well as 2497/// the values in their permute mask.
2498static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 2499 SDOperand V1 = Op.getOperand(0); 2500 SDOperand V2 = Op.getOperand(1); 2501 SDOperand Mask = Op.getOperand(2); 2502 MVT::ValueType VT = Op.getValueType(); 2503 MVT::ValueType MaskVT = Mask.getValueType(); 2504 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 2505 unsigned NumElems = Mask.getNumOperands(); 2506 std::vector<SDOperand> MaskVec; 2507 2508 for (unsigned i = 0; i != NumElems; ++i) { 2509 SDOperand Arg = Mask.getOperand(i); 2510 if (Arg.getOpcode() == ISD::UNDEF) { 2511 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2512 continue; 2513 } 2514 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2515 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2516 if (Val < NumElems) 2517 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2518 else 2519 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2520 } 2521 2522 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2523 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 2524} 2525 2526/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2527/// match movhlps. The lower half elements should come from upper half of 2528/// V1 (and in order), and the upper half elements should come from the upper 2529/// half of V2 (and in order). 2530static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2531 unsigned NumElems = Mask->getNumOperands(); 2532 if (NumElems != 4) 2533 return false; 2534 for (unsigned i = 0, e = 2; i != e; ++i) 2535 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2536 return false; 2537 for (unsigned i = 2; i != 4; ++i) 2538 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2539 return false; 2540 return true; 2541} 2542 2543/// isScalarLoadToVector - Returns true if the node is a scalar load that 2544/// is promoted to a vector. 2545static inline bool isScalarLoadToVector(SDNode *N) { 2546 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2547 N = N->getOperand(0).Val; 2548 return (N->getOpcode() == ISD::LOAD); 2549 } 2550 return false; 2551} 2552 2553/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2554/// match movlp{s|d}. The lower half elements should come from lower half of 2555/// V1 (and in order), and the upper half elements should come from the upper 2556/// half of V2 (and in order). And since V1 will become the source of the 2557/// MOVLP, it must be either a vector load or a scalar load to vector. 2558static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) { 2559 if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1)) 2560 return false; 2561 2562 unsigned NumElems = Mask->getNumOperands(); 2563 if (NumElems != 2 && NumElems != 4) 2564 return false; 2565 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2566 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2567 return false; 2568 for (unsigned i = NumElems/2; i != NumElems; ++i) 2569 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2570 return false; 2571 return true; 2572} 2573 2574/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2575/// all the same. 
2576static bool isSplatVector(SDNode *N) { 2577 if (N->getOpcode() != ISD::BUILD_VECTOR) 2578 return false; 2579 2580 SDOperand SplatValue = N->getOperand(0); 2581 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2582 if (N->getOperand(i) != SplatValue) 2583 return false; 2584 return true; 2585} 2586 2587/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2588/// to an undef. 2589static bool isUndefShuffle(SDNode *N) { 2590 if (N->getOpcode() != ISD::BUILD_VECTOR) 2591 return false; 2592 2593 SDOperand V1 = N->getOperand(0); 2594 SDOperand V2 = N->getOperand(1); 2595 SDOperand Mask = N->getOperand(2); 2596 unsigned NumElems = Mask.getNumOperands(); 2597 for (unsigned i = 0; i != NumElems; ++i) { 2598 SDOperand Arg = Mask.getOperand(i); 2599 if (Arg.getOpcode() != ISD::UNDEF) { 2600 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2601 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2602 return false; 2603 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2604 return false; 2605 } 2606 } 2607 return true; 2608} 2609 2610/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2611/// that point to V2 points to its first element. 2612static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2613 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2614 2615 bool Changed = false; 2616 std::vector<SDOperand> MaskVec; 2617 unsigned NumElems = Mask.getNumOperands(); 2618 for (unsigned i = 0; i != NumElems; ++i) { 2619 SDOperand Arg = Mask.getOperand(i); 2620 if (Arg.getOpcode() != ISD::UNDEF) { 2621 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2622 if (Val > NumElems) { 2623 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2624 Changed = true; 2625 } 2626 } 2627 MaskVec.push_back(Arg); 2628 } 2629 2630 if (Changed) 2631 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2632 &MaskVec[0], MaskVec.size()); 2633 return Mask; 2634} 2635 2636/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2637/// operation of specified width. 2638static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2639 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2640 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2641 2642 std::vector<SDOperand> MaskVec; 2643 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2644 for (unsigned i = 1; i != NumElems; ++i) 2645 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2646 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2647} 2648 2649/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2650/// of specified width. 2651static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2652 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2653 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2654 std::vector<SDOperand> MaskVec; 2655 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2656 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2657 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2658 } 2659 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2660} 2661 2662/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2663/// of specified width. 
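/// For example, with NumElems == 4 the returned mask is <2, 6, 3, 7>.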
2664static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2665 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2666 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2667 unsigned Half = NumElems/2; 2668 std::vector<SDOperand> MaskVec; 2669 for (unsigned i = 0; i != Half; ++i) { 2670 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2671 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2672 } 2673 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2674} 2675 2676/// getZeroVector - Returns a vector of specified type with all zero elements. 2677/// 2678static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2679 assert(MVT::isVector(VT) && "Expected a vector type"); 2680 unsigned NumElems = getVectorNumElements(VT); 2681 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2682 bool isFP = MVT::isFloatingPoint(EVT); 2683 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2684 std::vector<SDOperand> ZeroVec(NumElems, Zero); 2685 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2686} 2687 2688/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2689/// 2690static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2691 SDOperand V1 = Op.getOperand(0); 2692 SDOperand Mask = Op.getOperand(2); 2693 MVT::ValueType VT = Op.getValueType(); 2694 unsigned NumElems = Mask.getNumOperands(); 2695 Mask = getUnpacklMask(NumElems, DAG); 2696 while (NumElems != 4) { 2697 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2698 NumElems >>= 1; 2699 } 2700 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2701 2702 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2703 Mask = getZeroVector(MaskVT, DAG); 2704 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2705 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2706 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2707} 2708 2709/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2710/// constant +0.0. 2711static inline bool isZeroNode(SDOperand Elt) { 2712 return ((isa<ConstantSDNode>(Elt) && 2713 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2714 (isa<ConstantFPSDNode>(Elt) && 2715 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2716} 2717 2718/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2719/// vector and zero or undef vector. 2720static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2721 unsigned NumElems, unsigned Idx, 2722 bool isZero, SelectionDAG &DAG) { 2723 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2724 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2725 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2726 SDOperand Zero = DAG.getConstant(0, EVT); 2727 std::vector<SDOperand> MaskVec(NumElems, Zero); 2728 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2729 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2730 &MaskVec[0], MaskVec.size()); 2731 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2732} 2733 2734/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
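/// Non-zero bytes are zero-extended to i16, adjacent pairs are merged with a
/// SHL/OR, the merged values are inserted into a v8i16, and the result is
/// bitcast back to v16i8.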
2735/// 2736static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2737 unsigned NumNonZero, unsigned NumZero, 2738 SelectionDAG &DAG, TargetLowering &TLI) { 2739 if (NumNonZero > 8) 2740 return SDOperand(); 2741 2742 SDOperand V(0, 0); 2743 bool First = true; 2744 for (unsigned i = 0; i < 16; ++i) { 2745 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2746 if (ThisIsNonZero && First) { 2747 if (NumZero) 2748 V = getZeroVector(MVT::v8i16, DAG); 2749 else 2750 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2751 First = false; 2752 } 2753 2754 if ((i & 1) != 0) { 2755 SDOperand ThisElt(0, 0), LastElt(0, 0); 2756 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2757 if (LastIsNonZero) { 2758 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2759 } 2760 if (ThisIsNonZero) { 2761 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2762 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2763 ThisElt, DAG.getConstant(8, MVT::i8)); 2764 if (LastIsNonZero) 2765 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2766 } else 2767 ThisElt = LastElt; 2768 2769 if (ThisElt.Val) 2770 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2771 DAG.getConstant(i/2, TLI.getPointerTy())); 2772 } 2773 } 2774 2775 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2776} 2777 2778/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16. 2779/// 2780static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2781 unsigned NumNonZero, unsigned NumZero, 2782 SelectionDAG &DAG, TargetLowering &TLI) { 2783 if (NumNonZero > 4) 2784 return SDOperand(); 2785 2786 SDOperand V(0, 0); 2787 bool First = true; 2788 for (unsigned i = 0; i < 8; ++i) { 2789 bool isNonZero = (NonZeros & (1 << i)) != 0; 2790 if (isNonZero) { 2791 if (First) { 2792 if (NumZero) 2793 V = getZeroVector(MVT::v8i16, DAG); 2794 else 2795 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2796 First = false; 2797 } 2798 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2799 DAG.getConstant(i, TLI.getPointerTy())); 2800 } 2801 } 2802 2803 return V; 2804} 2805 2806SDOperand 2807X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2808 // All zero's are handled with pxor. 2809 if (ISD::isBuildVectorAllZeros(Op.Val)) 2810 return Op; 2811 2812 // All one's are handled with pcmpeqd. 2813 if (ISD::isBuildVectorAllOnes(Op.Val)) 2814 return Op; 2815 2816 MVT::ValueType VT = Op.getValueType(); 2817 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2818 unsigned EVTBits = MVT::getSizeInBits(EVT); 2819 2820 unsigned NumElems = Op.getNumOperands(); 2821 unsigned NumZero = 0; 2822 unsigned NumNonZero = 0; 2823 unsigned NonZeros = 0; 2824 std::set<SDOperand> Values; 2825 for (unsigned i = 0; i < NumElems; ++i) { 2826 SDOperand Elt = Op.getOperand(i); 2827 if (Elt.getOpcode() != ISD::UNDEF) { 2828 Values.insert(Elt); 2829 if (isZeroNode(Elt)) 2830 NumZero++; 2831 else { 2832 NonZeros |= (1 << i); 2833 NumNonZero++; 2834 } 2835 } 2836 } 2837 2838 if (NumNonZero == 0) 2839 // Must be a mix of zero and undef. Return a zero vector. 2840 return getZeroVector(VT, DAG); 2841 2842 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2843 if (Values.size() == 1) 2844 return SDOperand(); 2845 2846 // Special case for single non-zero element. 
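  // e.g. (build_vector 0, x, 0, 0) becomes a scalar_to_vector of x which is
  // then shuffled against a zero vector so that x ends up in the requested
  // lane and the remaining lanes are cleared.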
2847 if (NumNonZero == 1) { 2848 unsigned Idx = CountTrailingZeros_32(NonZeros); 2849 SDOperand Item = Op.getOperand(Idx); 2850 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2851 if (Idx == 0) 2852 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2853 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2854 NumZero > 0, DAG); 2855 2856 if (EVTBits == 32) { 2857 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2858 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2859 DAG); 2860 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2861 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 2862 std::vector<SDOperand> MaskVec; 2863 for (unsigned i = 0; i < NumElems; i++) 2864 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2865 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2866 &MaskVec[0], MaskVec.size()); 2867 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2868 DAG.getNode(ISD::UNDEF, VT), Mask); 2869 } 2870 } 2871 2872 // Let legalizer expand 2-widde build_vector's. 2873 if (EVTBits == 64) 2874 return SDOperand(); 2875 2876 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2877 if (EVTBits == 8) { 2878 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2879 *this); 2880 if (V.Val) return V; 2881 } 2882 2883 if (EVTBits == 16) { 2884 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2885 *this); 2886 if (V.Val) return V; 2887 } 2888 2889 // If element VT is == 32 bits, turn it into a number of shuffles. 2890 std::vector<SDOperand> V(NumElems); 2891 if (NumElems == 4 && NumZero > 0) { 2892 for (unsigned i = 0; i < 4; ++i) { 2893 bool isZero = !(NonZeros & (1 << i)); 2894 if (isZero) 2895 V[i] = getZeroVector(VT, DAG); 2896 else 2897 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2898 } 2899 2900 for (unsigned i = 0; i < 2; ++i) { 2901 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2902 default: break; 2903 case 0: 2904 V[i] = V[i*2]; // Must be a zero vector. 2905 break; 2906 case 1: 2907 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2908 getMOVLMask(NumElems, DAG)); 2909 break; 2910 case 2: 2911 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2912 getMOVLMask(NumElems, DAG)); 2913 break; 2914 case 3: 2915 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2916 getUnpacklMask(NumElems, DAG)); 2917 break; 2918 } 2919 } 2920 2921 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 2922 // clears the upper bits. 2923 // FIXME: we can do the same for v4f32 case when we know both parts of 2924 // the lower half come from scalar_to_vector (loadf32). We should do 2925 // that in post legalizer dag combiner with target specific hooks. 
2926 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2927 return V[0]; 2928 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2929 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2930 std::vector<SDOperand> MaskVec; 2931 bool Reverse = (NonZeros & 0x3) == 2; 2932 for (unsigned i = 0; i < 2; ++i) 2933 if (Reverse) 2934 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2935 else 2936 MaskVec.push_back(DAG.getConstant(i, EVT)); 2937 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2938 for (unsigned i = 0; i < 2; ++i) 2939 if (Reverse) 2940 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2941 else 2942 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2943 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2944 &MaskVec[0], MaskVec.size()); 2945 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2946 } 2947 2948 if (Values.size() > 2) { 2949 // Expand into a number of unpckl*. 2950 // e.g. for v4f32 2951 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2952 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2953 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2954 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2955 for (unsigned i = 0; i < NumElems; ++i) 2956 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2957 NumElems >>= 1; 2958 while (NumElems != 0) { 2959 for (unsigned i = 0; i < NumElems; ++i) 2960 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2961 UnpckMask); 2962 NumElems >>= 1; 2963 } 2964 return V[0]; 2965 } 2966 2967 return SDOperand(); 2968} 2969 2970SDOperand 2971X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2972 SDOperand V1 = Op.getOperand(0); 2973 SDOperand V2 = Op.getOperand(1); 2974 SDOperand PermMask = Op.getOperand(2); 2975 MVT::ValueType VT = Op.getValueType(); 2976 unsigned NumElems = PermMask.getNumOperands(); 2977 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2978 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2979 2980 if (isUndefShuffle(Op.Val)) 2981 return DAG.getNode(ISD::UNDEF, VT); 2982 2983 if (isSplatMask(PermMask.Val)) { 2984 if (NumElems <= 4) return Op; 2985 // Promote it to a v4i32 splat. 2986 return PromoteSplat(Op, DAG); 2987 } 2988 2989 if (X86::isMOVLMask(PermMask.Val)) 2990 return (V1IsUndef) ? V2 : Op; 2991 2992 if (X86::isMOVSHDUPMask(PermMask.Val) || 2993 X86::isMOVSLDUPMask(PermMask.Val) || 2994 X86::isMOVHLPSMask(PermMask.Val) || 2995 X86::isMOVHPMask(PermMask.Val) || 2996 X86::isMOVLPMask(PermMask.Val)) 2997 return Op; 2998 2999 if (ShouldXformToMOVHLPS(PermMask.Val) || 3000 ShouldXformToMOVLP(V1.Val, PermMask.Val)) 3001 return CommuteVectorShuffle(Op, DAG); 3002 3003 bool V1IsSplat = isSplatVector(V1.Val); 3004 bool V2IsSplat = isSplatVector(V2.Val); 3005 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 3006 Op = CommuteVectorShuffle(Op, DAG); 3007 V1 = Op.getOperand(0); 3008 V2 = Op.getOperand(1); 3009 PermMask = Op.getOperand(2); 3010 std::swap(V1IsSplat, V2IsSplat); 3011 std::swap(V1IsUndef, V2IsUndef); 3012 } 3013 3014 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 3015 if (V2IsUndef) return V1; 3016 Op = CommuteVectorShuffle(Op, DAG); 3017 V1 = Op.getOperand(0); 3018 V2 = Op.getOperand(1); 3019 PermMask = Op.getOperand(2); 3020 if (V2IsSplat) { 3021 // V2 is a splat, so the mask may be malformed. That is, it may point 3022 // to any V2 element. The instruction selectior won't like this. Get 3023 // a corrected mask and commute to form a proper MOVS{S|D}. 
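      // e.g. after commuting, a mask such as <4, 0, 0, 0> selects the same
      // values as the canonical MOVL mask <4, 1, 2, 3>, because every lane of
      // the splatted operand is identical; getMOVLMask returns the canonical
      // form.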
3024 SDOperand NewMask = getMOVLMask(NumElems, DAG); 3025 if (NewMask.Val != PermMask.Val) 3026 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3027 } 3028 return Op; 3029 } 3030 3031 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 3032 X86::isUNPCKLMask(PermMask.Val) || 3033 X86::isUNPCKHMask(PermMask.Val)) 3034 return Op; 3035 3036 if (V2IsSplat) { 3037 // Normalize mask so all entries that point to V2 points to its first 3038 // element then try to match unpck{h|l} again. If match, return a 3039 // new vector_shuffle with the corrected mask. 3040 SDOperand NewMask = NormalizeMask(PermMask, DAG); 3041 if (NewMask.Val != PermMask.Val) { 3042 if (X86::isUNPCKLMask(PermMask.Val, true)) { 3043 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 3044 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3045 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 3046 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 3047 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3048 } 3049 } 3050 } 3051 3052 // Normalize the node to match x86 shuffle ops if needed 3053 if (V2.getOpcode() != ISD::UNDEF) 3054 if (isCommutedSHUFP(PermMask.Val)) { 3055 Op = CommuteVectorShuffle(Op, DAG); 3056 V1 = Op.getOperand(0); 3057 V2 = Op.getOperand(1); 3058 PermMask = Op.getOperand(2); 3059 } 3060 3061 // If VT is integer, try PSHUF* first, then SHUFP*. 3062 if (MVT::isInteger(VT)) { 3063 if (X86::isPSHUFDMask(PermMask.Val) || 3064 X86::isPSHUFHWMask(PermMask.Val) || 3065 X86::isPSHUFLWMask(PermMask.Val)) { 3066 if (V2.getOpcode() != ISD::UNDEF) 3067 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3068 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3069 return Op; 3070 } 3071 3072 if (X86::isSHUFPMask(PermMask.Val)) 3073 return Op; 3074 3075 // Handle v8i16 shuffle high / low shuffle node pair. 3076 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 3077 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3078 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3079 std::vector<SDOperand> MaskVec; 3080 for (unsigned i = 0; i != 4; ++i) 3081 MaskVec.push_back(PermMask.getOperand(i)); 3082 for (unsigned i = 4; i != 8; ++i) 3083 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3084 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3085 &MaskVec[0], MaskVec.size()); 3086 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3087 MaskVec.clear(); 3088 for (unsigned i = 0; i != 4; ++i) 3089 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3090 for (unsigned i = 4; i != 8; ++i) 3091 MaskVec.push_back(PermMask.getOperand(i)); 3092 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 3093 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3094 } 3095 } else { 3096 // Floating point cases in the other order. 
3097 if (X86::isSHUFPMask(PermMask.Val)) 3098 return Op; 3099 if (X86::isPSHUFDMask(PermMask.Val) || 3100 X86::isPSHUFHWMask(PermMask.Val) || 3101 X86::isPSHUFLWMask(PermMask.Val)) { 3102 if (V2.getOpcode() != ISD::UNDEF) 3103 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3104 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3105 return Op; 3106 } 3107 } 3108 3109 if (NumElems == 4) { 3110 MVT::ValueType MaskVT = PermMask.getValueType(); 3111 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3112 std::vector<std::pair<int, int> > Locs; 3113 Locs.reserve(NumElems); 3114 std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3115 std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3116 unsigned NumHi = 0; 3117 unsigned NumLo = 0; 3118 // If no more than two elements come from either vector. This can be 3119 // implemented with two shuffles. First shuffle gather the elements. 3120 // The second shuffle, which takes the first shuffle as both of its 3121 // vector operands, put the elements into the right order. 3122 for (unsigned i = 0; i != NumElems; ++i) { 3123 SDOperand Elt = PermMask.getOperand(i); 3124 if (Elt.getOpcode() == ISD::UNDEF) { 3125 Locs[i] = std::make_pair(-1, -1); 3126 } else { 3127 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 3128 if (Val < NumElems) { 3129 Locs[i] = std::make_pair(0, NumLo); 3130 Mask1[NumLo] = Elt; 3131 NumLo++; 3132 } else { 3133 Locs[i] = std::make_pair(1, NumHi); 3134 if (2+NumHi < NumElems) 3135 Mask1[2+NumHi] = Elt; 3136 NumHi++; 3137 } 3138 } 3139 } 3140 if (NumLo <= 2 && NumHi <= 2) { 3141 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3142 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3143 &Mask1[0], Mask1.size())); 3144 for (unsigned i = 0; i != NumElems; ++i) { 3145 if (Locs[i].first == -1) 3146 continue; 3147 else { 3148 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 3149 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 3150 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3151 } 3152 } 3153 3154 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3155 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3156 &Mask2[0], Mask2.size())); 3157 } 3158 3159 // Break it into (shuffle shuffle_hi, shuffle_lo). 
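    // e.g. for the 4-element mask <0, 4, 6, 2>: the low-half pass collects
    // <0, u, 4, u>, the high-half pass collects <2, u, 6, u>, and a final
    // shuffle of the two partial results with mask <0, 2, 6, 4> produces the
    // requested element order.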
3160 Locs.clear(); 3161 std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3162 std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3163 std::vector<SDOperand> *MaskPtr = &LoMask; 3164 unsigned MaskIdx = 0; 3165 unsigned LoIdx = 0; 3166 unsigned HiIdx = NumElems/2; 3167 for (unsigned i = 0; i != NumElems; ++i) { 3168 if (i == NumElems/2) { 3169 MaskPtr = &HiMask; 3170 MaskIdx = 1; 3171 LoIdx = 0; 3172 HiIdx = NumElems/2; 3173 } 3174 SDOperand Elt = PermMask.getOperand(i); 3175 if (Elt.getOpcode() == ISD::UNDEF) { 3176 Locs[i] = std::make_pair(-1, -1); 3177 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3178 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3179 (*MaskPtr)[LoIdx] = Elt; 3180 LoIdx++; 3181 } else { 3182 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3183 (*MaskPtr)[HiIdx] = Elt; 3184 HiIdx++; 3185 } 3186 } 3187 3188 SDOperand LoShuffle = 3189 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3190 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3191 &LoMask[0], LoMask.size())); 3192 SDOperand HiShuffle = 3193 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3194 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3195 &HiMask[0], HiMask.size())); 3196 std::vector<SDOperand> MaskOps; 3197 for (unsigned i = 0; i != NumElems; ++i) { 3198 if (Locs[i].first == -1) { 3199 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3200 } else { 3201 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3202 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3203 } 3204 } 3205 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3206 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3207 &MaskOps[0], MaskOps.size())); 3208 } 3209 3210 return SDOperand(); 3211} 3212 3213SDOperand 3214X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3215 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3216 return SDOperand(); 3217 3218 MVT::ValueType VT = Op.getValueType(); 3219 // TODO: handle v16i8. 3220 if (MVT::getSizeInBits(VT) == 16) { 3221 // Transform it so it matches pextrw, which produces a 32-bit result. 3222 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 3223 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 3224 Op.getOperand(0), Op.getOperand(1)); 3225 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 3226 DAG.getValueType(VT)); 3227 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3228 } else if (MVT::getSizeInBits(VT) == 32) { 3229 SDOperand Vec = Op.getOperand(0); 3230 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3231 if (Idx == 0) 3232 return Op; 3233 // SHUFPS the element to the lowest double word, then movss.
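    // A <Idx, undef, undef, undef> mask moves the desired element into lane 0;
    // element 0 of the shuffled vector is then extracted.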
3234 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3235 std::vector<SDOperand> IdxVec; 3236 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 3237 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3238 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3239 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3240 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3241 &IdxVec[0], IdxVec.size()); 3242 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3243 Vec, Vec, Mask); 3244 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3245 DAG.getConstant(0, getPointerTy())); 3246 } else if (MVT::getSizeInBits(VT) == 64) { 3247 SDOperand Vec = Op.getOperand(0); 3248 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3249 if (Idx == 0) 3250 return Op; 3251 3252 // UNPCKHPD the element to the lowest double word, then movsd. 3253 // Note if the lower 64 bits of the result of the UNPCKHPD are then stored 3254 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 3255 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3256 std::vector<SDOperand> IdxVec; 3257 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 3258 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3259 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3260 &IdxVec[0], IdxVec.size()); 3261 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3262 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3263 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3264 DAG.getConstant(0, getPointerTy())); 3265 } 3266 3267 return SDOperand(); 3268} 3269 3270SDOperand 3271X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3272 // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32 3273 // as its second argument. 3274 MVT::ValueType VT = Op.getValueType(); 3275 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 3276 SDOperand N0 = Op.getOperand(0); 3277 SDOperand N1 = Op.getOperand(1); 3278 SDOperand N2 = Op.getOperand(2); 3279 if (MVT::getSizeInBits(BaseVT) == 16) { 3280 if (N1.getValueType() != MVT::i32) 3281 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3282 if (N2.getValueType() != MVT::i32) 3283 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 3284 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3285 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3286 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3287 if (Idx == 0) { 3288 // Use a movss. 3289 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3290 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3291 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3292 std::vector<SDOperand> MaskVec; 3293 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3294 for (unsigned i = 1; i <= 3; ++i) 3295 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3296 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3297 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3298 &MaskVec[0], MaskVec.size())); 3299 } else { 3300 // Use two pinsrw instructions to insert a 32-bit value. 3301 Idx <<= 1; 3302 if (MVT::isFloatingPoint(N1.getValueType())) { 3303 if (N1.getOpcode() == ISD::LOAD) { 3304 // Just load directly from f32mem to GR32.
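          // The f32 bits are reloaded as an i32 so they can be split into the
          // two 16-bit halves inserted by the pair of pinsrw nodes below.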
3305 N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), 3306 N1.getOperand(2)); 3307 } else { 3308 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3309 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3310 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3311 DAG.getConstant(0, getPointerTy())); 3312 } 3313 } 3314 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3315 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3316 DAG.getConstant(Idx, getPointerTy())); 3317 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3318 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3319 DAG.getConstant(Idx+1, getPointerTy())); 3320 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3321 } 3322 } 3323 3324 return SDOperand(); 3325} 3326 3327SDOperand 3328X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3329 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3330 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3331} 3332 3333// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 3334// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is 3335// one of the above-mentioned nodes. It has to be wrapped because otherwise 3336// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 3337// be used to form an addressing mode. These wrapped nodes will be selected 3338// into MOV32ri. 3339SDOperand 3340X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 3341 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 3342 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3343 DAG.getTargetConstantPool(CP->getConstVal(), 3344 getPointerTy(), 3345 CP->getAlignment())); 3346 if (Subtarget->isTargetDarwin()) { 3347 // With PIC, the address is actually $g + Offset. 3348 if (!Subtarget->is64Bit() && 3349 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3350 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3351 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 3352 } 3353 3354 return Result; 3355} 3356 3357SDOperand 3358X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 3359 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 3360 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3361 DAG.getTargetGlobalAddress(GV, 3362 getPointerTy())); 3363 if (Subtarget->isTargetDarwin()) { 3364 // With PIC, the address is actually $g + Offset. 3365 if (!Subtarget->is64Bit() && 3366 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3367 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3368 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3369 Result); 3370 3371 // For Darwin, external and weak symbols are indirect, so we want to load 3372 // the value at address GV, not the value of GV itself. This means that 3373 // the GlobalAddress must be in the base or index register of the address, 3374 // not the GV offset field.
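    // The extra load is emitted below for non-static relocation models.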
3375 if (getTargetMachine().getRelocationModel() != Reloc::Static && 3376 DarwinGVRequiresExtraLoad(GV)) 3377 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), 3378 Result, DAG.getSrcValue(NULL)); 3379 } 3380 3381 return Result; 3382} 3383 3384SDOperand 3385X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 3386 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 3387 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3388 DAG.getTargetExternalSymbol(Sym, 3389 getPointerTy())); 3390 if (Subtarget->isTargetDarwin()) { 3391 // With PIC, the address is actually $g + Offset. 3392 if (!Subtarget->is64Bit() && 3393 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3394 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3395 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3396 Result); 3397 } 3398 3399 return Result; 3400} 3401 3402SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3403 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 3404 "Not an i64 shift!"); 3405 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 3406 SDOperand ShOpLo = Op.getOperand(0); 3407 SDOperand ShOpHi = Op.getOperand(1); 3408 SDOperand ShAmt = Op.getOperand(2); 3409 SDOperand Tmp1 = isSRA ? 3410 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 3411 DAG.getConstant(0, MVT::i32); 3412 3413 SDOperand Tmp2, Tmp3; 3414 if (Op.getOpcode() == ISD::SHL_PARTS) { 3415 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 3416 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 3417 } else { 3418 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 3419 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3420 } 3421 3422 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3423 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3424 DAG.getConstant(32, MVT::i8)); 3425 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3426 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3427 3428 SDOperand Hi, Lo; 3429 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3430 3431 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3432 SmallVector<SDOperand, 4> Ops; 3433 if (Op.getOpcode() == ISD::SHL_PARTS) { 3434 Ops.push_back(Tmp2); 3435 Ops.push_back(Tmp3); 3436 Ops.push_back(CC); 3437 Ops.push_back(InFlag); 3438 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3439 InFlag = Hi.getValue(1); 3440 3441 Ops.clear(); 3442 Ops.push_back(Tmp3); 3443 Ops.push_back(Tmp1); 3444 Ops.push_back(CC); 3445 Ops.push_back(InFlag); 3446 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3447 } else { 3448 Ops.push_back(Tmp2); 3449 Ops.push_back(Tmp3); 3450 Ops.push_back(CC); 3451 Ops.push_back(InFlag); 3452 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3453 InFlag = Lo.getValue(1); 3454 3455 Ops.clear(); 3456 Ops.push_back(Tmp3); 3457 Ops.push_back(Tmp1); 3458 Ops.push_back(CC); 3459 Ops.push_back(InFlag); 3460 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3461 } 3462 3463 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3464 Ops.clear(); 3465 Ops.push_back(Lo); 3466 Ops.push_back(Hi); 3467 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3468} 3469 3470SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3471 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3472 Op.getOperand(0).getValueType() >= 
MVT::i16 && 3473 "Unknown SINT_TO_FP to lower!"); 3474 3475 SDOperand Result; 3476 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3477 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3478 MachineFunction &MF = DAG.getMachineFunction(); 3479 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3480 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3481 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 3482 DAG.getEntryNode(), Op.getOperand(0), 3483 StackSlot, DAG.getSrcValue(NULL)); 3484 3485 // Build the FILD 3486 std::vector<MVT::ValueType> Tys; 3487 Tys.push_back(MVT::f64); 3488 Tys.push_back(MVT::Other); 3489 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 3490 std::vector<SDOperand> Ops; 3491 Ops.push_back(Chain); 3492 Ops.push_back(StackSlot); 3493 Ops.push_back(DAG.getValueType(SrcVT)); 3494 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3495 Tys, &Ops[0], Ops.size()); 3496 3497 if (X86ScalarSSE) { 3498 Chain = Result.getValue(1); 3499 SDOperand InFlag = Result.getValue(2); 3500 3501 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3502 // shouldn't be necessary except that RFP cannot be live across 3503 // multiple blocks. When stackifier is fixed, they can be uncoupled. 3504 MachineFunction &MF = DAG.getMachineFunction(); 3505 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3506 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3507 std::vector<MVT::ValueType> Tys; 3508 Tys.push_back(MVT::Other); 3509 std::vector<SDOperand> Ops; 3510 Ops.push_back(Chain); 3511 Ops.push_back(Result); 3512 Ops.push_back(StackSlot); 3513 Ops.push_back(DAG.getValueType(Op.getValueType())); 3514 Ops.push_back(InFlag); 3515 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3516 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 3517 DAG.getSrcValue(NULL)); 3518 } 3519 3520 return Result; 3521} 3522 3523SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3524 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3525 "Unknown FP_TO_SINT to lower!"); 3526 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3527 // stack slot. 
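  // With SSE, the value is first spilled to a stack slot and reloaded onto the
  // x87 stack with an FLD, since the FIST* family only operates on x87 registers.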
3528 MachineFunction &MF = DAG.getMachineFunction(); 3529 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3530 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3531 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3532 3533 unsigned Opc; 3534 switch (Op.getValueType()) { 3535 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3536 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3537 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3538 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3539 } 3540 3541 SDOperand Chain = DAG.getEntryNode(); 3542 SDOperand Value = Op.getOperand(0); 3543 if (X86ScalarSSE) { 3544 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3545 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 3546 DAG.getSrcValue(0)); 3547 std::vector<MVT::ValueType> Tys; 3548 Tys.push_back(MVT::f64); 3549 Tys.push_back(MVT::Other); 3550 std::vector<SDOperand> Ops; 3551 Ops.push_back(Chain); 3552 Ops.push_back(StackSlot); 3553 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 3554 Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size()); 3555 Chain = Value.getValue(1); 3556 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3557 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3558 } 3559 3560 // Build the FP_TO_INT*_IN_MEM 3561 std::vector<SDOperand> Ops; 3562 Ops.push_back(Chain); 3563 Ops.push_back(Value); 3564 Ops.push_back(StackSlot); 3565 SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size()); 3566 3567 // Load the result. 3568 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 3569 DAG.getSrcValue(NULL)); 3570} 3571 3572SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3573 MVT::ValueType VT = Op.getValueType(); 3574 const Type *OpNTy = MVT::getTypeForValueType(VT); 3575 std::vector<Constant*> CV; 3576 if (VT == MVT::f64) { 3577 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 3578 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3579 } else { 3580 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 3581 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3582 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3583 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3584 } 3585 Constant *CS = ConstantStruct::get(CV); 3586 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3587 std::vector<MVT::ValueType> Tys; 3588 Tys.push_back(VT); 3589 Tys.push_back(MVT::Other); 3590 SmallVector<SDOperand, 3> Ops; 3591 Ops.push_back(DAG.getEntryNode()); 3592 Ops.push_back(CPIdx); 3593 Ops.push_back(DAG.getSrcValue(NULL)); 3594 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3595 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3596} 3597 3598SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3599 MVT::ValueType VT = Op.getValueType(); 3600 const Type *OpNTy = MVT::getTypeForValueType(VT); 3601 std::vector<Constant*> CV; 3602 if (VT == MVT::f64) { 3603 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 3604 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3605 } else { 3606 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 3607 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3608 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3609 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3610 } 3611 Constant *CS = ConstantStruct::get(CV); 3612 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3613 std::vector<MVT::ValueType> Tys; 3614 
Tys.push_back(VT); 3615 Tys.push_back(MVT::Other); 3616 SmallVector<SDOperand, 3> Ops; 3617 Ops.push_back(DAG.getEntryNode()); 3618 Ops.push_back(CPIdx); 3619 Ops.push_back(DAG.getSrcValue(NULL)); 3620 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3621 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3622} 3623 3624SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, 3625 SDOperand Chain) { 3626 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3627 SDOperand Cond; 3628 SDOperand Op0 = Op.getOperand(0); 3629 SDOperand Op1 = Op.getOperand(1); 3630 SDOperand CC = Op.getOperand(2); 3631 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3632 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3633 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3634 unsigned X86CC; 3635 3636 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 3637 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 3638 Op0, Op1, DAG)) { 3639 SDOperand Ops1[] = { Chain, Op0, Op1 }; 3640 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops1, 3).getValue(1); 3641 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 3642 return DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 3643 } 3644 3645 assert(isFP && "Illegal integer SetCC!"); 3646 3647 SDOperand COps[] = { Chain, Op0, Op1 }; 3648 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3649 3650 switch (SetCCOpcode) { 3651 default: assert(false && "Illegal floating point SetCC!"); 3652 case ISD::SETOEQ: { // !PF & ZF 3653 SDOperand Ops1[] = { DAG.getConstant(X86ISD::COND_NP, MVT::i8), Cond }; 3654 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops1, 2); 3655 SDOperand Ops2[] = { DAG.getConstant(X86ISD::COND_E, MVT::i8), 3656 Tmp1.getValue(1) }; 3657 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 3658 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3659 } 3660 case ISD::SETUNE: { // PF | !ZF 3661 SDOperand Ops1[] = { DAG.getConstant(X86ISD::COND_P, MVT::i8), Cond }; 3662 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops1, 2); 3663 SDOperand Ops2[] = { DAG.getConstant(X86ISD::COND_NE, MVT::i8), 3664 Tmp1.getValue(1) }; 3665 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 3666 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3667 } 3668 } 3669} 3670 3671SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3672 bool addTest = true; 3673 SDOperand Chain = DAG.getEntryNode(); 3674 SDOperand Cond = Op.getOperand(0); 3675 SDOperand CC; 3676 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3677 3678 if (Cond.getOpcode() == ISD::SETCC) 3679 Cond = LowerSETCC(Cond, DAG, Chain); 3680 3681 if (Cond.getOpcode() == X86ISD::SETCC) { 3682 CC = Cond.getOperand(0); 3683 3684 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3685 // (since flag operand cannot be shared). Use it as the condition setting 3686 // operand in place of the X86ISD::SETCC. 3687 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3688 // to use a test instead of duplicating the X86ISD::CMP (for register 3689 // pressure reason)? 
3690 SDOperand Cmp = Cond.getOperand(1); 3691 unsigned Opc = Cmp.getOpcode(); 3692 bool IllegalFPCMov = !X86ScalarSSE && 3693 MVT::isFloatingPoint(Op.getValueType()) && 3694 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3695 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) && 3696 !IllegalFPCMov) { 3697 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3698 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3699 addTest = false; 3700 } 3701 } 3702 3703 if (addTest) { 3704 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3705 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3706 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3707 } 3708 3709 VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag); 3710 SmallVector<SDOperand, 4> Ops; 3711 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3712 // condition is true. 3713 Ops.push_back(Op.getOperand(2)); 3714 Ops.push_back(Op.getOperand(1)); 3715 Ops.push_back(CC); 3716 Ops.push_back(Cond.getValue(1)); 3717 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3718} 3719 3720SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3721 bool addTest = true; 3722 SDOperand Chain = Op.getOperand(0); 3723 SDOperand Cond = Op.getOperand(1); 3724 SDOperand Dest = Op.getOperand(2); 3725 SDOperand CC; 3726 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3727 3728 if (Cond.getOpcode() == ISD::SETCC) 3729 Cond = LowerSETCC(Cond, DAG, Chain); 3730 3731 if (Cond.getOpcode() == X86ISD::SETCC) { 3732 CC = Cond.getOperand(0); 3733 3734 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3735 // (since flag operand cannot be shared). Use it as the condition setting 3736 // operand in place of the X86ISD::SETCC. 3737 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3738 // to use a test instead of duplicating the X86ISD::CMP (for register 3739 // pressure reason)? 3740 SDOperand Cmp = Cond.getOperand(1); 3741 unsigned Opc = Cmp.getOpcode(); 3742 if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) { 3743 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3744 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3745 addTest = false; 3746 } 3747 } 3748 3749 if (addTest) { 3750 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3751 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3752 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3753 } 3754 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3755 Cond, Op.getOperand(2), CC, Cond.getValue(1)); 3756} 3757 3758SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 3759 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 3760 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3761 DAG.getTargetJumpTable(JT->getIndex(), 3762 getPointerTy())); 3763 if (Subtarget->isTargetDarwin()) { 3764 // With PIC, the address is actually $g + Offset. 
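    // The jump table address is then formed by adding the PIC base register
    // (X86ISD::GlobalBaseReg) to the wrapped target jump table node.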
3765 if (!Subtarget->is64Bit() && 3766 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3767 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3768 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3769 Result); 3770 } 3771 3772 return Result; 3773} 3774 3775SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 3776 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3777 if (Subtarget->is64Bit()) 3778 return LowerX86_64CCCCallTo(Op, DAG); 3779 else if (CallingConv == CallingConv::Fast && EnableFastCC) 3780 return LowerFastCCCallTo(Op, DAG); 3781 else 3782 return LowerCCCCallTo(Op, DAG); 3783} 3784 3785SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { 3786 SDOperand Copy; 3787 3788 switch(Op.getNumOperands()) { 3789 default: 3790 assert(0 && "Do not know how to return this many arguments!"); 3791 abort(); 3792 case 1: // ret void. 3793 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 3794 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 3795 case 3: { 3796 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 3797 3798 if (MVT::isVector(ArgVT) || 3799 (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) { 3800 // Integer or FP vector result -> XMM0. 3801 if (DAG.getMachineFunction().liveout_empty()) 3802 DAG.getMachineFunction().addLiveOut(X86::XMM0); 3803 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 3804 SDOperand()); 3805 } else if (MVT::isInteger(ArgVT)) { 3806 // Integer result -> EAX / RAX. 3807 // The C calling convention guarantees the return value has been 3808 // promoted to at least MVT::i32. The X86-64 ABI doesn't require the 3809 // value to be promoted MVT::i64. So we don't have to extend it to 3810 // 64-bit. Return the value in EAX, but mark RAX as liveout. 3811 unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; 3812 if (DAG.getMachineFunction().liveout_empty()) 3813 DAG.getMachineFunction().addLiveOut(Reg); 3814 3815 Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX; 3816 Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1), 3817 SDOperand()); 3818 } else if (!X86ScalarSSE) { 3819 // FP return with fp-stack value. 3820 if (DAG.getMachineFunction().liveout_empty()) 3821 DAG.getMachineFunction().addLiveOut(X86::ST0); 3822 3823 std::vector<MVT::ValueType> Tys; 3824 Tys.push_back(MVT::Other); 3825 Tys.push_back(MVT::Flag); 3826 std::vector<SDOperand> Ops; 3827 Ops.push_back(Op.getOperand(0)); 3828 Ops.push_back(Op.getOperand(1)); 3829 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size()); 3830 } else { 3831 // FP return with ScalarSSE (return on fp-stack). 3832 if (DAG.getMachineFunction().liveout_empty()) 3833 DAG.getMachineFunction().addLiveOut(X86::ST0); 3834 3835 SDOperand MemLoc; 3836 SDOperand Chain = Op.getOperand(0); 3837 SDOperand Value = Op.getOperand(1); 3838 3839 if (Value.getOpcode() == ISD::LOAD && 3840 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 3841 Chain = Value.getOperand(0); 3842 MemLoc = Value.getOperand(1); 3843 } else { 3844 // Spill the value to memory and reload it into top of stack. 
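        // The value is stored to a fresh stack slot; the FLD below then brings
        // it into ST(0) so FP_SET_RESULT can return it on the fp-stack.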
3845 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 3846 MachineFunction &MF = DAG.getMachineFunction(); 3847 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3848 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 3849 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 3850 Value, MemLoc, DAG.getSrcValue(0)); 3851 } 3852 std::vector<MVT::ValueType> Tys; 3853 Tys.push_back(MVT::f64); 3854 Tys.push_back(MVT::Other); 3855 std::vector<SDOperand> Ops; 3856 Ops.push_back(Chain); 3857 Ops.push_back(MemLoc); 3858 Ops.push_back(DAG.getValueType(ArgVT)); 3859 Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size()); 3860 Tys.clear(); 3861 Tys.push_back(MVT::Other); 3862 Tys.push_back(MVT::Flag); 3863 Ops.clear(); 3864 Ops.push_back(Copy.getValue(1)); 3865 Ops.push_back(Copy); 3866 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size()); 3867 } 3868 break; 3869 } 3870 case 5: { 3871 unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX; 3872 unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX; 3873 if (DAG.getMachineFunction().liveout_empty()) { 3874 DAG.getMachineFunction().addLiveOut(Reg1); 3875 DAG.getMachineFunction().addLiveOut(Reg2); 3876 } 3877 3878 Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3), 3879 SDOperand()); 3880 Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1)); 3881 break; 3882 } 3883 } 3884 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 3885 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 3886 Copy.getValue(1)); 3887} 3888 3889SDOperand 3890X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3891 MachineFunction &MF = DAG.getMachineFunction(); 3892 const Function* Fn = MF.getFunction(); 3893 if (Fn->hasExternalLinkage() && 3894 Subtarget->TargetType == X86Subtarget::isCygwin && 3895 Fn->getName() == "main") 3896 MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true); 3897 3898 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3899 if (Subtarget->is64Bit()) 3900 return LowerX86_64CCCArguments(Op, DAG); 3901 else if (CC == CallingConv::Fast && EnableFastCC) 3902 return LowerFastCCArguments(Op, DAG); 3903 else 3904 return LowerCCCArguments(Op, DAG); 3905} 3906 3907SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3908 SDOperand InFlag(0, 0); 3909 SDOperand Chain = Op.getOperand(0); 3910 unsigned Align = 3911 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3912 if (Align == 0) Align = 1; 3913 3914 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3915 // If not DWORD aligned, call memset if size is less than the threshold. 3916 // It knows how to align to the right boundary first. 3917 if ((Align & 3) != 0 || 3918 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3919 MVT::ValueType IntPtr = getPointerTy(); 3920 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3921 std::vector<std::pair<SDOperand, const Type*> > Args; 3922 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 3923 // Extend the ubyte argument to be an int value for the call. 
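    // memset takes its fill value as an int even though only the low byte is used.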
3924 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3925 Args.push_back(std::make_pair(Val, IntPtrTy)); 3926 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 3927 std::pair<SDOperand,SDOperand> CallResult = 3928 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 3929 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3930 return CallResult.second; 3931 } 3932 3933 MVT::ValueType AVT; 3934 SDOperand Count; 3935 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3936 unsigned BytesLeft = 0; 3937 bool TwoRepStos = false; 3938 if (ValC) { 3939 unsigned ValReg; 3940 uint64_t Val = ValC->getValue() & 255; 3941 3942 // If the value is a constant, then we can potentially use larger sets. 3943 switch (Align & 3) { 3944 case 2: // WORD aligned 3945 AVT = MVT::i16; 3946 ValReg = X86::AX; 3947 Val = (Val << 8) | Val; 3948 break; 3949 case 0: // DWORD aligned 3950 AVT = MVT::i32; 3951 ValReg = X86::EAX; 3952 Val = (Val << 8) | Val; 3953 Val = (Val << 16) | Val; 3954 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3955 AVT = MVT::i64; 3956 ValReg = X86::RAX; 3957 Val = (Val << 32) | Val; 3958 } 3959 break; 3960 default: // Byte aligned 3961 AVT = MVT::i8; 3962 ValReg = X86::AL; 3963 Count = Op.getOperand(3); 3964 break; 3965 } 3966 3967 if (AVT > MVT::i8) { 3968 if (I) { 3969 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3970 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3971 BytesLeft = I->getValue() % UBytes; 3972 } else { 3973 assert(AVT >= MVT::i32 && 3974 "Do not use rep;stos if not at least DWORD aligned"); 3975 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3976 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3977 TwoRepStos = true; 3978 } 3979 } 3980 3981 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3982 InFlag); 3983 InFlag = Chain.getValue(1); 3984 } else { 3985 AVT = MVT::i8; 3986 Count = Op.getOperand(3); 3987 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3988 InFlag = Chain.getValue(1); 3989 } 3990 3991 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3992 Count, InFlag); 3993 InFlag = Chain.getValue(1); 3994 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3995 Op.getOperand(1), InFlag); 3996 InFlag = Chain.getValue(1); 3997 3998 std::vector<MVT::ValueType> Tys; 3999 Tys.push_back(MVT::Other); 4000 Tys.push_back(MVT::Flag); 4001 std::vector<SDOperand> Ops; 4002 Ops.push_back(Chain); 4003 Ops.push_back(DAG.getValueType(AVT)); 4004 Ops.push_back(InFlag); 4005 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4006 4007 if (TwoRepStos) { 4008 InFlag = Chain.getValue(1); 4009 Count = Op.getOperand(3); 4010 MVT::ValueType CVT = Count.getValueType(); 4011 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4012 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4013 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4014 Left, InFlag); 4015 InFlag = Chain.getValue(1); 4016 Tys.clear(); 4017 Tys.push_back(MVT::Other); 4018 Tys.push_back(MVT::Flag); 4019 Ops.clear(); 4020 Ops.push_back(Chain); 4021 Ops.push_back(DAG.getValueType(MVT::i8)); 4022 Ops.push_back(InFlag); 4023 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4024 } else if (BytesLeft) { 4025 // Issue stores for the last 1 - 7 bytes. 
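    // Widest stores first: a 4-byte store, then a 2-byte store, then a single
    // byte, each at the running Offset past the area covered by rep;stos.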
4026 SDOperand Value; 4027 unsigned Val = ValC->getValue() & 255; 4028 unsigned Offset = I->getValue() - BytesLeft; 4029 SDOperand DstAddr = Op.getOperand(1); 4030 MVT::ValueType AddrVT = DstAddr.getValueType(); 4031 if (BytesLeft >= 4) { 4032 Val = (Val << 8) | Val; 4033 Val = (Val << 16) | Val; 4034 Value = DAG.getConstant(Val, MVT::i32); 4035 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4036 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4037 DAG.getConstant(Offset, AddrVT)), 4038 DAG.getSrcValue(NULL)); 4039 BytesLeft -= 4; 4040 Offset += 4; 4041 } 4042 if (BytesLeft >= 2) { 4043 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4044 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4045 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4046 DAG.getConstant(Offset, AddrVT)), 4047 DAG.getSrcValue(NULL)); 4048 BytesLeft -= 2; 4049 Offset += 2; 4050 } 4051 if (BytesLeft == 1) { 4052 Value = DAG.getConstant(Val, MVT::i8); 4053 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4054 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4055 DAG.getConstant(Offset, AddrVT)), 4056 DAG.getSrcValue(NULL)); 4057 } 4058 } 4059 4060 return Chain; 4061} 4062 4063SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 4064 SDOperand Chain = Op.getOperand(0); 4065 unsigned Align = 4066 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 4067 if (Align == 0) Align = 1; 4068 4069 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 4070 // If not DWORD aligned, call memcpy if size is less than the threshold. 4071 // It knows how to align to the right boundary first. 4072 if ((Align & 3) != 0 || 4073 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 4074 MVT::ValueType IntPtr = getPointerTy(); 4075 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4076 std::vector<std::pair<SDOperand, const Type*> > Args; 4077 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 4078 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 4079 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 4080 std::pair<SDOperand,SDOperand> CallResult = 4081 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 4082 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 4083 return CallResult.second; 4084 } 4085 4086 MVT::ValueType AVT; 4087 SDOperand Count; 4088 unsigned BytesLeft = 0; 4089 bool TwoRepMovs = false; 4090 switch (Align & 3) { 4091 case 2: // WORD aligned 4092 AVT = MVT::i16; 4093 break; 4094 case 0: // DWORD aligned 4095 AVT = MVT::i32; 4096 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4097 AVT = MVT::i64; 4098 break; 4099 default: // Byte aligned 4100 AVT = MVT::i8; 4101 Count = Op.getOperand(3); 4102 break; 4103 } 4104 4105 if (AVT > MVT::i8) { 4106 if (I) { 4107 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4108 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4109 BytesLeft = I->getValue() % UBytes; 4110 } else { 4111 assert(AVT >= MVT::i32 && 4112 "Do not use rep;movs if not at least DWORD aligned"); 4113 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4114 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4115 TwoRepMovs = true; 4116 } 4117 } 4118 4119 SDOperand InFlag(0, 0); 4120 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4121 Count, InFlag); 4122 InFlag = Chain.getValue(1); 4123 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RDI : X86::EDI, 4124 Op.getOperand(1), InFlag); 4125 InFlag = Chain.getValue(1); 4126 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4127 Op.getOperand(2), InFlag); 4128 InFlag = Chain.getValue(1); 4129 4130 std::vector<MVT::ValueType> Tys; 4131 Tys.push_back(MVT::Other); 4132 Tys.push_back(MVT::Flag); 4133 std::vector<SDOperand> Ops; 4134 Ops.push_back(Chain); 4135 Ops.push_back(DAG.getValueType(AVT)); 4136 Ops.push_back(InFlag); 4137 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4138 4139 if (TwoRepMovs) { 4140 InFlag = Chain.getValue(1); 4141 Count = Op.getOperand(3); 4142 MVT::ValueType CVT = Count.getValueType(); 4143 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4144 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4145 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4146 Left, InFlag); 4147 InFlag = Chain.getValue(1); 4148 Tys.clear(); 4149 Tys.push_back(MVT::Other); 4150 Tys.push_back(MVT::Flag); 4151 Ops.clear(); 4152 Ops.push_back(Chain); 4153 Ops.push_back(DAG.getValueType(MVT::i8)); 4154 Ops.push_back(InFlag); 4155 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4156 } else if (BytesLeft) { 4157 // Issue loads and stores for the last 1 - 7 bytes. 4158 unsigned Offset = I->getValue() - BytesLeft; 4159 SDOperand DstAddr = Op.getOperand(1); 4160 MVT::ValueType DstVT = DstAddr.getValueType(); 4161 SDOperand SrcAddr = Op.getOperand(2); 4162 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4163 SDOperand Value; 4164 if (BytesLeft >= 4) { 4165 Value = DAG.getLoad(MVT::i32, Chain, 4166 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4167 DAG.getConstant(Offset, SrcVT)), 4168 DAG.getSrcValue(NULL)); 4169 Chain = Value.getValue(1); 4170 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4171 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4172 DAG.getConstant(Offset, DstVT)), 4173 DAG.getSrcValue(NULL)); 4174 BytesLeft -= 4; 4175 Offset += 4; 4176 } 4177 if (BytesLeft >= 2) { 4178 Value = DAG.getLoad(MVT::i16, Chain, 4179 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4180 DAG.getConstant(Offset, SrcVT)), 4181 DAG.getSrcValue(NULL)); 4182 Chain = Value.getValue(1); 4183 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4184 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4185 DAG.getConstant(Offset, DstVT)), 4186 DAG.getSrcValue(NULL)); 4187 BytesLeft -= 2; 4188 Offset += 2; 4189 } 4190 4191 if (BytesLeft == 1) { 4192 Value = DAG.getLoad(MVT::i8, Chain, 4193 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4194 DAG.getConstant(Offset, SrcVT)), 4195 DAG.getSrcValue(NULL)); 4196 Chain = Value.getValue(1); 4197 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4198 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4199 DAG.getConstant(Offset, DstVT)), 4200 DAG.getSrcValue(NULL)); 4201 } 4202 } 4203 4204 return Chain; 4205} 4206 4207SDOperand 4208X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4209 std::vector<MVT::ValueType> Tys; 4210 Tys.push_back(MVT::Other); 4211 Tys.push_back(MVT::Flag); 4212 std::vector<SDOperand> Ops; 4213 Ops.push_back(Op.getOperand(0)); 4214 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size()); 4215 Ops.clear(); 4216 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 4217 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 4218 MVT::i32, Ops[0].getValue(2))); 4219 Ops.push_back(Ops[1].getValue(1)); 4220 Tys[0] = Tys[1] = MVT::i32; 4221 Tys.push_back(MVT::Other); 4222 return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], 
Ops.size()); 4223} 4224 4225SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4226 if (!Subtarget->is64Bit()) { 4227 // vastart just stores the address of the VarArgsFrameIndex slot into the 4228 // memory location argument. 4229 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4230 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 4231 Op.getOperand(1), Op.getOperand(2)); 4232 } 4233 4234 // __va_list_tag: 4235 // gp_offset (0 - 6 * 8) 4236 // fp_offset (48 - 48 + 8 * 16) 4237 // overflow_arg_area (point to parameters coming in memory). 4238 // reg_save_area 4239 std::vector<SDOperand> MemOps; 4240 SDOperand FIN = Op.getOperand(1); 4241 // Store gp_offset 4242 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4243 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4244 FIN, Op.getOperand(2)); 4245 MemOps.push_back(Store); 4246 4247 // Store fp_offset 4248 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4249 DAG.getConstant(4, getPointerTy())); 4250 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4251 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4252 FIN, Op.getOperand(2)); 4253 MemOps.push_back(Store); 4254 4255 // Store ptr to overflow_arg_area 4256 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4257 DAG.getConstant(4, getPointerTy())); 4258 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4259 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4260 OVFIN, FIN, Op.getOperand(2)); 4261 MemOps.push_back(Store); 4262 4263 // Store ptr to reg_save_area. 4264 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4265 DAG.getConstant(8, getPointerTy())); 4266 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4267 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4268 RSFIN, FIN, Op.getOperand(2)); 4269 MemOps.push_back(Store); 4270 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4271} 4272 4273SDOperand 4274X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4275 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4276 switch (IntNo) { 4277 default: return SDOperand(); // Don't custom lower most intrinsics. 4278 // Comparison intrinsics. 
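  // Each (u)comi* intrinsic is lowered to an X86ISD::COMI or X86ISD::UCOMI
  // compare followed by an X86ISD::SETCC on the matching condition code,
  // extended to an i32 result.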
4279 case Intrinsic::x86_sse_comieq_ss: 4280 case Intrinsic::x86_sse_comilt_ss: 4281 case Intrinsic::x86_sse_comile_ss: 4282 case Intrinsic::x86_sse_comigt_ss: 4283 case Intrinsic::x86_sse_comige_ss: 4284 case Intrinsic::x86_sse_comineq_ss: 4285 case Intrinsic::x86_sse_ucomieq_ss: 4286 case Intrinsic::x86_sse_ucomilt_ss: 4287 case Intrinsic::x86_sse_ucomile_ss: 4288 case Intrinsic::x86_sse_ucomigt_ss: 4289 case Intrinsic::x86_sse_ucomige_ss: 4290 case Intrinsic::x86_sse_ucomineq_ss: 4291 case Intrinsic::x86_sse2_comieq_sd: 4292 case Intrinsic::x86_sse2_comilt_sd: 4293 case Intrinsic::x86_sse2_comile_sd: 4294 case Intrinsic::x86_sse2_comigt_sd: 4295 case Intrinsic::x86_sse2_comige_sd: 4296 case Intrinsic::x86_sse2_comineq_sd: 4297 case Intrinsic::x86_sse2_ucomieq_sd: 4298 case Intrinsic::x86_sse2_ucomilt_sd: 4299 case Intrinsic::x86_sse2_ucomile_sd: 4300 case Intrinsic::x86_sse2_ucomigt_sd: 4301 case Intrinsic::x86_sse2_ucomige_sd: 4302 case Intrinsic::x86_sse2_ucomineq_sd: { 4303 unsigned Opc = 0; 4304 ISD::CondCode CC = ISD::SETCC_INVALID; 4305 switch (IntNo) { 4306 default: break; 4307 case Intrinsic::x86_sse_comieq_ss: 4308 case Intrinsic::x86_sse2_comieq_sd: 4309 Opc = X86ISD::COMI; 4310 CC = ISD::SETEQ; 4311 break; 4312 case Intrinsic::x86_sse_comilt_ss: 4313 case Intrinsic::x86_sse2_comilt_sd: 4314 Opc = X86ISD::COMI; 4315 CC = ISD::SETLT; 4316 break; 4317 case Intrinsic::x86_sse_comile_ss: 4318 case Intrinsic::x86_sse2_comile_sd: 4319 Opc = X86ISD::COMI; 4320 CC = ISD::SETLE; 4321 break; 4322 case Intrinsic::x86_sse_comigt_ss: 4323 case Intrinsic::x86_sse2_comigt_sd: 4324 Opc = X86ISD::COMI; 4325 CC = ISD::SETGT; 4326 break; 4327 case Intrinsic::x86_sse_comige_ss: 4328 case Intrinsic::x86_sse2_comige_sd: 4329 Opc = X86ISD::COMI; 4330 CC = ISD::SETGE; 4331 break; 4332 case Intrinsic::x86_sse_comineq_ss: 4333 case Intrinsic::x86_sse2_comineq_sd: 4334 Opc = X86ISD::COMI; 4335 CC = ISD::SETNE; 4336 break; 4337 case Intrinsic::x86_sse_ucomieq_ss: 4338 case Intrinsic::x86_sse2_ucomieq_sd: 4339 Opc = X86ISD::UCOMI; 4340 CC = ISD::SETEQ; 4341 break; 4342 case Intrinsic::x86_sse_ucomilt_ss: 4343 case Intrinsic::x86_sse2_ucomilt_sd: 4344 Opc = X86ISD::UCOMI; 4345 CC = ISD::SETLT; 4346 break; 4347 case Intrinsic::x86_sse_ucomile_ss: 4348 case Intrinsic::x86_sse2_ucomile_sd: 4349 Opc = X86ISD::UCOMI; 4350 CC = ISD::SETLE; 4351 break; 4352 case Intrinsic::x86_sse_ucomigt_ss: 4353 case Intrinsic::x86_sse2_ucomigt_sd: 4354 Opc = X86ISD::UCOMI; 4355 CC = ISD::SETGT; 4356 break; 4357 case Intrinsic::x86_sse_ucomige_ss: 4358 case Intrinsic::x86_sse2_ucomige_sd: 4359 Opc = X86ISD::UCOMI; 4360 CC = ISD::SETGE; 4361 break; 4362 case Intrinsic::x86_sse_ucomineq_ss: 4363 case Intrinsic::x86_sse2_ucomineq_sd: 4364 Opc = X86ISD::UCOMI; 4365 CC = ISD::SETNE; 4366 break; 4367 } 4368 4369 unsigned X86CC; 4370 SDOperand LHS = Op.getOperand(1); 4371 SDOperand RHS = Op.getOperand(2); 4372 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4373 4374 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4375 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4376 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4377 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4378 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4379 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4380 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4381 } 4382 } 4383} 4384 4385/// LowerOperation - Provide custom lowering hooks for some operations. 
4386/// 4387SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4388 switch (Op.getOpcode()) { 4389 default: assert(0 && "Should not custom lower this!"); 4390 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4391 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4392 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4393 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4394 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4395 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4396 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4397 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4398 case ISD::SHL_PARTS: 4399 case ISD::SRA_PARTS: 4400 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4401 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4402 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4403 case ISD::FABS: return LowerFABS(Op, DAG); 4404 case ISD::FNEG: return LowerFNEG(Op, DAG); 4405 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4406 case ISD::SELECT: return LowerSELECT(Op, DAG); 4407 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4408 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4409 case ISD::CALL: return LowerCALL(Op, DAG); 4410 case ISD::RET: return LowerRET(Op, DAG); 4411 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4412 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4413 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4414 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4415 case ISD::VASTART: return LowerVASTART(Op, DAG); 4416 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4417 } 4418} 4419 4420const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4421 switch (Opcode) { 4422 default: return NULL; 4423 case X86ISD::SHLD: return "X86ISD::SHLD"; 4424 case X86ISD::SHRD: return "X86ISD::SHRD"; 4425 case X86ISD::FAND: return "X86ISD::FAND"; 4426 case X86ISD::FXOR: return "X86ISD::FXOR"; 4427 case X86ISD::FILD: return "X86ISD::FILD"; 4428 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4429 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4430 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4431 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4432 case X86ISD::FLD: return "X86ISD::FLD"; 4433 case X86ISD::FST: return "X86ISD::FST"; 4434 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4435 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4436 case X86ISD::CALL: return "X86ISD::CALL"; 4437 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4438 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4439 case X86ISD::CMP: return "X86ISD::CMP"; 4440 case X86ISD::COMI: return "X86ISD::COMI"; 4441 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4442 case X86ISD::SETCC: return "X86ISD::SETCC"; 4443 case X86ISD::CMOV: return "X86ISD::CMOV"; 4444 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4445 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4446 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4447 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4448 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 4449 case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA"; 4450 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4451 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4452 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4453 case 
X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4454 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4455 } 4456} 4457 4458/// isLegalAddressImmediate - Return true if the integer value or 4459/// GlobalValue can be used as the offset of the target addressing mode. 4460bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 4461 // X86 allows a sign-extended 32-bit immediate field. 4462 return (V > -(1LL << 32) && V < (1LL << 32)-1); 4463} 4464 4465bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 4466 // GV is 64-bit but displacement field is 32-bit unless we are in small code 4467 // model. Mac OS X happens to support only small PIC code model. 4468 // FIXME: better support for other OS's. 4469 if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin()) 4470 return false; 4471 if (Subtarget->isTargetDarwin()) { 4472 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 4473 if (RModel == Reloc::Static) 4474 return true; 4475 else if (RModel == Reloc::DynamicNoPIC) 4476 return !DarwinGVRequiresExtraLoad(GV); 4477 else 4478 return false; 4479 } else 4480 return true; 4481} 4482 4483/// isShuffleMaskLegal - Targets can use this to indicate that they only 4484/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4485/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4486/// are assumed to be legal. 4487bool 4488X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4489 // Only do shuffles on 128-bit vector types for now. 4490 if (MVT::getSizeInBits(VT) == 64) return false; 4491 return (Mask.Val->getNumOperands() <= 4 || 4492 isSplatMask(Mask.Val) || 4493 isPSHUFHW_PSHUFLWMask(Mask.Val) || 4494 X86::isUNPCKLMask(Mask.Val) || 4495 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 4496 X86::isUNPCKHMask(Mask.Val)); 4497} 4498 4499bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 4500 MVT::ValueType EVT, 4501 SelectionDAG &DAG) const { 4502 unsigned NumElts = BVOps.size(); 4503 // Only do shuffles on 128-bit vector types for now. 4504 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 4505 if (NumElts == 2) return true; 4506 if (NumElts == 4) { 4507 return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) || 4508 isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps)); 4509 } 4510 return false; 4511} 4512 4513//===----------------------------------------------------------------------===// 4514// X86 Scheduler Hooks 4515//===----------------------------------------------------------------------===// 4516 4517MachineBasicBlock * 4518X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 4519 MachineBasicBlock *BB) { 4520 switch (MI->getOpcode()) { 4521 default: assert(false && "Unexpected instr type to insert"); 4522 case X86::CMOV_FR32: 4523 case X86::CMOV_FR64: 4524 case X86::CMOV_V4F32: 4525 case X86::CMOV_V2F64: 4526 case X86::CMOV_V2I64: { 4527 // To "insert" a SELECT_CC instruction, we actually have to insert the 4528 // diamond control-flow pattern. The incoming instruction knows the 4529 // destination vreg to set, the condition code register to branch on, the 4530 // true/false values to select between, and a branch opcode to use. 4531 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4532 ilist<MachineBasicBlock>::iterator It = BB; 4533 ++It; 4534 4535 // thisMBB: 4536 // ... 4537 // TrueVal = ... 
4538 // cmpTY ccX, r1, r2 4539 // bCC copy1MBB 4540 // fallthrough --> copy0MBB 4541 MachineBasicBlock *thisMBB = BB; 4542 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 4543 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 4544 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 4545 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 4546 MachineFunction *F = BB->getParent(); 4547 F->getBasicBlockList().insert(It, copy0MBB); 4548 F->getBasicBlockList().insert(It, sinkMBB); 4549 // Update machine-CFG edges by first adding all successors of the current 4550 // block to the new block which will contain the Phi node for the select. 4551 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 4552 e = BB->succ_end(); i != e; ++i) 4553 sinkMBB->addSuccessor(*i); 4554 // Next, remove all successors of the current block, and add the true 4555 // and fallthrough blocks as its successors. 4556 while(!BB->succ_empty()) 4557 BB->removeSuccessor(BB->succ_begin()); 4558 BB->addSuccessor(copy0MBB); 4559 BB->addSuccessor(sinkMBB); 4560 4561 // copy0MBB: 4562 // %FalseValue = ... 4563 // # fallthrough to sinkMBB 4564 BB = copy0MBB; 4565 4566 // Update machine-CFG edges 4567 BB->addSuccessor(sinkMBB); 4568 4569 // sinkMBB: 4570 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4571 // ... 4572 BB = sinkMBB; 4573 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 4574 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4575 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4576 4577 delete MI; // The pseudo instruction is gone now. 4578 return BB; 4579 } 4580 4581 case X86::FP_TO_INT16_IN_MEM: 4582 case X86::FP_TO_INT32_IN_MEM: 4583 case X86::FP_TO_INT64_IN_MEM: { 4584 // Change the floating point control register to use "round towards zero" 4585 // mode when truncating to an integer value. 4586 MachineFunction *F = BB->getParent(); 4587 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 4588 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 4589 4590 // Load the old value of the high byte of the control word... 4591 unsigned OldCW = 4592 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 4593 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx); 4594 4595 // Set the high part to be round to zero... 4596 addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F); 4597 4598 // Reload the modified control word now... 4599 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 4600 4601 // Restore the memory image of control word to original value 4602 addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW); 4603 4604 // Get the X86 opcode to use. 
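    // FpIST16m / FpIST32m / FpIST64m are the fp-stack store-integer forms for
    // 16-, 32- and 64-bit destinations.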
4605 unsigned Opc; 4606 switch (MI->getOpcode()) { 4607 default: assert(0 && "illegal opcode!"); 4608 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 4609 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 4610 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 4611 } 4612 4613 X86AddressMode AM; 4614 MachineOperand &Op = MI->getOperand(0); 4615 if (Op.isRegister()) { 4616 AM.BaseType = X86AddressMode::RegBase; 4617 AM.Base.Reg = Op.getReg(); 4618 } else { 4619 AM.BaseType = X86AddressMode::FrameIndexBase; 4620 AM.Base.FrameIndex = Op.getFrameIndex(); 4621 } 4622 Op = MI->getOperand(1); 4623 if (Op.isImmediate()) 4624 AM.Scale = Op.getImmedValue(); 4625 Op = MI->getOperand(2); 4626 if (Op.isImmediate()) 4627 AM.IndexReg = Op.getImmedValue(); 4628 Op = MI->getOperand(3); 4629 if (Op.isGlobalAddress()) { 4630 AM.GV = Op.getGlobal(); 4631 } else { 4632 AM.Disp = Op.getImmedValue(); 4633 } 4634 addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg()); 4635 4636 // Reload the original control word now. 4637 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 4638 4639 delete MI; // The pseudo instruction is gone now. 4640 return BB; 4641 } 4642 } 4643} 4644 4645//===----------------------------------------------------------------------===// 4646// X86 Optimization Hooks 4647//===----------------------------------------------------------------------===// 4648 4649void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 4650 uint64_t Mask, 4651 uint64_t &KnownZero, 4652 uint64_t &KnownOne, 4653 unsigned Depth) const { 4654 unsigned Opc = Op.getOpcode(); 4655 assert((Opc >= ISD::BUILTIN_OP_END || 4656 Opc == ISD::INTRINSIC_WO_CHAIN || 4657 Opc == ISD::INTRINSIC_W_CHAIN || 4658 Opc == ISD::INTRINSIC_VOID) && 4659 "Should use MaskedValueIsZero if you don't know whether Op" 4660 " is a target node!"); 4661 4662 KnownZero = KnownOne = 0; // Don't know anything. 4663 switch (Opc) { 4664 default: break; 4665 case X86ISD::SETCC: 4666 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 4667 break; 4668 } 4669} 4670 4671/// getShuffleScalarElt - Returns the scalar element that will make up the ith 4672/// element of the result of the vector shuffle. 4673static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 4674 MVT::ValueType VT = N->getValueType(0); 4675 SDOperand PermMask = N->getOperand(2); 4676 unsigned NumElems = PermMask.getNumOperands(); 4677 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 4678 i %= NumElems; 4679 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 4680 return (i == 0) 4681 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); 4682 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 4683 SDOperand Idx = PermMask.getOperand(i); 4684 if (Idx.getOpcode() == ISD::UNDEF) 4685 return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); 4686 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 4687 } 4688 return SDOperand(); 4689} 4690 4691/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 4692/// node is a GlobalAddress + an offset. 
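/// A bare GlobalAddress wrapped in X86ISD::Wrapper is handled, as is an
/// ISD::ADD of such a node and a constant offset.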
4693static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 4694 if (N->getOpcode() == X86ISD::Wrapper) { 4695 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 4696 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 4697 return true; 4698 } 4699 } else if (N->getOpcode() == ISD::ADD) { 4700 SDOperand N1 = N->getOperand(0); 4701 SDOperand N2 = N->getOperand(1); 4702 if (isGAPlusOffset(N1.Val, GA, Offset)) { 4703 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 4704 if (V) { 4705 Offset += V->getSignExtended(); 4706 return true; 4707 } 4708 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 4709 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 4710 if (V) { 4711 Offset += V->getSignExtended(); 4712 return true; 4713 } 4714 } 4715 } 4716 return false; 4717} 4718 4719/// isConsecutiveLoad - Returns true if N is loading from an address of Base 4720/// + Dist * Size. 4721static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 4722 MachineFrameInfo *MFI) { 4723 if (N->getOperand(0).Val != Base->getOperand(0).Val) 4724 return false; 4725 4726 SDOperand Loc = N->getOperand(1); 4727 SDOperand BaseLoc = Base->getOperand(1); 4728 if (Loc.getOpcode() == ISD::FrameIndex) { 4729 if (BaseLoc.getOpcode() != ISD::FrameIndex) 4730 return false; 4731 int FI = dyn_cast<FrameIndexSDNode>(Loc)->getIndex(); 4732 int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 4733 int FS = MFI->getObjectSize(FI); 4734 int BFS = MFI->getObjectSize(BFI); 4735 if (FS != BFS || FS != Size) return false; 4736 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 4737 } else { 4738 GlobalValue *GV1 = NULL; 4739 GlobalValue *GV2 = NULL; 4740 int64_t Offset1 = 0; 4741 int64_t Offset2 = 0; 4742 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 4743 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 4744 if (isGA1 && isGA2 && GV1 == GV2) 4745 return Offset1 == (Offset2 + Dist*Size); 4746 } 4747 4748 return false; 4749} 4750 4751static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 4752 const X86Subtarget *Subtarget) { 4753 GlobalValue *GV; 4754 int64_t Offset; 4755 if (isGAPlusOffset(Base, GV, Offset)) 4756 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 4757 else { 4758 assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); 4759 int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex(); 4760 if (BFI < 0) 4761 // Fixed objects do not specify alignment, however the offsets are known. 4762 return ((Subtarget->getStackAlignment() % 16) == 0 && 4763 (MFI->getObjectOffset(BFI) % 16) == 0); 4764 else 4765 return MFI->getObjectAlignment(BFI) >= 16; 4766 } 4767 return false; 4768} 4769 4770 4771/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 4772/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 4773/// if the load addresses are consecutive, non-overlapping, and in the right 4774/// order. 
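/// For example (sketch, v4f32): a shuffle whose elements resolve to
///   (load a), (load a+4), (load a+8), (load a+12)  with mask <0, 1, 2, 3>
/// becomes a single 16-byte load from 'a' -- a normal LOAD when the base is
/// known to be 16-byte aligned, otherwise an unaligned X86ISD::LOAD_UA
/// (i.e. movups).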
4775static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 4776 const X86Subtarget *Subtarget) { 4777 MachineFunction &MF = DAG.getMachineFunction(); 4778 MachineFrameInfo *MFI = MF.getFrameInfo(); 4779 MVT::ValueType VT = N->getValueType(0); 4780 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 4781 SDOperand PermMask = N->getOperand(2); 4782 int NumElems = (int)PermMask.getNumOperands(); 4783 SDNode *Base = NULL; 4784 for (int i = 0; i < NumElems; ++i) { 4785 SDOperand Idx = PermMask.getOperand(i); 4786 if (Idx.getOpcode() == ISD::UNDEF) { 4787 if (!Base) return SDOperand(); 4788 } else { 4789 SDOperand Arg = 4790 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 4791 if (!Arg.Val || Arg.getOpcode() != ISD::LOAD) 4792 return SDOperand(); 4793 if (!Base) 4794 Base = Arg.Val; 4795 else if (!isConsecutiveLoad(Arg.Val, Base, 4796 i, MVT::getSizeInBits(EVT)/8,MFI)) 4797 return SDOperand(); 4798 } 4799 } 4800 4801 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 4802 if (isAlign16) 4803 return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1), 4804 Base->getOperand(2)); 4805 else { 4806 // Just use movups, it's shorter. 4807 std::vector<MVT::ValueType> Tys; 4808 Tys.push_back(MVT::v4f32); 4809 Tys.push_back(MVT::Other); 4810 SmallVector<SDOperand, 3> Ops; 4811 Ops.push_back(Base->getOperand(0)); 4812 Ops.push_back(Base->getOperand(1)); 4813 Ops.push_back(Base->getOperand(2)); 4814 return DAG.getNode(ISD::BIT_CONVERT, VT, 4815 DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size())); 4816 } 4817} 4818 4819SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 4820 DAGCombinerInfo &DCI) const { 4821 TargetMachine &TM = getTargetMachine(); 4822 SelectionDAG &DAG = DCI.DAG; 4823 switch (N->getOpcode()) { 4824 default: break; 4825 case ISD::VECTOR_SHUFFLE: 4826 return PerformShuffleCombine(N, DAG, Subtarget); 4827 } 4828 4829 return SDOperand(); 4830} 4831 4832//===----------------------------------------------------------------------===// 4833// X86 Inline Assembly Support 4834//===----------------------------------------------------------------------===// 4835 4836/// getConstraintType - Given a constraint letter, return the type of 4837/// constraint it is for this target. 4838X86TargetLowering::ConstraintType 4839X86TargetLowering::getConstraintType(char ConstraintLetter) const { 4840 switch (ConstraintLetter) { 4841 case 'A': 4842 case 'r': 4843 case 'R': 4844 case 'l': 4845 case 'q': 4846 case 'Q': 4847 case 'x': 4848 case 'Y': 4849 return C_RegisterClass; 4850 default: return TargetLowering::getConstraintType(ConstraintLetter); 4851 } 4852} 4853 4854std::vector<unsigned> X86TargetLowering:: 4855getRegClassForInlineAsmConstraint(const std::string &Constraint, 4856 MVT::ValueType VT) const { 4857 if (Constraint.size() == 1) { 4858 // FIXME: not handling fp-stack yet! 4859 // FIXME: not handling MMX registers yet ('y' constraint). 
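    // For example (illustrative), constraint 'q' with VT == MVT::i8 selects
    // {AL, DL, CL, BL} from the switch below.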
4860      switch (Constraint[0]) {      // GCC X86 Constraint Letters
4861      default: break;  // Unknown constraint letter
4862      case 'A':   // EAX/EDX
4863        if (VT == MVT::i32 || VT == MVT::i64)
4864          return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
4865        break;
4866      case 'r':   // GENERAL_REGS
4867      case 'R':   // LEGACY_REGS
4868        if (VT == MVT::i32)
4869          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4870                                       X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
4871        else if (VT == MVT::i16)
4872          return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4873                                       X86::SI, X86::DI, X86::BP, X86::SP, 0);
4874        else if (VT == MVT::i8)
4875          return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4876        break;
4877      case 'l':   // INDEX_REGS
4878        if (VT == MVT::i32)
4879          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4880                                       X86::ESI, X86::EDI, X86::EBP, 0);
4881        else if (VT == MVT::i16)
4882          return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4883                                       X86::SI, X86::DI, X86::BP, 0);
4884        else if (VT == MVT::i8)
4885          return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4886        break;
4887      case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
4888      case 'Q':   // Q_REGS
4889        if (VT == MVT::i32)
4890          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
4891        else if (VT == MVT::i16)
4892          return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
4893        else if (VT == MVT::i8)
4894          return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4895        break;
4896      case 'x':   // SSE_REGS if SSE1 allowed
4897        if (Subtarget->hasSSE1())
4898          return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4899                                       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4900                                       0);
4901        return std::vector<unsigned>();
4902      case 'Y':   // SSE_REGS if SSE2 allowed
4903        if (Subtarget->hasSSE2())
4904          return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4905                                       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4906                                       0);
4907        return std::vector<unsigned>();
4908      }
4909  }
4910
4911  return std::vector<unsigned>();
4912}
4913
4914std::pair<unsigned, const TargetRegisterClass*>
4915X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4916                                                MVT::ValueType VT) const {
4917  // Use the default implementation in TargetLowering to convert the register
4918  // constraint into a member of a register class.
4919  std::pair<unsigned, const TargetRegisterClass*> Res;
4920  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4921
4922  // Not found? Bail out.
4923  if (Res.second == 0) return Res;
4924
4925  // Otherwise, check to see if this is a register class of the wrong value
4926  // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
4927  // turn into {ax},{dx}.
4928  if (Res.second->hasType(VT))
4929    return Res;   // Correct type already, nothing to do.
4930
4931  // All of the single-register GCC register classes map their values onto
4932  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
4933  // really want an 8-bit or 32-bit register, map to the appropriate register
4934  // class and return the appropriate register.
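  // For example (illustrative), "{si}" with VT == MVT::i64 comes back as
  // (X86::SI, GR16) and is rewritten below to (X86::RSI, GR64).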
4935  if (Res.second != X86::GR16RegisterClass)
4936    return Res;
4937
4938  if (VT == MVT::i8) {
4939    unsigned DestReg = 0;
4940    switch (Res.first) {
4941    default: break;
4942    case X86::AX: DestReg = X86::AL; break;
4943    case X86::DX: DestReg = X86::DL; break;
4944    case X86::CX: DestReg = X86::CL; break;
4945    case X86::BX: DestReg = X86::BL; break;
4946    }
4947    if (DestReg) {
4948      Res.first = DestReg;
4949      Res.second = X86::GR8RegisterClass;
4950    }
4951  } else if (VT == MVT::i32) {
4952    unsigned DestReg = 0;
4953    switch (Res.first) {
4954    default: break;
4955    case X86::AX: DestReg = X86::EAX; break;
4956    case X86::DX: DestReg = X86::EDX; break;
4957    case X86::CX: DestReg = X86::ECX; break;
4958    case X86::BX: DestReg = X86::EBX; break;
4959    case X86::SI: DestReg = X86::ESI; break;
4960    case X86::DI: DestReg = X86::EDI; break;
4961    case X86::BP: DestReg = X86::EBP; break;
4962    case X86::SP: DestReg = X86::ESP; break;
4963    }
4964    if (DestReg) {
4965      Res.first = DestReg;
4966      Res.second = X86::GR32RegisterClass;
4967    }
4968  } else if (VT == MVT::i64) {
4969    unsigned DestReg = 0;
4970    switch (Res.first) {
4971    default: break;
4972    case X86::AX: DestReg = X86::RAX; break;
4973    case X86::DX: DestReg = X86::RDX; break;
4974    case X86::CX: DestReg = X86::RCX; break;
4975    case X86::BX: DestReg = X86::RBX; break;
4976    case X86::SI: DestReg = X86::RSI; break;
4977    case X86::DI: DestReg = X86::RDI; break;
4978    case X86::BP: DestReg = X86::RBP; break;
4979    case X86::SP: DestReg = X86::RSP; break;
4980    }
4981    if (DestReg) {
4982      Res.first = DestReg;
4983      Res.second = X86::GR64RegisterClass;
4984    }
4985  }
4986
4987  return Res;
4988}
4989
4990