X86ISelLowering.cpp revision 25ab690a43cbbb591b76d49e3595b019c32f4b3f
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

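  // For orientation: Legal means the node is natively supported; Promote
  // widens to a larger type first (e.g. i1 -> i8); Expand lets the legalizer
  // rewrite the node in terms of other nodes or a libcall; Custom routes the
  // node through this target's LowerOperation hook.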
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  }

  setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
  setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);

  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);

  setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
    setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  setOperationAction(ISD::SELECT , MVT::i8 , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC , MVT::i64 , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET , MVT::Other, Custom);
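  // A callee-popped return is encoded as "ret imm16" (e.g. "ret 4" pops four
  // bytes of arguments after the return address), which is why RET needs
  // custom lowering here.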
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
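    // Without SSE2, scalar FP lives on the x87 register stack (ST0-ST7) as
    // 80-bit temporaries; note that fsin/fcos are trusted only under
    // unsafe-fp-math below because they are not correctly rounded and have a
    // limited argument range.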
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND, MVT::v4f32, Legal);
    setOperationAction(ISD::OR, MVT::v4f32, Legal);
    setOperationAction(ISD::XOR, MVT::v4f32, Legal);
    setOperationAction(ISD::ADD, MVT::v4f32, Legal);
    setOperationAction(ISD::SUB, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v2f64, Legal);
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2f64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8: ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments... On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in an XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement; // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
  ReturnAddrIndex = 0; // No return address slot generated yet.
  BytesToPopOnReturn = 0; // Callee pops nothing.
  BytesCallerReserves = ArgOffset;
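  // For illustration: for "struct S f(void)" the caller passes a hidden
  // pointer to the result buffer as an implicit first argument; on Darwin
  // the callee removes it with "ret 4", hence the 4 bytes below.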
  // If this is a struct return on Darwin/X86, the callee pops the hidden struct
  // pointer.
  if (MF.getFunction()->getCallingConv() == CallingConv::CSRet &&
      Subtarget->isTargetDarwin())
    BytesToPopOnReturn = 4;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}


SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  MVT::ValueType RetVT = Op.Val->getValueType(0);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        ++NumXMMRegs;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits. If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned ExtOp =
        cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other); // Returns a chain
  NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;
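  // The CALLSEQ_END operands below are the chain, the number of bytes the
  // caller pushed, and the number of bytes the callee pops on return; the
  // latter is nonzero only for the Darwin struct-return case.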
  // If this is a call to a struct-return function on Darwin/X86, the callee
  // pops the hidden struct pointer, so we have to push it back.
  if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin())
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other); // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
                           DAG.getSrcValue(NULL));
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// HowToPassX86_64CCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
static void
HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
                           unsigned NumIntRegs, unsigned NumXMMRegs,
                           unsigned &ObjSize, unsigned &ObjIntRegs,
                           unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    if (NumIntRegs < 6)
      ObjIntRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::i8: ObjSize = 1; break;
      case MVT::i16: ObjSize = 2; break;
      case MVT::i32: ObjSize = 4; break;
      case MVT::i64: ObjSize = 8; break;
      }
    }
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 8)
      ObjXMMRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::f32: ObjSize = 4; break;
      case MVT::f64: ObjSize = 8; break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: ObjSize = 16; break;
      }
      break;
    }
  }
}

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments... On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [RSP] -- return address
  // [RSP + 8] -- first nonreg argument (leftmost lexically)
  // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
  unsigned NumIntRegs = 0; // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
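  // As a reference point, the AMD64 SysV ABI passes the first six integer
  // arguments in RDI, RSI, RDX, RCX, R8, R9 (or their narrower aliases) and
  // the first eight FP/vector arguments in XMM0-XMM7; e.g. f(int, double, int)
  // receives its operands in EDI, XMM0 and ESI.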
  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 8;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    // FIXME: __int128 and long double support?
    HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                               ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 8)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::i64: {
        TargetRegisterClass *RC = NULL;
        switch (ObjectVT) {
        default: break;
        case MVT::i8:
          RC = X86::GR8RegisterClass;
          Reg = GPR8ArgRegs[NumIntRegs];
          break;
        case MVT::i16:
          RC = X86::GR16RegisterClass;
          Reg = GPR16ArgRegs[NumIntRegs];
          break;
        case MVT::i32:
          RC = X86::GR32RegisterClass;
          Reg = GPR32ArgRegs[NumIntRegs];
          break;
        case MVT::i64:
          RC = X86::GR64RegisterClass;
          Reg = GPR64ArgRegs[NumIntRegs];
          break;
        }
        Reg = AddLiveIn(MF, Reg, RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      case MVT::f32:
      case MVT::f64:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: {
        TargetRegisterClass *RC = (ObjectVT == MVT::f32) ?
          X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
          X86::FR64RegisterClass : X86::VR128RegisterClass);
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    } else if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgOffset += ArgIncrement; // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_arg.
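    // Rough layout of the register save area created below: 6 GPRs * 8 bytes
    // at offset 0, then 8 XMM registers * 16 bytes starting at offset 48;
    // the va_list gp_offset/fp_offset fields index into this block.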
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    std::vector<SDOperand> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0; // No return address slot generated yet.
  BytesToPopOnReturn = 0; // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  MVT::ValueType RetVT = Op.Val->getValueType(0);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  unsigned NumIntRegs = 0; // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6)
        ++NumIntRegs;
      else
        NumBytes += 8;
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8)
        NumXMMRegs++;
      else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
        NumBytes += 8;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6) {
        unsigned Reg = 0;
        switch (ArgVT) {
        default: break;
        case MVT::i8: Reg = GPR8ArgRegs[NumIntRegs]; break;
        case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
        case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
        case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
        }
        RegsToPass.push_back(std::make_pair(Reg, Arg));
        ++NumIntRegs;
      } else {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 8;
      }
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
          // XMM arguments have to be aligned on 16-byte boundary.
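          // i.e. round up to the next multiple of 16: 8 -> 16, 16 -> 16.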
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
          ArgOffset += 8;
        else
          ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.
    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other); // Returns a chain
  NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other); // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
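  // X86-64 is caller-cleanup, so the callee-pop operand of CALLSEQ_END below
  // is always zero (contrast the Darwin struct-return case in the 32-bit
  // path above).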
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    if (Op.Val->getValueType(1) == MVT::i64) {
      // FIXME: __int128 support?
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i64);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // FIXME: long double support?
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                 Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 1;
    break;
  case MVT::i16:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 2;
    break;
  case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 4;
    break;
  case MVT::i64:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 4;
    } else
#endif
      ObjSize = 8;
    break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues()-1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments... On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  //    ...
  unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far. This can be either
  // 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                            ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        }
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }

    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                                          DAG.getSrcValue(NULL));
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                               DAG.getSrcValue(NULL));
      ArgOffset += ArgIncrement; // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);

  // Pad the argument area to 8n+4 bytes so that the argument block plus the
  // 4-byte return address stays 8-byte aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
  ReturnAddrIndex = 0; // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    MF.addLiveOut(X86::XMM0);
    break;
  }
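  // Worked example of the 8n+4 rule above: 8 bytes of arguments get padded
  // to 12, so the arguments plus the 4-byte return address total 16, and the
  // stack stays 8-byte aligned across the call.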
1521 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 1522 Op.Val->value_end()); 1523 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 1524} 1525 1526SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG){ 1527 SDOperand Chain = Op.getOperand(0); 1528 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 1529 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1530 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1531 SDOperand Callee = Op.getOperand(4); 1532 MVT::ValueType RetVT= Op.Val->getValueType(0); 1533 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 1534 1535 // Count how many bytes are to be pushed on the stack. 1536 unsigned NumBytes = 0; 1537 1538 // Keep track of the number of integer regs passed so far. This can be either 1539 // 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 1540 // used). 1541 unsigned NumIntRegs = 0; 1542 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 1543 1544 static const unsigned GPRArgRegs[][2] = { 1545 { X86::AL, X86::DL }, 1546 { X86::AX, X86::DX }, 1547 { X86::EAX, X86::EDX } 1548 }; 1549 static const unsigned XMMArgRegs[] = { 1550 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 1551 }; 1552 1553 for (unsigned i = 0; i != NumOps; ++i) { 1554 SDOperand Arg = Op.getOperand(5+2*i); 1555 1556 switch (Arg.getValueType()) { 1557 default: assert(0 && "Unknown value type!"); 1558 case MVT::i8: 1559 case MVT::i16: 1560 case MVT::i32: 1561#if FASTCC_NUM_INT_ARGS_INREGS > 0 1562 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1563 ++NumIntRegs; 1564 break; 1565 } 1566#endif 1567 // Fall through 1568 case MVT::f32: 1569 NumBytes += 4; 1570 break; 1571 case MVT::f64: 1572 NumBytes += 8; 1573 break; 1574 case MVT::v16i8: 1575 case MVT::v8i16: 1576 case MVT::v4i32: 1577 case MVT::v2i64: 1578 case MVT::v4f32: 1579 case MVT::v2f64: 1580 if (NumXMMRegs < 4) 1581 NumXMMRegs++; 1582 else { 1583 // XMM arguments have to be aligned on 16-byte boundary. 1584 NumBytes = ((NumBytes + 15) / 16) * 16; 1585 NumBytes += 16; 1586 } 1587 break; 1588 } 1589 } 1590 1591 // Make sure the outgoing argument area occupies 8n+4 bytes so that the stack 1592 // stays 8-byte aligned after the arguments and the return address have been pushed. 1593 if ((NumBytes & 7) == 0) 1594 NumBytes += 4; 1595 1596 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1597 1598 // Arguments go on the stack in reverse order, as specified by the ABI. 
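// A standalone sketch, for illustration only (plain C++, invented names,
// not LLVM code), of the byte-counting loop above, assuming
// FASTCC_NUM_INT_ARGS_INREGS == 2 and the four XMM argument registers:
enum class FastCCArg { Int, F32, F64, Vec128 };

static unsigned countFastCCStackBytes(const FastCCArg *Args, unsigned N) {
  unsigned NumBytes = 0, NumIntRegs = 0, NumXMMRegs = 0;
  for (unsigned i = 0; i != N; ++i) {
    switch (Args[i]) {
    case FastCCArg::Int:
      if (NumIntRegs < 2) { ++NumIntRegs; break; } // passed in EAX/EDX
      NumBytes += 4;                               // else goes on the stack
      break;
    case FastCCArg::F32: NumBytes += 4; break;
    case FastCCArg::F64: NumBytes += 8; break;
    case FastCCArg::Vec128:
      if (NumXMMRegs < 4) { ++NumXMMRegs; break; } // passed in XMM0-XMM3
      NumBytes = (NumBytes + 15) / 16 * 16;        // 16-byte align the slot
      NumBytes += 16;
      break;
    }
  }
  // Pad to 8n+4: together with the 4-byte return address this keeps the
  // stack 8-byte aligned across the call.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;
  return NumBytes;
}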
1599 unsigned ArgOffset = 0; 1600 NumIntRegs = 0; 1601 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 1602 std::vector<SDOperand> MemOpChains; 1603 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); 1604 for (unsigned i = 0; i != NumOps; ++i) { 1605 SDOperand Arg = Op.getOperand(5+2*i); 1606 1607 switch (Arg.getValueType()) { 1608 default: assert(0 && "Unexpected ValueType for argument!"); 1609 case MVT::i8: 1610 case MVT::i16: 1611 case MVT::i32: 1612#if FASTCC_NUM_INT_ARGS_INREGS > 0 1613 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 1614 RegsToPass.push_back( 1615 std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs], 1616 Arg)); 1617 ++NumIntRegs; 1618 break; 1619 } 1620#endif 1621 // Fall through 1622 case MVT::f32: { 1623 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1624 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1625 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1626 Arg, PtrOff, DAG.getSrcValue(NULL))); 1627 ArgOffset += 4; 1628 break; 1629 } 1630 case MVT::f64: { 1631 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1632 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1633 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1634 Arg, PtrOff, DAG.getSrcValue(NULL))); 1635 ArgOffset += 8; 1636 break; 1637 } 1638 case MVT::v16i8: 1639 case MVT::v8i16: 1640 case MVT::v4i32: 1641 case MVT::v2i64: 1642 case MVT::v4f32: 1643 case MVT::v2f64: 1644 if (NumXMMRegs < 4) { 1645 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 1646 NumXMMRegs++; 1647 } else { 1648 // XMM arguments have to be aligned on 16-byte boundary. 1649 ArgOffset = ((ArgOffset + 15) / 16) * 16; 1650 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1651 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1652 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1653 Arg, PtrOff, DAG.getSrcValue(NULL))); 1654 ArgOffset += 16; 1655 } 1656 } 1657 } 1658 1659 if (!MemOpChains.empty()) 1660 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1661 &MemOpChains[0], MemOpChains.size()); 1662 1663 // Build a sequence of copy-to-reg nodes chained together with token chain 1664 // and flag operands which copy the outgoing args into registers. 1665 SDOperand InFlag; 1666 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1667 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1668 InFlag); 1669 InFlag = Chain.getValue(1); 1670 } 1671 1672 // If the callee is a GlobalAddress node (quite common, every direct call is) 1673 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1674 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1675 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1676 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1677 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1678 1679 std::vector<MVT::ValueType> NodeTys; 1680 NodeTys.push_back(MVT::Other); // Returns a chain 1681 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1682 std::vector<SDOperand> Ops; 1683 Ops.push_back(Chain); 1684 Ops.push_back(Callee); 1685 1686 // Add argument registers to the end of the list so that they are known live 1687 // into the call. 
1688 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1689 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1690 RegsToPass[i].second.getValueType())); 1691 1692 if (InFlag.Val) 1693 Ops.push_back(InFlag); 1694 1695 // FIXME: Do not generate X86ISD::TAILCALL for now. 1696 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1697 NodeTys, &Ops[0], Ops.size()); 1698 InFlag = Chain.getValue(1); 1699 1700 NodeTys.clear(); 1701 NodeTys.push_back(MVT::Other); // Returns a chain 1702 if (RetVT != MVT::Other) 1703 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1704 Ops.clear(); 1705 Ops.push_back(Chain); 1706 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1707 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1708 Ops.push_back(InFlag); 1709 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1710 if (RetVT != MVT::Other) 1711 InFlag = Chain.getValue(1); 1712 1713 std::vector<SDOperand> ResultVals; 1714 NodeTys.clear(); 1715 switch (RetVT) { 1716 default: assert(0 && "Unknown value type to return!"); 1717 case MVT::Other: break; 1718 case MVT::i8: 1719 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 1720 ResultVals.push_back(Chain.getValue(0)); 1721 NodeTys.push_back(MVT::i8); 1722 break; 1723 case MVT::i16: 1724 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 1725 ResultVals.push_back(Chain.getValue(0)); 1726 NodeTys.push_back(MVT::i16); 1727 break; 1728 case MVT::i32: 1729 if (Op.Val->getValueType(1) == MVT::i32) { 1730 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 1731 ResultVals.push_back(Chain.getValue(0)); 1732 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32, 1733 Chain.getValue(2)).getValue(1); 1734 ResultVals.push_back(Chain.getValue(0)); 1735 NodeTys.push_back(MVT::i32); 1736 } else { 1737 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 1738 ResultVals.push_back(Chain.getValue(0)); 1739 } 1740 NodeTys.push_back(MVT::i32); 1741 break; 1742 case MVT::v16i8: 1743 case MVT::v8i16: 1744 case MVT::v4i32: 1745 case MVT::v2i64: 1746 case MVT::v4f32: 1747 case MVT::v2f64: 1748 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); 1749 ResultVals.push_back(Chain.getValue(0)); 1750 NodeTys.push_back(RetVT); 1751 break; 1752 case MVT::f32: 1753 case MVT::f64: { 1754 std::vector<MVT::ValueType> Tys; 1755 Tys.push_back(MVT::f64); 1756 Tys.push_back(MVT::Other); 1757 Tys.push_back(MVT::Flag); 1758 std::vector<SDOperand> Ops; 1759 Ops.push_back(Chain); 1760 Ops.push_back(InFlag); 1761 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, 1762 &Ops[0], Ops.size()); 1763 Chain = RetVal.getValue(1); 1764 InFlag = RetVal.getValue(2); 1765 if (X86ScalarSSE) { 1766 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 1767 // shouldn't be necessary except that RFP cannot be live across 1768 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
1769 MachineFunction &MF = DAG.getMachineFunction(); 1770 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1771 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1772 Tys.clear(); 1773 Tys.push_back(MVT::Other); 1774 Ops.clear(); 1775 Ops.push_back(Chain); 1776 Ops.push_back(RetVal); 1777 Ops.push_back(StackSlot); 1778 Ops.push_back(DAG.getValueType(RetVT)); 1779 Ops.push_back(InFlag); 1780 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 1781 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, 1782 DAG.getSrcValue(NULL)); 1783 Chain = RetVal.getValue(1); 1784 } 1785 1786 if (RetVT == MVT::f32 && !X86ScalarSSE) 1787 // FIXME: we would really like to remember that this FP_ROUND 1788 // operation is okay to eliminate if we allow excess FP precision. 1789 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1790 ResultVals.push_back(RetVal); 1791 NodeTys.push_back(RetVT); 1792 break; 1793 } 1794 } 1795 1796 1797 // If the function returns void, just return the chain. 1798 if (ResultVals.empty()) 1799 return Chain; 1800 1801 // Otherwise, merge everything together with a MERGE_VALUES node. 1802 NodeTys.push_back(MVT::Other); 1803 ResultVals.push_back(Chain); 1804 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, 1805 &ResultVals[0], ResultVals.size()); 1806 return Res.getValue(Op.ResNo); 1807} 1808 1809SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1810 if (ReturnAddrIndex == 0) { 1811 // Set up a frame object for the return address. 1812 MachineFunction &MF = DAG.getMachineFunction(); 1813 if (Subtarget->is64Bit()) 1814 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 1815 else 1816 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1817 } 1818 1819 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1820} 1821 1822 1823 1824std::pair<SDOperand, SDOperand> X86TargetLowering:: 1825LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 1826 SelectionDAG &DAG) { 1827 SDOperand Result; 1828 if (Depth) // Depths > 0 not supported yet! 1829 Result = DAG.getConstant(0, getPointerTy()); 1830 else { 1831 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 1832 if (!isFrameAddress) 1833 // Just load the return address 1834 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, 1835 DAG.getSrcValue(NULL)); 1836 else 1837 Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 1838 DAG.getConstant(4, getPointerTy())); 1839 } 1840 return std::make_pair(Result, Chain); 1841} 1842 1843/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 1844/// which corresponds to the condition code. 
1845 static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 1846 switch (X86CC) { 1847 default: assert(0 && "Unknown X86 conditional code!"); 1848 case X86ISD::COND_A: return X86::JA; 1849 case X86ISD::COND_AE: return X86::JAE; 1850 case X86ISD::COND_B: return X86::JB; 1851 case X86ISD::COND_BE: return X86::JBE; 1852 case X86ISD::COND_E: return X86::JE; 1853 case X86ISD::COND_G: return X86::JG; 1854 case X86ISD::COND_GE: return X86::JGE; 1855 case X86ISD::COND_L: return X86::JL; 1856 case X86ISD::COND_LE: return X86::JLE; 1857 case X86ISD::COND_NE: return X86::JNE; 1858 case X86ISD::COND_NO: return X86::JNO; 1859 case X86ISD::COND_NP: return X86::JNP; 1860 case X86ISD::COND_NS: return X86::JNS; 1861 case X86ISD::COND_O: return X86::JO; 1862 case X86ISD::COND_P: return X86::JP; 1863 case X86ISD::COND_S: return X86::JS; 1864 } 1865} 1866 1867/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the X86 1868/// specific condition code. It returns false if it cannot do a direct 1869/// translation. X86CC is the translated CondCode. Flip is set to true if 1870/// the order of comparison operands should be flipped. 1871static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1872 unsigned &X86CC, bool &Flip) { 1873 Flip = false; 1874 X86CC = X86ISD::COND_INVALID; 1875 if (!isFP) { 1876 switch (SetCCOpcode) { 1877 default: break; 1878 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1879 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 1880 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 1881 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 1882 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 1883 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1884 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 1885 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 1886 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 1887 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 1888 } 1889 } else { 1890 // On a floating point condition, the flags are set as follows: 1891 // ZF PF CF op 1892 // 0 | 0 | 0 | X > Y 1893 // 0 | 0 | 1 | X < Y 1894 // 1 | 0 | 0 | X == Y 1895 // 1 | 1 | 1 | unordered 1896 switch (SetCCOpcode) { 1897 default: break; 1898 case ISD::SETUEQ: 1899 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 1900 case ISD::SETOLT: Flip = true; // Fallthrough 1901 case ISD::SETOGT: 1902 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 1903 case ISD::SETOLE: Flip = true; // Fallthrough 1904 case ISD::SETOGE: 1905 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 1906 case ISD::SETUGT: Flip = true; // Fallthrough 1907 case ISD::SETULT: 1908 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 1909 case ISD::SETUGE: Flip = true; // Fallthrough 1910 case ISD::SETULE: 1911 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 1912 case ISD::SETONE: 1913 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 1914 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 1915 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 1916 } 1917 } 1918 1919 return X86CC != X86ISD::COND_INVALID; 1920} 1921 1922static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 1923 bool &Flip) { 1924 return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 1925} 1926 1927/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1928/// code. The current x86 ISA includes the following FP cmov instructions: 1929/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 
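// A standalone sketch, for illustration only (plain C++, invented names):
// the flag table above mirrors how ucomis{s,d}/fucomi deposit an FP compare
// into ZF/PF/CF, which is why FP conditions map onto the *unsigned* branch
// flavors that hasFPCMov below accepts.
struct FPFlagsModel { bool ZF, PF, CF; };

static FPFlagsModel modelFPCompare(double X, double Y) { // ucomisd X, Y
  if (X != X || Y != Y) return { true,  true,  true  };  // unordered (NaN)
  if (X > Y)            return { false, false, false };
  if (X < Y)            return { false, false, true  };
  return                       { true,  false, false };  // X == Y
}
// SETOGT is COND_A (taken when CF==0 && ZF==0); SETOLT flips the operands
// first and then also uses COND_A; SETUO is COND_P, since only an unordered
// compare sets PF.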
1930 static bool hasFPCMov(unsigned X86CC) { 1931 switch (X86CC) { 1932 default: 1933 return false; 1934 case X86ISD::COND_B: 1935 case X86ISD::COND_BE: 1936 case X86ISD::COND_E: 1937 case X86ISD::COND_P: 1938 case X86ISD::COND_A: 1939 case X86ISD::COND_AE: 1940 case X86ISD::COND_NE: 1941 case X86ISD::COND_NP: 1942 return true; 1943 } 1944} 1945 1946/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 1947/// load. For Darwin, external and weak symbols are indirect, loading the value 1948/// at address GV rather than the value of GV itself. This means that the 1949/// GlobalAddress must be in the base or index register of the address, not the 1950/// GV offset field. 1951static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 1952 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 1953 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 1954} 1955 1956/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1957/// true if Op is undef or if its value falls within the specified range [Low, Hi). 1958static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1959 if (Op.getOpcode() == ISD::UNDEF) 1960 return true; 1961 1962 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1963 return (Val >= Low && Val < Hi); 1964} 1965 1966/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1967/// true if Op is undef or if its value is equal to the specified value. 1968static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1969 if (Op.getOpcode() == ISD::UNDEF) 1970 return true; 1971 return cast<ConstantSDNode>(Op)->getValue() == Val; 1972} 1973 1974/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1975/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1976bool X86::isPSHUFDMask(SDNode *N) { 1977 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1978 1979 if (N->getNumOperands() != 4) 1980 return false; 1981 1982 // Check if the value doesn't reference the second vector. 1983 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1984 SDOperand Arg = N->getOperand(i); 1985 if (Arg.getOpcode() == ISD::UNDEF) continue; 1986 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1987 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1988 return false; 1989 } 1990 1991 return true; 1992} 1993 1994/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1995/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1996bool X86::isPSHUFHWMask(SDNode *N) { 1997 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1998 1999 if (N->getNumOperands() != 8) 2000 return false; 2001 2002 // Lower quadword copied in order. 2003 for (unsigned i = 0; i != 4; ++i) { 2004 SDOperand Arg = N->getOperand(i); 2005 if (Arg.getOpcode() == ISD::UNDEF) continue; 2006 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2007 if (cast<ConstantSDNode>(Arg)->getValue() != i) 2008 return false; 2009 } 2010 2011 // Upper quadword shuffled. 2012 for (unsigned i = 4; i != 8; ++i) { 2013 SDOperand Arg = N->getOperand(i); 2014 if (Arg.getOpcode() == ISD::UNDEF) continue; 2015 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2016 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2017 if (Val < 4 || Val > 7) 2018 return false; 2019 } 2020 2021 return true; 2022} 2023 2024/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 2025/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 
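// A standalone sketch, for illustration only (plain int masks, -1 meaning
// undef; invented name): every PSHUF* predicate here has the same shape --
// one quadword must be an identity copy, the other may only shuffle within
// itself. Model of the PSHUFLW check that follows:
static bool modelPSHUFLWMask(const int M[8]) {
  for (int i = 4; i != 8; ++i)            // upper quadword copied in order
    if (M[i] != -1 && M[i] != i)
      return false;
  for (int i = 0; i != 4; ++i)            // lower quadword shuffled in place
    if (M[i] != -1 && (M[i] < 0 || M[i] > 3))
      return false;
  return true;
}
// e.g. the mask <3,2,1,0, 4,5,6,7> is accepted, while anything that pulls a
// lane from the upper quadword (or from the second vector) is rejected.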
2026 bool X86::isPSHUFLWMask(SDNode *N) { 2027 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2028 2029 if (N->getNumOperands() != 8) 2030 return false; 2031 2032 // Upper quadword copied in order. 2033 for (unsigned i = 4; i != 8; ++i) 2034 if (!isUndefOrEqual(N->getOperand(i), i)) 2035 return false; 2036 2037 // Lower quadword shuffled. 2038 for (unsigned i = 0; i != 4; ++i) 2039 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2040 return false; 2041 2042 return true; 2043} 2044 2045/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2046/// specifies a shuffle of elements that is suitable for input to SHUFP*. 2047static bool isSHUFPMask(std::vector<SDOperand> &N) { 2048 unsigned NumElems = N.size(); 2049 if (NumElems != 2 && NumElems != 4) return false; 2050 2051 unsigned Half = NumElems / 2; 2052 for (unsigned i = 0; i < Half; ++i) 2053 if (!isUndefOrInRange(N[i], 0, NumElems)) 2054 return false; 2055 for (unsigned i = Half; i < NumElems; ++i) 2056 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 2057 return false; 2058 2059 return true; 2060} 2061 2062bool X86::isSHUFPMask(SDNode *N) { 2063 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2064 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2065 return ::isSHUFPMask(Ops); 2066} 2067 2068/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 2069/// the reverse of what x86 shuffles want. x86 shuffles require the lower 2070/// half elements to come from vector 1 (which would equal the dest.) and 2071/// the upper half to come from vector 2. 2072static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 2073 unsigned NumElems = Ops.size(); 2074 if (NumElems != 2 && NumElems != 4) return false; 2075 2076 unsigned Half = NumElems / 2; 2077 for (unsigned i = 0; i < Half; ++i) 2078 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 2079 return false; 2080 for (unsigned i = Half; i < NumElems; ++i) 2081 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 2082 return false; 2083 return true; 2084} 2085 2086static bool isCommutedSHUFP(SDNode *N) { 2087 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2088 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2089 return isCommutedSHUFP(Ops); 2090} 2091 2092/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2093/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2094bool X86::isMOVHLPSMask(SDNode *N) { 2095 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2096 2097 if (N->getNumOperands() != 4) 2098 return false; 2099 2100 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2101 return isUndefOrEqual(N->getOperand(0), 6) && 2102 isUndefOrEqual(N->getOperand(1), 7) && 2103 isUndefOrEqual(N->getOperand(2), 2) && 2104 isUndefOrEqual(N->getOperand(3), 3); 2105} 2106 2107/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2108/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 
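// A standalone sketch, for illustration only (invented name): shufps
// hard-wires its low result half to the first source and its high half to
// the second, which is exactly what isSHUFPMask above enforces and what
// isCommutedSHUFP detects in reverse so the operands can be swapped.
static void modelSHUFPS(const float A[4], const float B[4], unsigned Imm,
                        float Out[4]) {
  Out[0] = A[(Imm >> 0) & 3];   // low half selects from the first operand
  Out[1] = A[(Imm >> 2) & 3];
  Out[2] = B[(Imm >> 4) & 3];   // high half selects from the second operand
  Out[3] = B[(Imm >> 6) & 3];
}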
2109 bool X86::isMOVLPMask(SDNode *N) { 2110 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2111 2112 unsigned NumElems = N->getNumOperands(); 2113 if (NumElems != 2 && NumElems != 4) 2114 return false; 2115 2116 for (unsigned i = 0; i < NumElems/2; ++i) 2117 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 2118 return false; 2119 2120 for (unsigned i = NumElems/2; i < NumElems; ++i) 2121 if (!isUndefOrEqual(N->getOperand(i), i)) 2122 return false; 2123 2124 return true; 2125} 2126 2127/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 2128/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 2129/// and MOVLHPS. 2130bool X86::isMOVHPMask(SDNode *N) { 2131 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2132 2133 unsigned NumElems = N->getNumOperands(); 2134 if (NumElems != 2 && NumElems != 4) 2135 return false; 2136 2137 for (unsigned i = 0; i < NumElems/2; ++i) 2138 if (!isUndefOrEqual(N->getOperand(i), i)) 2139 return false; 2140 2141 for (unsigned i = 0; i < NumElems/2; ++i) { 2142 SDOperand Arg = N->getOperand(i + NumElems/2); 2143 if (!isUndefOrEqual(Arg, i + NumElems)) 2144 return false; 2145 } 2146 2147 return true; 2148} 2149 2150/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 2151/// specifies a shuffle of elements that is suitable for input to UNPCKL. 2152bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2153 unsigned NumElems = N.size(); 2154 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2155 return false; 2156 2157 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2158 SDOperand BitI = N[i]; 2159 SDOperand BitI1 = N[i+1]; 2160 if (!isUndefOrEqual(BitI, j)) 2161 return false; 2162 if (V2IsSplat) { 2163 if (!isUndefOrEqual(BitI1, NumElems)) 2164 return false; 2165 } else { 2166 if (!isUndefOrEqual(BitI1, j + NumElems)) 2167 return false; 2168 } 2169 } 2170 2171 return true; 2172} 2173 2174bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 2175 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2176 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2177 return ::isUNPCKLMask(Ops, V2IsSplat); 2178} 2179 2180/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 2181/// specifies a shuffle of elements that is suitable for input to UNPCKH. 2182bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2183 unsigned NumElems = N.size(); 2184 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2185 return false; 2186 2187 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2188 SDOperand BitI = N[i]; 2189 SDOperand BitI1 = N[i+1]; 2190 if (!isUndefOrEqual(BitI, j + NumElems/2)) 2191 return false; 2192 if (V2IsSplat) { 2193 if (!isUndefOrEqual(BitI1, NumElems)) 2194 return false; 2195 } else { 2196 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 2197 return false; 2198 } 2199 } 2200 2201 return true; 2202} 2203 2204bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 2205 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2206 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2207 return ::isUNPCKHMask(Ops, V2IsSplat); 2208} 2209 2210/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 2211/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, 2212/// <0, 0, 1, 1> 2213bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 2214 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2215 2216 unsigned NumElems = N->getNumOperands(); 2217 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 2218 return false; 2219 2220 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2221 SDOperand BitI = N->getOperand(i); 2222 SDOperand BitI1 = N->getOperand(i+1); 2223 2224 if (!isUndefOrEqual(BitI, j)) 2225 return false; 2226 if (!isUndefOrEqual(BitI1, j)) 2227 return false; 2228 } 2229 2230 return true; 2231} 2232 2233/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 2234/// specifies a shuffle of elements that is suitable for input to MOVSS, 2235/// MOVSD, and MOVD, i.e. setting the lowest element. 2236static bool isMOVLMask(std::vector<SDOperand> &N) { 2237 unsigned NumElems = N.size(); 2238 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2239 return false; 2240 2241 if (!isUndefOrEqual(N[0], NumElems)) 2242 return false; 2243 2244 for (unsigned i = 1; i < NumElems; ++i) { 2245 SDOperand Arg = N[i]; 2246 if (!isUndefOrEqual(Arg, i)) 2247 return false; 2248 } 2249 2250 return true; 2251} 2252 2253bool X86::isMOVLMask(SDNode *N) { 2254 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2255 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2256 return ::isMOVLMask(Ops); 2257} 2258 2259/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse 2260/// of what x86 movss wants. X86 movss requires the lowest element to be the lowest 2261/// element of vector 2 and the other elements to come from vector 1 in order. 2262static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false, 2263 bool V2IsUndef = false) { 2264 unsigned NumElems = Ops.size(); 2265 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2266 return false; 2267 2268 if (!isUndefOrEqual(Ops[0], 0)) 2269 return false; 2270 2271 for (unsigned i = 1; i < NumElems; ++i) { 2272 SDOperand Arg = Ops[i]; 2273 if (!(isUndefOrEqual(Arg, i+NumElems) || 2274 (V2IsUndef && isUndefOrInRange(Arg, NumElems, NumElems*2)) || 2275 (V2IsSplat && isUndefOrEqual(Arg, NumElems)))) 2276 return false; 2277 } 2278 2279 return true; 2280} 2281 2282static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 2283 bool V2IsUndef = false) { 2284 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2285 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2286 return isCommutedMOVL(Ops, V2IsSplat, V2IsUndef); 2287} 2288 2289/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2290/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 
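// A standalone sketch, for illustration only (invented names): the two SSE3
// predicates that follow accept exactly the lane-duplication patterns the
// hardware implements, <1,1,3,3> for movshdup and <0,0,2,2> for movsldup
// (undefs allowed in any position).
static void modelMOVSHDUP(const float In[4], float Out[4]) {
  Out[0] = Out[1] = In[1];      // duplicate the odd lanes downward
  Out[2] = Out[3] = In[3];
}
static void modelMOVSLDUP(const float In[4], float Out[4]) {
  Out[0] = Out[1] = In[0];      // duplicate the even lanes upward
  Out[2] = Out[3] = In[2];
}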
2291 bool X86::isMOVSHDUPMask(SDNode *N) { 2292 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2293 2294 if (N->getNumOperands() != 4) 2295 return false; 2296 2297 // Expect 1, 1, 3, 3 2298 for (unsigned i = 0; i < 2; ++i) { 2299 SDOperand Arg = N->getOperand(i); 2300 if (Arg.getOpcode() == ISD::UNDEF) continue; 2301 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2302 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2303 if (Val != 1) return false; 2304 } 2305 2306 bool HasHi = false; 2307 for (unsigned i = 2; i < 4; ++i) { 2308 SDOperand Arg = N->getOperand(i); 2309 if (Arg.getOpcode() == ISD::UNDEF) continue; 2310 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2311 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2312 if (Val != 3) return false; 2313 HasHi = true; 2314 } 2315 2316 // Don't use movshdup if it can be done with a shufps. 2317 return HasHi; 2318} 2319 2320/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2321/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 2322bool X86::isMOVSLDUPMask(SDNode *N) { 2323 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2324 2325 if (N->getNumOperands() != 4) 2326 return false; 2327 2328 // Expect 0, 0, 2, 2 2329 for (unsigned i = 0; i < 2; ++i) { 2330 SDOperand Arg = N->getOperand(i); 2331 if (Arg.getOpcode() == ISD::UNDEF) continue; 2332 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2333 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2334 if (Val != 0) return false; 2335 } 2336 2337 bool HasHi = false; 2338 for (unsigned i = 2; i < 4; ++i) { 2339 SDOperand Arg = N->getOperand(i); 2340 if (Arg.getOpcode() == ISD::UNDEF) continue; 2341 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2342 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2343 if (Val != 2) return false; 2344 HasHi = true; 2345 } 2346 2347 // Don't use movsldup if it can be done with a shufps. 2348 return HasHi; 2349} 2350 2351/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2352/// a splat of a single element. 2353static bool isSplatMask(SDNode *N) { 2354 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2355 2356 // This is a splat operation if each element of the permute is the same, and 2357 // if the value doesn't reference the second vector. 2358 unsigned NumElems = N->getNumOperands(); 2359 SDOperand ElementBase; 2360 unsigned i = 0; 2361 for (; i != NumElems; ++i) { 2362 SDOperand Elt = N->getOperand(i); 2363 if (isa<ConstantSDNode>(Elt)) { 2364 ElementBase = Elt; 2365 break; 2366 } 2367 } 2368 2369 if (!ElementBase.Val) 2370 return false; 2371 2372 for (; i != NumElems; ++i) { 2373 SDOperand Arg = N->getOperand(i); 2374 if (Arg.getOpcode() == ISD::UNDEF) continue; 2375 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2376 if (Arg != ElementBase) return false; 2377 } 2378 2379 // Make sure it is a splat of the first vector operand. 2380 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 2381} 2382 2383/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2384/// a splat of a single element and it's a 2 or 4 element mask. 2385bool X86::isSplatMask(SDNode *N) { 2386 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2387 2388 // We can only splat 64-bit and 32-bit quantities with a single instruction. 
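// A standalone sketch, for illustration only (invented name): with 2 or 4
// lanes, a splat is a single pshufd/shufps whose immediate repeats the same
// 2-bit lane index in all four fields.
static unsigned modelSplatImm(unsigned Lane) {  // assumes Lane < 4
  return Lane * 0x55;  // 0x55 replicates the 2-bit index into every field
}
// e.g. splatting lane 2 of a v4f32 is shufps $0xAA, %xmm0, %xmm0.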
2389 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2390 return false; 2391 return ::isSplatMask(N); 2392} 2393 2394/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2395/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2396/// instructions. 2397unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2398 unsigned NumOperands = N->getNumOperands(); 2399 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2400 unsigned Mask = 0; 2401 for (unsigned i = 0; i < NumOperands; ++i) { 2402 unsigned Val = 0; 2403 SDOperand Arg = N->getOperand(NumOperands-i-1); 2404 if (Arg.getOpcode() != ISD::UNDEF) 2405 Val = cast<ConstantSDNode>(Arg)->getValue(); 2406 if (Val >= NumOperands) Val -= NumOperands; 2407 Mask |= Val; 2408 if (i != NumOperands - 1) 2409 Mask <<= Shift; 2410 } 2411 2412 return Mask; 2413} 2414 2415/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2416/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2417/// instructions. 2418unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2419 unsigned Mask = 0; 2420 // 8 nodes, but we only care about the last 4. 2421 for (unsigned i = 7; i >= 4; --i) { 2422 unsigned Val = 4; // an undef lane contributes 0 after the -4 bias below 2423 SDOperand Arg = N->getOperand(i); 2424 if (Arg.getOpcode() != ISD::UNDEF) 2425 Val = cast<ConstantSDNode>(Arg)->getValue(); 2426 Mask |= (Val - 4); 2427 if (i != 4) 2428 Mask <<= 2; 2429 } 2430 2431 return Mask; 2432} 2433 2434/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2435/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2436/// instructions. 2437unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2438 unsigned Mask = 0; 2439 // 8 nodes, but we only care about the first 4. 2440 for (int i = 3; i >= 0; --i) { 2441 unsigned Val = 0; 2442 SDOperand Arg = N->getOperand(i); 2443 if (Arg.getOpcode() != ISD::UNDEF) 2444 Val = cast<ConstantSDNode>(Arg)->getValue(); 2445 Mask |= Val; 2446 if (i != 0) 2447 Mask <<= 2; 2448 } 2449 2450 return Mask; 2451} 2452 2453/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2454/// specifies an 8-element shuffle that can be broken into a pair of 2455/// PSHUFHW and PSHUFLW. 2456static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2457 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2458 2459 if (N->getNumOperands() != 8) 2460 return false; 2461 2462 // Lower quadword shuffled. 2463 for (unsigned i = 0; i != 4; ++i) { 2464 SDOperand Arg = N->getOperand(i); 2465 if (Arg.getOpcode() == ISD::UNDEF) continue; 2466 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2467 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2468 if (Val >= 4) 2469 return false; 2470 } 2471 2472 // Upper quadword shuffled. 2473 for (unsigned i = 4; i != 8; ++i) { 2474 SDOperand Arg = N->getOperand(i); 2475 if (Arg.getOpcode() == ISD::UNDEF) continue; 2476 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2477 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2478 if (Val < 4 || Val > 7) 2479 return false; 2480 } 2481 2482 return true; 2483} 2484 2485/// CommuteVectorShuffle - Swap vector_shuffle operands as well as 2486/// values in their permute mask. 
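// A standalone sketch, for illustration only (invented name; -1 denotes
// undef): commuting a vector_shuffle, as the helper below does, swaps V1/V2
// and rebases every defined mask index across the NumElems boundary.
static void modelCommuteMask(int *Mask, unsigned NumElems) {
  for (unsigned i = 0; i != NumElems; ++i) {
    if (Mask[i] < 0) continue;          // undef stays undef
    if (Mask[i] < (int)NumElems)
      Mask[i] += (int)NumElems;         // was V1, now refers to V2
    else
      Mask[i] -= (int)NumElems;         // was V2, now refers to V1
  }
}
// e.g. with 4 elements, <2,6,3,7> becomes <6,2,7,3>.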
2487static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 2488 SDOperand V1 = Op.getOperand(0); 2489 SDOperand V2 = Op.getOperand(1); 2490 SDOperand Mask = Op.getOperand(2); 2491 MVT::ValueType VT = Op.getValueType(); 2492 MVT::ValueType MaskVT = Mask.getValueType(); 2493 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 2494 unsigned NumElems = Mask.getNumOperands(); 2495 std::vector<SDOperand> MaskVec; 2496 2497 for (unsigned i = 0; i != NumElems; ++i) { 2498 SDOperand Arg = Mask.getOperand(i); 2499 if (Arg.getOpcode() == ISD::UNDEF) { 2500 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2501 continue; 2502 } 2503 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2504 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2505 if (Val < NumElems) 2506 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2507 else 2508 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2509 } 2510 2511 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2512 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 2513} 2514 2515/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2516/// match movhlps. The lower half elements should come from upper half of 2517/// V1 (and in order), and the upper half elements should come from the upper 2518/// half of V2 (and in order). 2519static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2520 unsigned NumElems = Mask->getNumOperands(); 2521 if (NumElems != 4) 2522 return false; 2523 for (unsigned i = 0, e = 2; i != e; ++i) 2524 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2525 return false; 2526 for (unsigned i = 2; i != 4; ++i) 2527 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2528 return false; 2529 return true; 2530} 2531 2532/// isScalarLoadToVector - Returns true if the node is a scalar load that 2533/// is promoted to a vector. 2534static inline bool isScalarLoadToVector(SDNode *N) { 2535 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2536 N = N->getOperand(0).Val; 2537 return (N->getOpcode() == ISD::LOAD); 2538 } 2539 return false; 2540} 2541 2542/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2543/// match movlp{s|d}. The lower half elements should come from lower half of 2544/// V1 (and in order), and the upper half elements should come from the upper 2545/// half of V2 (and in order). And since V1 will become the source of the 2546/// MOVLP, it must be either a vector load or a scalar load to vector. 2547static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) { 2548 if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1)) 2549 return false; 2550 2551 unsigned NumElems = Mask->getNumOperands(); 2552 if (NumElems != 2 && NumElems != 4) 2553 return false; 2554 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2555 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2556 return false; 2557 for (unsigned i = NumElems/2; i != NumElems; ++i) 2558 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2559 return false; 2560 return true; 2561} 2562 2563/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2564/// all the same. 
2565 static bool isSplatVector(SDNode *N) { 2566 if (N->getOpcode() != ISD::BUILD_VECTOR) 2567 return false; 2568 2569 SDOperand SplatValue = N->getOperand(0); 2570 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2571 if (N->getOperand(i) != SplatValue) 2572 return false; 2573 return true; 2574} 2575 2576/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2577/// to an undef. 2578static bool isUndefShuffle(SDNode *N) { 2579 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2580 return false; 2581 2582 SDOperand V1 = N->getOperand(0); 2583 SDOperand V2 = N->getOperand(1); 2584 SDOperand Mask = N->getOperand(2); 2585 unsigned NumElems = Mask.getNumOperands(); 2586 for (unsigned i = 0; i != NumElems; ++i) { 2587 SDOperand Arg = Mask.getOperand(i); 2588 if (Arg.getOpcode() != ISD::UNDEF) { 2589 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2590 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2591 return false; 2592 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2593 return false; 2594 } 2595 } 2596 return true; 2597} 2598 2599/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2600/// that point to V2 point to its first element. 2601static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2602 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2603 2604 bool Changed = false; 2605 std::vector<SDOperand> MaskVec; 2606 unsigned NumElems = Mask.getNumOperands(); 2607 for (unsigned i = 0; i != NumElems; ++i) { 2608 SDOperand Arg = Mask.getOperand(i); 2609 if (Arg.getOpcode() != ISD::UNDEF) { 2610 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2611 if (Val > NumElems) { 2612 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2613 Changed = true; 2614 } 2615 } 2616 MaskVec.push_back(Arg); 2617 } 2618 2619 if (Changed) 2620 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2621 &MaskVec[0], MaskVec.size()); 2622 return Mask; 2623} 2624 2625/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2626/// operation of specified width. 2627static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2628 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2629 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2630 2631 std::vector<SDOperand> MaskVec; 2632 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2633 for (unsigned i = 1; i != NumElems; ++i) 2634 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2635 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2636} 2637 2638/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2639/// of specified width. 2640static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2641 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2642 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2643 std::vector<SDOperand> MaskVec; 2644 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2645 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2646 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2647 } 2648 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2649} 2650 2651/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2652/// of specified width. 
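// A standalone sketch, for illustration only (invented names):
// unpcklps/unpckhps interleave their sources lane by lane, which is why the
// mask builders here emit the patterns <0, N, 1, N+1, ...> and
// <N/2, N+N/2, ...>.
static void modelUNPCKLPS(const float A[4], const float B[4], float Out[4]) {
  Out[0] = A[0]; Out[1] = B[0];  // interleave the low halves...
  Out[2] = A[1]; Out[3] = B[1];
}
static void modelUNPCKHPS(const float A[4], const float B[4], float Out[4]) {
  Out[0] = A[2]; Out[1] = B[2];  // ...or the high halves
  Out[2] = A[3]; Out[3] = B[3];
}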
2653static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2654 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2655 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 2656 unsigned Half = NumElems/2; 2657 std::vector<SDOperand> MaskVec; 2658 for (unsigned i = 0; i != Half; ++i) { 2659 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2660 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2661 } 2662 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2663} 2664 2665/// getZeroVector - Returns a vector of specified type with all zero elements. 2666/// 2667static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2668 assert(MVT::isVector(VT) && "Expected a vector type"); 2669 unsigned NumElems = getVectorNumElements(VT); 2670 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2671 bool isFP = MVT::isFloatingPoint(EVT); 2672 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2673 std::vector<SDOperand> ZeroVec(NumElems, Zero); 2674 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2675} 2676 2677/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2678/// 2679static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2680 SDOperand V1 = Op.getOperand(0); 2681 SDOperand Mask = Op.getOperand(2); 2682 MVT::ValueType VT = Op.getValueType(); 2683 unsigned NumElems = Mask.getNumOperands(); 2684 Mask = getUnpacklMask(NumElems, DAG); 2685 while (NumElems != 4) { 2686 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2687 NumElems >>= 1; 2688 } 2689 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2690 2691 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2692 Mask = getZeroVector(MaskVT, DAG); 2693 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2694 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2695 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2696} 2697 2698/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2699/// constant +0.0. 2700static inline bool isZeroNode(SDOperand Elt) { 2701 return ((isa<ConstantSDNode>(Elt) && 2702 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2703 (isa<ConstantFPSDNode>(Elt) && 2704 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2705} 2706 2707/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2708/// vector and zero or undef vector. 2709static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2710 unsigned NumElems, unsigned Idx, 2711 bool isZero, SelectionDAG &DAG) { 2712 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2713 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2714 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2715 SDOperand Zero = DAG.getConstant(0, EVT); 2716 std::vector<SDOperand> MaskVec(NumElems, Zero); 2717 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2718 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2719 &MaskVec[0], MaskVec.size()); 2720 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2721} 2722 2723/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
2724/// 2725static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2726 unsigned NumNonZero, unsigned NumZero, 2727 SelectionDAG &DAG, TargetLowering &TLI) { 2728 if (NumNonZero > 8) 2729 return SDOperand(); 2730 2731 SDOperand V(0, 0); 2732 bool First = true; 2733 for (unsigned i = 0; i < 16; ++i) { 2734 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2735 if (ThisIsNonZero && First) { 2736 if (NumZero) 2737 V = getZeroVector(MVT::v8i16, DAG); 2738 else 2739 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2740 First = false; 2741 } 2742 2743 if ((i & 1) != 0) { 2744 SDOperand ThisElt(0, 0), LastElt(0, 0); 2745 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2746 if (LastIsNonZero) { 2747 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2748 } 2749 if (ThisIsNonZero) { 2750 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2751 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2752 ThisElt, DAG.getConstant(8, MVT::i8)); 2753 if (LastIsNonZero) 2754 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2755 } else 2756 ThisElt = LastElt; 2757 2758 if (ThisElt.Val) 2759 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2760 DAG.getConstant(i/2, TLI.getPointerTy())); 2761 } 2762 } 2763 2764 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2765} 2766 2767/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 2768/// 2769static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2770 unsigned NumNonZero, unsigned NumZero, 2771 SelectionDAG &DAG, TargetLowering &TLI) { 2772 if (NumNonZero > 4) 2773 return SDOperand(); 2774 2775 SDOperand V(0, 0); 2776 bool First = true; 2777 for (unsigned i = 0; i < 8; ++i) { 2778 bool isNonZero = (NonZeros & (1 << i)) != 0; 2779 if (isNonZero) { 2780 if (First) { 2781 if (NumZero) 2782 V = getZeroVector(MVT::v8i16, DAG); 2783 else 2784 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2785 First = false; 2786 } 2787 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2788 DAG.getConstant(i, TLI.getPointerTy())); 2789 } 2790 } 2791 2792 return V; 2793} 2794 2795SDOperand 2796X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2797 // All zeros are handled with pxor. 2798 if (ISD::isBuildVectorAllZeros(Op.Val)) 2799 return Op; 2800 2801 // All ones are handled with pcmpeqd. 2802 if (ISD::isBuildVectorAllOnes(Op.Val)) 2803 return Op; 2804 2805 MVT::ValueType VT = Op.getValueType(); 2806 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 2807 unsigned EVTBits = MVT::getSizeInBits(EVT); 2808 2809 unsigned NumElems = Op.getNumOperands(); 2810 unsigned NumZero = 0; 2811 unsigned NumNonZero = 0; 2812 unsigned NonZeros = 0; 2813 std::set<SDOperand> Values; 2814 for (unsigned i = 0; i < NumElems; ++i) { 2815 SDOperand Elt = Op.getOperand(i); 2816 if (Elt.getOpcode() != ISD::UNDEF) { 2817 Values.insert(Elt); 2818 if (isZeroNode(Elt)) 2819 NumZero++; 2820 else { 2821 NonZeros |= (1 << i); 2822 NumNonZero++; 2823 } 2824 } 2825 } 2826 2827 if (NumNonZero == 0) 2828 // Must be a mix of zero and undef. Return a zero vector. 2829 return getZeroVector(VT, DAG); 2830 2831 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2832 if (Values.size() == 1) 2833 return SDOperand(); 2834 2835 // Special case for single non-zero element. 
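// A standalone sketch, for illustration only (invented name):
// LowerBuildVectorv16i8 above relies on two adjacent byte elements forming
// one i16 lane that pinsrw can insert; its ZERO_EXTEND/SHL/OR combine is
// just this bit-level pairing (element 2k is the low byte of lane k,
// element 2k+1 the high byte, matching little-endian lane layout):
static unsigned short modelPairBytes(unsigned char Lo, unsigned char Hi) {
  return (unsigned short)(((unsigned)Hi << 8) | Lo);
}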
2836 if (NumNonZero == 1) { 2837 unsigned Idx = CountTrailingZeros_32(NonZeros); 2838 SDOperand Item = Op.getOperand(Idx); 2839 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2840 if (Idx == 0) 2841 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2842 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2843 NumZero > 0, DAG); 2844 2845 if (EVTBits == 32) { 2846 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2847 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2848 DAG); 2849 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2850 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 2851 std::vector<SDOperand> MaskVec; 2852 for (unsigned i = 0; i < NumElems; i++) 2853 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2854 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2855 &MaskVec[0], MaskVec.size()); 2856 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2857 DAG.getNode(ISD::UNDEF, VT), Mask); 2858 } 2859 } 2860 2861 // Let legalizer expand 2-wide build_vectors. 2862 if (EVTBits == 64) 2863 return SDOperand(); 2864 2865 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2866 if (EVTBits == 8) { 2867 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2868 *this); 2869 if (V.Val) return V; 2870 } 2871 2872 if (EVTBits == 16) { 2873 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2874 *this); 2875 if (V.Val) return V; 2876 } 2877 2878 // If element VT is == 32 bits, turn it into a number of shuffles. 2879 std::vector<SDOperand> V(NumElems); 2880 if (NumElems == 4 && NumZero > 0) { 2881 for (unsigned i = 0; i < 4; ++i) { 2882 bool isZero = !(NonZeros & (1 << i)); 2883 if (isZero) 2884 V[i] = getZeroVector(VT, DAG); 2885 else 2886 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2887 } 2888 2889 for (unsigned i = 0; i < 2; ++i) { 2890 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2891 default: break; 2892 case 0: 2893 V[i] = V[i*2]; // Must be a zero vector. 2894 break; 2895 case 1: 2896 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2897 getMOVLMask(NumElems, DAG)); 2898 break; 2899 case 2: 2900 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2901 getMOVLMask(NumElems, DAG)); 2902 break; 2903 case 3: 2904 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2905 getUnpacklMask(NumElems, DAG)); 2906 break; 2907 } 2908 } 2909 2910 // Take advantage of the fact that GR32 to VR128 scalar_to_vector (i.e. movd) 2911 // clears the upper bits. 2912 // FIXME: we can do the same for v4f32 case when we know both parts of 2913 // the lower half come from scalar_to_vector (loadf32). We should do 2914 // that in post legalizer dag combiner with target specific hooks. 
2915 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2916 return V[0]; 2917 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2918 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 2919 std::vector<SDOperand> MaskVec; 2920 bool Reverse = (NonZeros & 0x3) == 2; 2921 for (unsigned i = 0; i < 2; ++i) 2922 if (Reverse) 2923 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2924 else 2925 MaskVec.push_back(DAG.getConstant(i, EVT)); 2926 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2927 for (unsigned i = 0; i < 2; ++i) 2928 if (Reverse) 2929 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2930 else 2931 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2932 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2933 &MaskVec[0], MaskVec.size()); 2934 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2935 } 2936 2937 if (Values.size() > 2) { 2938 // Expand into a number of unpckl*. 2939 // e.g. for v4f32 2940 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2941 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2942 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2943 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2944 for (unsigned i = 0; i < NumElems; ++i) 2945 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2946 NumElems >>= 1; 2947 while (NumElems != 0) { 2948 for (unsigned i = 0; i < NumElems; ++i) 2949 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2950 UnpckMask); 2951 NumElems >>= 1; 2952 } 2953 return V[0]; 2954 } 2955 2956 return SDOperand(); 2957} 2958 2959SDOperand 2960X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2961 SDOperand V1 = Op.getOperand(0); 2962 SDOperand V2 = Op.getOperand(1); 2963 SDOperand PermMask = Op.getOperand(2); 2964 MVT::ValueType VT = Op.getValueType(); 2965 unsigned NumElems = PermMask.getNumOperands(); 2966 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2967 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2968 2969 if (isUndefShuffle(Op.Val)) 2970 return DAG.getNode(ISD::UNDEF, VT); 2971 2972 if (isSplatMask(PermMask.Val)) { 2973 if (NumElems <= 4) return Op; 2974 // Promote it to a v4i32 splat. 2975 return PromoteSplat(Op, DAG); 2976 } 2977 2978 if (X86::isMOVLMask(PermMask.Val)) 2979 return (V1IsUndef) ? V2 : Op; 2980 2981 if (X86::isMOVSHDUPMask(PermMask.Val) || 2982 X86::isMOVSLDUPMask(PermMask.Val) || 2983 X86::isMOVHLPSMask(PermMask.Val) || 2984 X86::isMOVHPMask(PermMask.Val) || 2985 X86::isMOVLPMask(PermMask.Val)) 2986 return Op; 2987 2988 if (ShouldXformToMOVHLPS(PermMask.Val) || 2989 ShouldXformToMOVLP(V1.Val, PermMask.Val)) 2990 return CommuteVectorShuffle(Op, DAG); 2991 2992 bool V1IsSplat = isSplatVector(V1.Val); 2993 bool V2IsSplat = isSplatVector(V2.Val); 2994 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 2995 Op = CommuteVectorShuffle(Op, DAG); 2996 V1 = Op.getOperand(0); 2997 V2 = Op.getOperand(1); 2998 PermMask = Op.getOperand(2); 2999 std::swap(V1IsSplat, V2IsSplat); 3000 std::swap(V1IsUndef, V2IsUndef); 3001 } 3002 3003 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 3004 if (V2IsUndef) return V1; 3005 Op = CommuteVectorShuffle(Op, DAG); 3006 V1 = Op.getOperand(0); 3007 V2 = Op.getOperand(1); 3008 PermMask = Op.getOperand(2); 3009 if (V2IsSplat) { 3010 // V2 is a splat, so the mask may be malformed. That is, it may point 3011 // to any V2 element. The instruction selector won't like this. Get 3012 // a corrected mask and commute to form a proper MOVS{S|D}. 
3013 SDOperand NewMask = getMOVLMask(NumElems, DAG); 3014 if (NewMask.Val != PermMask.Val) 3015 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3016 } 3017 return Op; 3018 } 3019 3020 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 3021 X86::isUNPCKLMask(PermMask.Val) || 3022 X86::isUNPCKHMask(PermMask.Val)) 3023 return Op; 3024 3025 if (V2IsSplat) { 3026 // Normalize mask so all entries that point to V2 point to its first 3027 // element, then try to match unpck{h|l} again. If it matches, return a 3028 // new vector_shuffle with the corrected mask. 3029 SDOperand NewMask = NormalizeMask(PermMask, DAG); 3030 if (NewMask.Val != PermMask.Val) { 3031 if (X86::isUNPCKLMask(PermMask.Val, true)) { 3032 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 3033 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3034 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 3035 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 3036 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3037 } 3038 } 3039 } 3040 3041 // Normalize the node to match x86 shuffle ops if needed 3042 if (V2.getOpcode() != ISD::UNDEF) 3043 if (isCommutedSHUFP(PermMask.Val)) { 3044 Op = CommuteVectorShuffle(Op, DAG); 3045 V1 = Op.getOperand(0); 3046 V2 = Op.getOperand(1); 3047 PermMask = Op.getOperand(2); 3048 } 3049 3050 // If VT is integer, try PSHUF* first, then SHUFP*. 3051 if (MVT::isInteger(VT)) { 3052 if (X86::isPSHUFDMask(PermMask.Val) || 3053 X86::isPSHUFHWMask(PermMask.Val) || 3054 X86::isPSHUFLWMask(PermMask.Val)) { 3055 if (V2.getOpcode() != ISD::UNDEF) 3056 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3057 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3058 return Op; 3059 } 3060 3061 if (X86::isSHUFPMask(PermMask.Val)) 3062 return Op; 3063 3064 // Handle v8i16 shuffle high / low shuffle node pair. 3065 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 3066 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3067 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3068 std::vector<SDOperand> MaskVec; 3069 for (unsigned i = 0; i != 4; ++i) 3070 MaskVec.push_back(PermMask.getOperand(i)); 3071 for (unsigned i = 4; i != 8; ++i) 3072 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3073 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3074 &MaskVec[0], MaskVec.size()); 3075 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3076 MaskVec.clear(); 3077 for (unsigned i = 0; i != 4; ++i) 3078 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3079 for (unsigned i = 4; i != 8; ++i) 3080 MaskVec.push_back(PermMask.getOperand(i)); 3081 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 3082 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3083 } 3084 } else { 3085 // Floating point cases in the other order. 
3086 if (X86::isSHUFPMask(PermMask.Val)) 3087 return Op; 3088 if (X86::isPSHUFDMask(PermMask.Val) || 3089 X86::isPSHUFHWMask(PermMask.Val) || 3090 X86::isPSHUFLWMask(PermMask.Val)) { 3091 if (V2.getOpcode() != ISD::UNDEF) 3092 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3093 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3094 return Op; 3095 } 3096 } 3097 3098 if (NumElems == 4) { 3099 MVT::ValueType MaskVT = PermMask.getValueType(); 3100 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3101 std::vector<std::pair<int, int> > Locs; 3102 Locs.resize(NumElems); 3103 std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3104 std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3105 unsigned NumHi = 0; 3106 unsigned NumLo = 0; 3107 // If no more than two elements come from either vector, this can be 3108 // implemented with two shuffles. The first shuffle gathers the elements. 3109 // The second shuffle, which takes the first shuffle as both of its 3110 // vector operands, puts the elements into the right order. 3111 for (unsigned i = 0; i != NumElems; ++i) { 3112 SDOperand Elt = PermMask.getOperand(i); 3113 if (Elt.getOpcode() == ISD::UNDEF) { 3114 Locs[i] = std::make_pair(-1, -1); 3115 } else { 3116 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 3117 if (Val < NumElems) { 3118 Locs[i] = std::make_pair(0, NumLo); 3119 Mask1[NumLo] = Elt; 3120 NumLo++; 3121 } else { 3122 Locs[i] = std::make_pair(1, NumHi); 3123 if (2+NumHi < NumElems) 3124 Mask1[2+NumHi] = Elt; 3125 NumHi++; 3126 } 3127 } 3128 } 3129 if (NumLo <= 2 && NumHi <= 2) { 3130 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3131 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3132 &Mask1[0], Mask1.size())); 3133 for (unsigned i = 0; i != NumElems; ++i) { 3134 if (Locs[i].first == -1) 3135 continue; 3136 else { 3137 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 3138 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 3139 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3140 } 3141 } 3142 3143 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3144 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3145 &Mask2[0], Mask2.size())); 3146 } 3147 3148 // Break it into (shuffle shuffle_hi, shuffle_lo). 
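// A standalone sketch, for illustration only (invented names; -1 denotes
// undef): the fallback below splits a 4-lane mask into a "lo" shuffle
// feeding lanes 0-1 and a "hi" shuffle feeding lanes 2-3, then merges the
// two with one final shuffle whose operands are (LoShuffle, HiShuffle).
static void modelSplitFourLaneMask(const int Mask[4], int LoMask[4],
                                   int HiMask[4], int Final[4]) {
  int Part[4], Pos[4] = {0, 0, 0, 0};
  for (int i = 0; i != 4; ++i)
    LoMask[i] = HiMask[i] = -1;
  int LoIdx = 0, HiIdx = 2;
  for (int i = 0; i != 4; ++i) {
    int *Half = (i < 2) ? LoMask : HiMask;        // which shuffle feeds lane i
    if (i == 2) { LoIdx = 0; HiIdx = 2; }         // fresh slots for each half
    if (Mask[i] < 0) { Part[i] = -1; continue; }  // undef lane
    int &Slot = (Mask[i] < 4) ? LoIdx : HiIdx;    // V1 elts low, V2 elts high
    Half[Slot] = Mask[i];
    Part[i] = (i < 2) ? 0 : 1;                    // 0 = LoShuffle, 1 = HiShuffle
    Pos[i] = Slot++;
  }
  for (int i = 0; i != 4; ++i)                    // the final merge mask
    Final[i] = (Part[i] < 0) ? -1 : Part[i] * 4 + Pos[i];
}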
3149 Locs.assign(NumElems, std::make_pair(-1, -1)); 3150 std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3151 std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3152 std::vector<SDOperand> *MaskPtr = &LoMask; 3153 unsigned MaskIdx = 0; 3154 unsigned LoIdx = 0; 3155 unsigned HiIdx = NumElems/2; 3156 for (unsigned i = 0; i != NumElems; ++i) { 3157 if (i == NumElems/2) { 3158 MaskPtr = &HiMask; 3159 MaskIdx = 1; 3160 LoIdx = 0; 3161 HiIdx = NumElems/2; 3162 } 3163 SDOperand Elt = PermMask.getOperand(i); 3164 if (Elt.getOpcode() == ISD::UNDEF) { 3165 Locs[i] = std::make_pair(-1, -1); 3166 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3167 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3168 (*MaskPtr)[LoIdx] = Elt; 3169 LoIdx++; 3170 } else { 3171 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3172 (*MaskPtr)[HiIdx] = Elt; 3173 HiIdx++; 3174 } 3175 } 3176 3177 SDOperand LoShuffle = 3178 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3179 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3180 &LoMask[0], LoMask.size())); 3181 SDOperand HiShuffle = 3182 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3183 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3184 &HiMask[0], HiMask.size())); 3185 std::vector<SDOperand> MaskOps; 3186 for (unsigned i = 0; i != NumElems; ++i) { 3187 if (Locs[i].first == -1) { 3188 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3189 } else { 3190 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3191 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3192 } 3193 } 3194 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3195 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3196 &MaskOps[0], MaskOps.size())); 3197 } 3198 3199 return SDOperand(); 3200} 3201 3202SDOperand 3203X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3204 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3205 return SDOperand(); 3206 3207 MVT::ValueType VT = Op.getValueType(); 3208 // TODO: handle v16i8. 3209 if (MVT::getSizeInBits(VT) == 16) { 3210 // Transform it so it matches pextrw, which produces a 32-bit result. 3211 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 3212 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 3213 Op.getOperand(0), Op.getOperand(1)); 3214 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 3215 DAG.getValueType(VT)); 3216 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3217 } else if (MVT::getSizeInBits(VT) == 32) { 3218 SDOperand Vec = Op.getOperand(0); 3219 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3220 if (Idx == 0) 3221 return Op; 3222 // SHUFPS the element to the lowest double word, then movss.
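// That is, (extract_vector_elt V, Idx) is rewritten as
//   (extract_vector_elt (vector_shuffle V, V, <Idx,u,u,u>), 0)
// so the desired element lands in lane 0, where the extract is free (a
// movss if the value must leave the register); the SHUFPS immediate itself
// is chosen later by the instruction selector.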
3223 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3224 std::vector<SDOperand> IdxVec; 3225 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 3226 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3227 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3228 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3229 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3230 &IdxVec[0], IdxVec.size()); 3231 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3232 Vec, Vec, Mask); 3233 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3234 DAG.getConstant(0, getPointerTy())); 3235 } else if (MVT::getSizeInBits(VT) == 64) { 3236 SDOperand Vec = Op.getOperand(0); 3237 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3238 if (Idx == 0) 3239 return Op; 3240 3241 // UNPCKHPD the element to the lowest double word, then movsd. 3242 // Note if the lower 64 bits of the result of the UNPCKHPD are then stored 3243 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 3244 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2); 3245 std::vector<SDOperand> IdxVec; 3246 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 3247 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3248 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3249 &IdxVec[0], IdxVec.size()); 3250 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3251 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3252 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3253 DAG.getConstant(0, getPointerTy())); 3254 } 3255 3256 return SDOperand(); 3257} 3258 3259SDOperand 3260X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3261 // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32 3262 // as its second argument. 3263 MVT::ValueType VT = Op.getValueType(); 3264 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 3265 SDOperand N0 = Op.getOperand(0); 3266 SDOperand N1 = Op.getOperand(1); 3267 SDOperand N2 = Op.getOperand(2); 3268 if (MVT::getSizeInBits(BaseVT) == 16) { 3269 if (N1.getValueType() != MVT::i32) 3270 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3271 if (N2.getValueType() != MVT::i32) 3272 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 3273 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3274 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3275 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3276 if (Idx == 0) { 3277 // Use a movss. 3278 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3279 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3280 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3281 std::vector<SDOperand> MaskVec; 3282 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3283 for (unsigned i = 1; i <= 3; ++i) 3284 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3285 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3286 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3287 &MaskVec[0], MaskVec.size())); 3288 } else { 3289 // Use two pinsrw instructions to insert a 32-bit value. 3290 Idx <<= 1; 3291 if (MVT::isFloatingPoint(N1.getValueType())) { 3292 if (N1.getOpcode() == ISD::LOAD) { 3293 // Just load directly from f32mem to GR32.
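// The i32 bits then go in as two 16-bit halves (sketch): pinsrw with
// immediate 2*Idx takes the low half and pinsrw with 2*Idx+1 takes
// (bits >> 16), both on the vector bit_convert'ed to v8i16, after which
// the result is bit_convert'ed back to VT.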
3294 N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), 3295 N1.getOperand(2)); 3296 } else { 3297 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3298 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3299 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3300 DAG.getConstant(0, getPointerTy())); 3301 } 3302 } 3303 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3304 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3305 DAG.getConstant(Idx, getPointerTy())); 3306 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3307 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3308 DAG.getConstant(Idx+1, getPointerTy())); 3309 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3310 } 3311 } 3312 3313 return SDOperand(); 3314} 3315 3316SDOperand 3317X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3318 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3319 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3320} 3321 3322// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 3323// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 3324// one of the above mentioned nodes. It has to be wrapped because otherwise 3325// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 3326// be used to form addressing mode. These wrapped nodes will be selected 3327// into MOV32ri. 3328SDOperand 3329X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 3330 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 3331 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3332 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 3333 CP->getAlignment())); 3334 if (Subtarget->isTargetDarwin()) { 3335 // With PIC, the address is actually $g + Offset. 3336 if (!Subtarget->is64Bit() && 3337 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3338 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3339 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 3340 } 3341 3342 return Result; 3343} 3344 3345SDOperand 3346X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 3347 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 3348 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3349 DAG.getTargetGlobalAddress(GV, 3350 getPointerTy())); 3351 if (Subtarget->isTargetDarwin()) { 3352 // With PIC, the address is actually $g + Offset. 3353 if (!Subtarget->is64Bit() && 3354 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3355 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3356 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3357 Result); 3358 3359 // For Darwin, external and weak symbols are indirect, so we want to load 3360 // the value at address GV, not the value of GV itself. This means that 3361 // the GlobalAddress must be in the base or index register of the address, 3362 // not the GV offset field. 
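// Illustrative only (Darwin/i386, DynamicNoPIC; actual stub names are
// assembler-generated): an external global G is reached through its
// non-lazy pointer,
//   movl L_G$non_lazy_ptr, %eax   // address of G
//   movl (%eax), %ecx             // value of G
// which is why the extra load below wraps Result rather than folding G
// into the displacement field.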
3363 if (getTargetMachine().getRelocationModel() != Reloc::Static && 3364 DarwinGVRequiresExtraLoad(GV)) 3365 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), 3366 Result, DAG.getSrcValue(NULL)); 3367 } 3368 3369 return Result; 3370} 3371 3372SDOperand 3373X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 3374 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 3375 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3376 DAG.getTargetExternalSymbol(Sym, 3377 getPointerTy())); 3378 if (Subtarget->isTargetDarwin()) { 3379 // With PIC, the address is actually $g + Offset. 3380 if (!Subtarget->is64Bit() && 3381 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3382 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3383 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3384 Result); 3385 } 3386 3387 return Result; 3388} 3389 3390SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3391 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 3392 "Not an i64 shift!"); 3393 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 3394 SDOperand ShOpLo = Op.getOperand(0); 3395 SDOperand ShOpHi = Op.getOperand(1); 3396 SDOperand ShAmt = Op.getOperand(2); 3397 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 3398 DAG.getConstant(31, MVT::i8)) 3399 : DAG.getConstant(0, MVT::i32); 3400 3401 SDOperand Tmp2, Tmp3; 3402 if (Op.getOpcode() == ISD::SHL_PARTS) { 3403 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 3404 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 3405 } else { 3406 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 3407 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3408 } 3409 3410 SDOperand InFlag = 3411 DAG.getNode(X86ISD::CMP, MVT::Flag, 3412 DAG.getNode(ISD::AND, MVT::i8, 3413 ShAmt, DAG.getConstant(32, MVT::i8)), 3414 DAG.getConstant(0, MVT::i8)); 3415 3416 SDOperand Hi, Lo; 3417 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3418 3419 std::vector<MVT::ValueType> Tys; 3420 Tys.push_back(MVT::i32); 3421 Tys.push_back(MVT::Flag); 3422 std::vector<SDOperand> Ops; 3423 if (Op.getOpcode() == ISD::SHL_PARTS) { 3424 Ops.push_back(Tmp2); 3425 Ops.push_back(Tmp3); 3426 Ops.push_back(CC); 3427 Ops.push_back(InFlag); 3428 Hi = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 3429 InFlag = Hi.getValue(1); 3430 3431 Ops.clear(); 3432 Ops.push_back(Tmp3); 3433 Ops.push_back(Tmp1); 3434 Ops.push_back(CC); 3435 Ops.push_back(InFlag); 3436 Lo = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 3437 } else { 3438 Ops.push_back(Tmp2); 3439 Ops.push_back(Tmp3); 3440 Ops.push_back(CC); 3441 Ops.push_back(InFlag); 3442 Lo = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 3443 InFlag = Lo.getValue(1); 3444 3445 Ops.clear(); 3446 Ops.push_back(Tmp3); 3447 Ops.push_back(Tmp1); 3448 Ops.push_back(CC); 3449 Ops.push_back(InFlag); 3450 Hi = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 3451 } 3452 3453 Tys.clear(); 3454 Tys.push_back(MVT::i32); 3455 Tys.push_back(MVT::i32); 3456 Ops.clear(); 3457 Ops.push_back(Lo); 3458 Ops.push_back(Hi); 3459 return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size()); 3460} 3461 3462SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3463 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3464 Op.getOperand(0).getValueType() >= MVT::i16 && 3465 "Unknown SINT_TO_FP to lower!"); 3466 3467 SDOperand Result; 3468 MVT::ValueType SrcVT = 
Op.getOperand(0).getValueType(); 3469 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3470 MachineFunction &MF = DAG.getMachineFunction(); 3471 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3472 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3473 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 3474 DAG.getEntryNode(), Op.getOperand(0), 3475 StackSlot, DAG.getSrcValue(NULL)); 3476 3477 // Build the FILD 3478 std::vector<MVT::ValueType> Tys; 3479 Tys.push_back(MVT::f64); 3480 Tys.push_back(MVT::Other); 3481 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 3482 std::vector<SDOperand> Ops; 3483 Ops.push_back(Chain); 3484 Ops.push_back(StackSlot); 3485 Ops.push_back(DAG.getValueType(SrcVT)); 3486 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3487 Tys, &Ops[0], Ops.size()); 3488 3489 if (X86ScalarSSE) { 3490 Chain = Result.getValue(1); 3491 SDOperand InFlag = Result.getValue(2); 3492 3493 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3494 // shouldn't be necessary except that RFP cannot be live across 3495 // multiple blocks. When stackifier is fixed, they can be uncoupled. 3496 MachineFunction &MF = DAG.getMachineFunction(); 3497 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3498 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3499 std::vector<MVT::ValueType> Tys; 3500 Tys.push_back(MVT::Other); 3501 std::vector<SDOperand> Ops; 3502 Ops.push_back(Chain); 3503 Ops.push_back(Result); 3504 Ops.push_back(StackSlot); 3505 Ops.push_back(DAG.getValueType(Op.getValueType())); 3506 Ops.push_back(InFlag); 3507 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3508 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 3509 DAG.getSrcValue(NULL)); 3510 } 3511 3512 return Result; 3513} 3514 3515SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3516 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3517 "Unknown FP_TO_SINT to lower!"); 3518 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3519 // stack slot. 
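// Sketch of the non-SSE f64 -> i64 case (the rounding-mode setup is added
// later, when the FP_TO_INT64_IN_MEM pseudo is expanded in
// InsertAtEndOfBasicBlock):
//   fldl <src>        // operand on the x87 stack
//   fistpll <slot>    // truncating store as a 64-bit integer
//   <reload slot>     // the load built below
// With SSE the value is first spilled and FLD'ed, since FIST can only
// operate on x87 stack registers.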
3520 MachineFunction &MF = DAG.getMachineFunction(); 3521 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3522 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3523 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3524 3525 unsigned Opc; 3526 switch (Op.getValueType()) { 3527 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3528 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3529 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3530 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3531 } 3532 3533 SDOperand Chain = DAG.getEntryNode(); 3534 SDOperand Value = Op.getOperand(0); 3535 if (X86ScalarSSE) { 3536 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3537 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 3538 DAG.getSrcValue(0)); 3539 std::vector<MVT::ValueType> Tys; 3540 Tys.push_back(MVT::f64); 3541 Tys.push_back(MVT::Other); 3542 std::vector<SDOperand> Ops; 3543 Ops.push_back(Chain); 3544 Ops.push_back(StackSlot); 3545 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 3546 Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size()); 3547 Chain = Value.getValue(1); 3548 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3549 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3550 } 3551 3552 // Build the FP_TO_INT*_IN_MEM 3553 std::vector<SDOperand> Ops; 3554 Ops.push_back(Chain); 3555 Ops.push_back(Value); 3556 Ops.push_back(StackSlot); 3557 SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size()); 3558 3559 // Load the result. 3560 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 3561 DAG.getSrcValue(NULL)); 3562} 3563 3564SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3565 MVT::ValueType VT = Op.getValueType(); 3566 const Type *OpNTy = MVT::getTypeForValueType(VT); 3567 std::vector<Constant*> CV; 3568 if (VT == MVT::f64) { 3569 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 3570 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3571 } else { 3572 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 3573 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3574 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3575 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3576 } 3577 Constant *CS = ConstantStruct::get(CV); 3578 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3579 std::vector<MVT::ValueType> Tys; 3580 Tys.push_back(VT); 3581 Tys.push_back(MVT::Other); 3582 SmallVector<SDOperand, 3> Ops; 3583 Ops.push_back(DAG.getEntryNode()); 3584 Ops.push_back(CPIdx); 3585 Ops.push_back(DAG.getSrcValue(NULL)); 3586 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3587 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3588} 3589 3590SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3591 MVT::ValueType VT = Op.getValueType(); 3592 const Type *OpNTy = MVT::getTypeForValueType(VT); 3593 std::vector<Constant*> CV; 3594 if (VT == MVT::f64) { 3595 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 3596 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3597 } else { 3598 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 3599 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3600 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3601 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 3602 } 3603 Constant *CS = ConstantStruct::get(CV); 3604 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 3605 std::vector<MVT::ValueType> Tys; 3606 
Tys.push_back(VT); 3607 Tys.push_back(MVT::Other); 3608 SmallVector<SDOperand, 3> Ops; 3609 Ops.push_back(DAG.getEntryNode()); 3610 Ops.push_back(CPIdx); 3611 Ops.push_back(DAG.getSrcValue(NULL)); 3612 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 3613 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3614} 3615 3616SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 3617 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3618 SDOperand Cond; 3619 SDOperand CC = Op.getOperand(2); 3620 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3621 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3622 bool Flip; 3623 unsigned X86CC; 3624 if (translateX86CC(CC, isFP, X86CC, Flip)) { 3625 if (Flip) 3626 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3627 Op.getOperand(1), Op.getOperand(0)); 3628 else 3629 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3630 Op.getOperand(0), Op.getOperand(1)); 3631 return DAG.getNode(X86ISD::SETCC, MVT::i8, 3632 DAG.getConstant(X86CC, MVT::i8), Cond); 3633 } else { 3634 assert(isFP && "Illegal integer SetCC!"); 3635 3636 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 3637 Op.getOperand(0), Op.getOperand(1)); 3638 std::vector<MVT::ValueType> Tys; 3639 std::vector<SDOperand> Ops; 3640 switch (SetCCOpcode) { 3641 default: assert(false && "Illegal floating point SetCC!"); 3642 case ISD::SETOEQ: { // !PF & ZF 3643 Tys.push_back(MVT::i8); 3644 Tys.push_back(MVT::Flag); 3645 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 3646 Ops.push_back(Cond); 3647 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size()); 3648 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 3649 DAG.getConstant(X86ISD::COND_E, MVT::i8), 3650 Tmp1.getValue(1)); 3651 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3652 } 3653 case ISD::SETUNE: { // PF | !ZF 3654 Tys.push_back(MVT::i8); 3655 Tys.push_back(MVT::Flag); 3656 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 3657 Ops.push_back(Cond); 3658 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size()); 3659 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 3660 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 3661 Tmp1.getValue(1)); 3662 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3663 } 3664 } 3665 } 3666} 3667 3668SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3669 MVT::ValueType VT = Op.getValueType(); 3670 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 3671 bool addTest = false; 3672 SDOperand Op0 = Op.getOperand(0); 3673 SDOperand Cond, CC; 3674 if (Op0.getOpcode() == ISD::SETCC) 3675 Op0 = LowerOperation(Op0, DAG); 3676 3677 if (Op0.getOpcode() == X86ISD::SETCC) { 3678 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3679 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 3680 // have another use it will be eliminated. 3681 // If the X86ISD::SETCC has more than one use, then it's probably better 3682 // to use a test instead of duplicating the X86ISD::CMP (for register 3683 // pressure reason). 
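// Concretely: an MVT::Flag result cannot have two readers, so a CMP whose
// SETCC has a single use is simply cloned here, while a SETCC with several
// uses keeps its i8 result and is re-tested against zero below.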
3684 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 3685 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 3686 CmpOpc == X86ISD::UCOMI) { 3687 if (!Op0.hasOneUse()) { 3688 std::vector<MVT::ValueType> Tys; 3689 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 3690 Tys.push_back(Op0.Val->getValueType(i)); 3691 std::vector<SDOperand> Ops; 3692 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 3693 Ops.push_back(Op0.getOperand(i)); 3694 Op0 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size()); 3695 } 3696 3697 CC = Op0.getOperand(0); 3698 Cond = Op0.getOperand(1); 3699 // Make a copy as flag result cannot be used by more than one. 3700 Cond = DAG.getNode(CmpOpc, MVT::Flag, 3701 Cond.getOperand(0), Cond.getOperand(1)); 3702 addTest = 3703 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3704 } else 3705 addTest = true; 3706 } else 3707 addTest = true; 3708 3709 if (addTest) { 3710 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3711 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, Op0, 3712 DAG.getConstant(0, MVT::i8)); 3713 } 3714 3715 std::vector<MVT::ValueType> Tys; 3716 Tys.push_back(Op.getValueType()); 3717 Tys.push_back(MVT::Flag); 3718 std::vector<SDOperand> Ops; 3719 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3720 // condition is true. 3721 Ops.push_back(Op.getOperand(2)); 3722 Ops.push_back(Op.getOperand(1)); 3723 Ops.push_back(CC); 3724 Ops.push_back(Cond); 3725 return DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size()); 3726} 3727 3728SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3729 bool addTest = false; 3730 SDOperand Cond = Op.getOperand(1); 3731 SDOperand Dest = Op.getOperand(2); 3732 SDOperand CC; 3733 if (Cond.getOpcode() == ISD::SETCC) 3734 Cond = LowerOperation(Cond, DAG); 3735 3736 if (Cond.getOpcode() == X86ISD::SETCC) { 3737 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3738 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 3739 // have another use it will be eliminated. 3740 // If the X86ISD::SETCC has more than one use, then it's probably better 3741 // to use a test instead of duplicating the X86ISD::CMP (for register 3742 // pressure reason). 3743 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 3744 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 3745 CmpOpc == X86ISD::UCOMI) { 3746 if (!Cond.hasOneUse()) { 3747 std::vector<MVT::ValueType> Tys; 3748 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 3749 Tys.push_back(Cond.Val->getValueType(i)); 3750 std::vector<SDOperand> Ops; 3751 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 3752 Ops.push_back(Cond.getOperand(i)); 3753 Cond = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size()); 3754 } 3755 3756 CC = Cond.getOperand(0); 3757 Cond = Cond.getOperand(1); 3758 // Make a copy as flag result cannot be used by more than one. 
3759 Cond = DAG.getNode(CmpOpc, MVT::Flag, 3760 Cond.getOperand(0), Cond.getOperand(1)); 3761 } else 3762 addTest = true; 3763 } else 3764 addTest = true; 3765 3766 if (addTest) { 3767 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 3768 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, Cond, 3769 DAG.getConstant(0, MVT::i8)); 3770 } 3771 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3772 Op.getOperand(0), Op.getOperand(2), CC, Cond); 3773} 3774 3775SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 3776 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 3777 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3778 DAG.getTargetJumpTable(JT->getIndex(), 3779 getPointerTy())); 3780 if (Subtarget->isTargetDarwin()) { 3781 // With PIC, the address is actually $g + Offset. 3782 if (!Subtarget->is64Bit() && 3783 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3784 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3785 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3786 Result); 3787 } 3788 3789 return Result; 3790} 3791 3792SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 3793 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3794 if (Subtarget->is64Bit()) 3795 return LowerX86_64CCCCallTo(Op, DAG); 3796 else if (CallingConv == CallingConv::Fast && EnableFastCC) 3797 return LowerFastCCCallTo(Op, DAG); 3798 else 3799 return LowerCCCCallTo(Op, DAG); 3800} 3801 3802SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { 3803 SDOperand Copy; 3804 3805 switch(Op.getNumOperands()) { 3806 default: 3807 assert(0 && "Do not know how to return this many arguments!"); 3808 abort(); 3809 case 1: // ret void. 3810 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 3811 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 3812 case 3: { 3813 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 3814 3815 if (MVT::isVector(ArgVT) || 3816 (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) { 3817 // Integer or FP vector result -> XMM0. 3818 if (DAG.getMachineFunction().liveout_empty()) 3819 DAG.getMachineFunction().addLiveOut(X86::XMM0); 3820 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 3821 SDOperand()); 3822 } else if (MVT::isInteger(ArgVT)) { 3823 // Integer result -> EAX / RAX. 3824 // The C calling convention guarantees the return value has been 3825 // promoted to at least MVT::i32. The X86-64 ABI doesn't require the 3826 // value to be promoted MVT::i64. So we don't have to extend it to 3827 // 64-bit. Return the value in EAX, but mark RAX as liveout. 3828 unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; 3829 if (DAG.getMachineFunction().liveout_empty()) 3830 DAG.getMachineFunction().addLiveOut(Reg); 3831 3832 Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX; 3833 Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1), 3834 SDOperand()); 3835 } else if (!X86ScalarSSE) { 3836 // FP return with fp-stack value. 3837 if (DAG.getMachineFunction().liveout_empty()) 3838 DAG.getMachineFunction().addLiveOut(X86::ST0); 3839 3840 std::vector<MVT::ValueType> Tys; 3841 Tys.push_back(MVT::Other); 3842 Tys.push_back(MVT::Flag); 3843 std::vector<SDOperand> Ops; 3844 Ops.push_back(Op.getOperand(0)); 3845 Ops.push_back(Op.getOperand(1)); 3846 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size()); 3847 } else { 3848 // FP return with ScalarSSE (return on fp-stack). 
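// The C calling convention still returns floating-point values in ST(0),
// so an SSE-computed result has to round-trip through memory (sketch):
//   movsd %xmm0, <slot>;  fldl <slot>   // value now in %st(0) for RET
// The load-matching case below skips the store when the value already
// comes from a load on the current chain.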
3849 if (DAG.getMachineFunction().liveout_empty()) 3850 DAG.getMachineFunction().addLiveOut(X86::ST0); 3851 3852 SDOperand MemLoc; 3853 SDOperand Chain = Op.getOperand(0); 3854 SDOperand Value = Op.getOperand(1); 3855 3856 if (Value.getOpcode() == ISD::LOAD && 3857 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 3858 Chain = Value.getOperand(0); 3859 MemLoc = Value.getOperand(1); 3860 } else { 3861 // Spill the value to memory and reload it into top of stack. 3862 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 3863 MachineFunction &MF = DAG.getMachineFunction(); 3864 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3865 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 3866 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 3867 Value, MemLoc, DAG.getSrcValue(0)); 3868 } 3869 std::vector<MVT::ValueType> Tys; 3870 Tys.push_back(MVT::f64); 3871 Tys.push_back(MVT::Other); 3872 std::vector<SDOperand> Ops; 3873 Ops.push_back(Chain); 3874 Ops.push_back(MemLoc); 3875 Ops.push_back(DAG.getValueType(ArgVT)); 3876 Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size()); 3877 Tys.clear(); 3878 Tys.push_back(MVT::Other); 3879 Tys.push_back(MVT::Flag); 3880 Ops.clear(); 3881 Ops.push_back(Copy.getValue(1)); 3882 Ops.push_back(Copy); 3883 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size()); 3884 } 3885 break; 3886 } 3887 case 5: { 3888 unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX; 3889 unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX; 3890 if (DAG.getMachineFunction().liveout_empty()) { 3891 DAG.getMachineFunction().addLiveOut(Reg1); 3892 DAG.getMachineFunction().addLiveOut(Reg2); 3893 } 3894 3895 Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3), 3896 SDOperand()); 3897 Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1)); 3898 break; 3899 } 3900 } 3901 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 3902 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 3903 Copy.getValue(1)); 3904} 3905 3906SDOperand 3907X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3908 MachineFunction &MF = DAG.getMachineFunction(); 3909 const Function* Fn = MF.getFunction(); 3910 if (Fn->hasExternalLinkage() && 3911 Subtarget->TargetType == X86Subtarget::isCygwin && 3912 Fn->getName() == "main") 3913 MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true); 3914 3915 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3916 if (Subtarget->is64Bit()) 3917 return LowerX86_64CCCArguments(Op, DAG); 3918 else if (CC == CallingConv::Fast && EnableFastCC) 3919 return LowerFastCCArguments(Op, DAG); 3920 else 3921 return LowerCCCArguments(Op, DAG); 3922} 3923 3924SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3925 SDOperand InFlag(0, 0); 3926 SDOperand Chain = Op.getOperand(0); 3927 unsigned Align = 3928 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3929 if (Align == 0) Align = 1; 3930 3931 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3932 // If not DWORD aligned, call memset if size is less than the threshold. 3933 // It knows how to align to the right boundary first. 
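// Otherwise an inline rep;stos sequence is built. For a constant fill
// byte the value is replicated up to the store width; e.g. (illustrative)
// memset(p, 0xAB, 1003) with p DWORD-aligned becomes roughly
//   movl $0xABABABAB, %eax;  movl $250, %ecx;  rep stosl
// with the 3 trailing bytes finished by the explicit stores further down.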
3934 if ((Align & 3) != 0 || 3935 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3936 MVT::ValueType IntPtr = getPointerTy(); 3937 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3938 std::vector<std::pair<SDOperand, const Type*> > Args; 3939 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 3940 // Extend the ubyte argument to be an int value for the call. 3941 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3942 Args.push_back(std::make_pair(Val, IntPtrTy)); 3943 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 3944 std::pair<SDOperand,SDOperand> CallResult = 3945 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 3946 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3947 return CallResult.second; 3948 } 3949 3950 MVT::ValueType AVT; 3951 SDOperand Count; 3952 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3953 unsigned BytesLeft = 0; 3954 bool TwoRepStos = false; 3955 if (ValC) { 3956 unsigned ValReg; 3957 uint64_t Val = ValC->getValue() & 255; 3958 3959 // If the value is a constant, then we can potentially use larger sets. 3960 switch (Align & 3) { 3961 case 2: // WORD aligned 3962 AVT = MVT::i16; 3963 ValReg = X86::AX; 3964 Val = (Val << 8) | Val; 3965 break; 3966 case 0: // DWORD aligned 3967 AVT = MVT::i32; 3968 ValReg = X86::EAX; 3969 Val = (Val << 8) | Val; 3970 Val = (Val << 16) | Val; 3971 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3972 AVT = MVT::i64; 3973 ValReg = X86::RAX; 3974 Val = (Val << 32) | Val; 3975 } 3976 break; 3977 default: // Byte aligned 3978 AVT = MVT::i8; 3979 ValReg = X86::AL; 3980 Count = Op.getOperand(3); 3981 break; 3982 } 3983 3984 if (AVT > MVT::i8) { 3985 if (I) { 3986 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3987 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3988 BytesLeft = I->getValue() % UBytes; 3989 } else { 3990 assert(AVT >= MVT::i32 && 3991 "Do not use rep;stos if not at least DWORD aligned"); 3992 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3993 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3994 TwoRepStos = true; 3995 } 3996 } 3997 3998 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3999 InFlag); 4000 InFlag = Chain.getValue(1); 4001 } else { 4002 AVT = MVT::i8; 4003 Count = Op.getOperand(3); 4004 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 4005 InFlag = Chain.getValue(1); 4006 } 4007 4008 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4009 Count, InFlag); 4010 InFlag = Chain.getValue(1); 4011 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4012 Op.getOperand(1), InFlag); 4013 InFlag = Chain.getValue(1); 4014 4015 std::vector<MVT::ValueType> Tys; 4016 Tys.push_back(MVT::Other); 4017 Tys.push_back(MVT::Flag); 4018 std::vector<SDOperand> Ops; 4019 Ops.push_back(Chain); 4020 Ops.push_back(DAG.getValueType(AVT)); 4021 Ops.push_back(InFlag); 4022 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4023 4024 if (TwoRepStos) { 4025 InFlag = Chain.getValue(1); 4026 Count = Op.getOperand(3); 4027 MVT::ValueType CVT = Count.getValueType(); 4028 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4029 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4030 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? 
X86::RCX : X86::ECX, 4031 Left, InFlag); 4032 InFlag = Chain.getValue(1); 4033 Tys.clear(); 4034 Tys.push_back(MVT::Other); 4035 Tys.push_back(MVT::Flag); 4036 Ops.clear(); 4037 Ops.push_back(Chain); 4038 Ops.push_back(DAG.getValueType(MVT::i8)); 4039 Ops.push_back(InFlag); 4040 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4041 } else if (BytesLeft) { 4042 // Issue stores for the last 1 - 7 bytes. 4043 SDOperand Value; 4044 unsigned Val = ValC->getValue() & 255; 4045 unsigned Offset = I->getValue() - BytesLeft; 4046 SDOperand DstAddr = Op.getOperand(1); 4047 MVT::ValueType AddrVT = DstAddr.getValueType(); 4048 if (BytesLeft >= 4) { 4049 Val = (Val << 8) | Val; 4050 Val = (Val << 16) | Val; 4051 Value = DAG.getConstant(Val, MVT::i32); 4052 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4053 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4054 DAG.getConstant(Offset, AddrVT)), 4055 DAG.getSrcValue(NULL)); 4056 BytesLeft -= 4; 4057 Offset += 4; 4058 } 4059 if (BytesLeft >= 2) { 4060 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4061 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4062 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4063 DAG.getConstant(Offset, AddrVT)), 4064 DAG.getSrcValue(NULL)); 4065 BytesLeft -= 2; 4066 Offset += 2; 4067 } 4068 if (BytesLeft == 1) { 4069 Value = DAG.getConstant(Val, MVT::i8); 4070 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4071 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4072 DAG.getConstant(Offset, AddrVT)), 4073 DAG.getSrcValue(NULL)); 4074 } 4075 } 4076 4077 return Chain; 4078} 4079 4080SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 4081 SDOperand Chain = Op.getOperand(0); 4082 unsigned Align = 4083 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 4084 if (Align == 0) Align = 1; 4085 4086 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 4087 // If not DWORD aligned, call memcpy if size is less than the threshold. 4088 // It knows how to align to the right boundary first. 
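// As in LowerMEMSET, the operand size follows the alignment: WORD-aligned
// copies use 16-bit rep movs, DWORD-aligned use 32-bit, and QWORD-aligned
// copies on x86-64 use 64-bit. A variable count that is DWORD aligned is
// split below into (count >> 2) element moves plus a byte-wide rep movs
// for the remainder (the TwoRepMovs case).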
4089 if ((Align & 3) != 0 || 4090 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 4091 MVT::ValueType IntPtr = getPointerTy(); 4092 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4093 std::vector<std::pair<SDOperand, const Type*> > Args; 4094 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 4095 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 4096 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 4097 std::pair<SDOperand,SDOperand> CallResult = 4098 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 4099 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 4100 return CallResult.second; 4101 } 4102 4103 MVT::ValueType AVT; 4104 SDOperand Count; 4105 unsigned BytesLeft = 0; 4106 bool TwoRepMovs = false; 4107 switch (Align & 3) { 4108 case 2: // WORD aligned 4109 AVT = MVT::i16; 4110 break; 4111 case 0: // DWORD aligned 4112 AVT = MVT::i32; 4113 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4114 AVT = MVT::i64; 4115 break; 4116 default: // Byte aligned 4117 AVT = MVT::i8; 4118 Count = Op.getOperand(3); 4119 break; 4120 } 4121 4122 if (AVT > MVT::i8) { 4123 if (I) { 4124 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4125 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4126 BytesLeft = I->getValue() % UBytes; 4127 } else { 4128 assert(AVT >= MVT::i32 && 4129 "Do not use rep;movs if not at least DWORD aligned"); 4130 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4131 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4132 TwoRepMovs = true; 4133 } 4134 } 4135 4136 SDOperand InFlag(0, 0); 4137 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4138 Count, InFlag); 4139 InFlag = Chain.getValue(1); 4140 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4141 Op.getOperand(1), InFlag); 4142 InFlag = Chain.getValue(1); 4143 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4144 Op.getOperand(2), InFlag); 4145 InFlag = Chain.getValue(1); 4146 4147 std::vector<MVT::ValueType> Tys; 4148 Tys.push_back(MVT::Other); 4149 Tys.push_back(MVT::Flag); 4150 std::vector<SDOperand> Ops; 4151 Ops.push_back(Chain); 4152 Ops.push_back(DAG.getValueType(AVT)); 4153 Ops.push_back(InFlag); 4154 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4155 4156 if (TwoRepMovs) { 4157 InFlag = Chain.getValue(1); 4158 Count = Op.getOperand(3); 4159 MVT::ValueType CVT = Count.getValueType(); 4160 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4161 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4162 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4163 Left, InFlag); 4164 InFlag = Chain.getValue(1); 4165 Tys.clear(); 4166 Tys.push_back(MVT::Other); 4167 Tys.push_back(MVT::Flag); 4168 Ops.clear(); 4169 Ops.push_back(Chain); 4170 Ops.push_back(DAG.getValueType(MVT::i8)); 4171 Ops.push_back(InFlag); 4172 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4173 } else if (BytesLeft) { 4174 // Issue loads and stores for the last 1 - 7 bytes. 
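// e.g. (illustrative) a DWORD-aligned 1007-byte constant-size copy issues
// rep movsl for 251 dwords, then one 2-byte and one 1-byte load/store
// pair below for the 3 remaining bytes.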
4175 unsigned Offset = I->getValue() - BytesLeft; 4176 SDOperand DstAddr = Op.getOperand(1); 4177 MVT::ValueType DstVT = DstAddr.getValueType(); 4178 SDOperand SrcAddr = Op.getOperand(2); 4179 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4180 SDOperand Value; 4181 if (BytesLeft >= 4) { 4182 Value = DAG.getLoad(MVT::i32, Chain, 4183 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4184 DAG.getConstant(Offset, SrcVT)), 4185 DAG.getSrcValue(NULL)); 4186 Chain = Value.getValue(1); 4187 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4188 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4189 DAG.getConstant(Offset, DstVT)), 4190 DAG.getSrcValue(NULL)); 4191 BytesLeft -= 4; 4192 Offset += 4; 4193 } 4194 if (BytesLeft >= 2) { 4195 Value = DAG.getLoad(MVT::i16, Chain, 4196 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4197 DAG.getConstant(Offset, SrcVT)), 4198 DAG.getSrcValue(NULL)); 4199 Chain = Value.getValue(1); 4200 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4201 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4202 DAG.getConstant(Offset, DstVT)), 4203 DAG.getSrcValue(NULL)); 4204 BytesLeft -= 2; 4205 Offset += 2; 4206 } 4207 4208 if (BytesLeft == 1) { 4209 Value = DAG.getLoad(MVT::i8, Chain, 4210 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4211 DAG.getConstant(Offset, SrcVT)), 4212 DAG.getSrcValue(NULL)); 4213 Chain = Value.getValue(1); 4214 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 4215 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4216 DAG.getConstant(Offset, DstVT)), 4217 DAG.getSrcValue(NULL)); 4218 } 4219 } 4220 4221 return Chain; 4222} 4223 4224SDOperand 4225X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4226 std::vector<MVT::ValueType> Tys; 4227 Tys.push_back(MVT::Other); 4228 Tys.push_back(MVT::Flag); 4229 std::vector<SDOperand> Ops; 4230 Ops.push_back(Op.getOperand(0)); 4231 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size()); 4232 Ops.clear(); 4233 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 4234 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 4235 MVT::i32, Ops[0].getValue(2))); 4236 Ops.push_back(Ops[1].getValue(1)); 4237 Tys[0] = Tys[1] = MVT::i32; 4238 Tys.push_back(MVT::Other); 4239 return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size()); 4240} 4241 4242SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4243 if (!Subtarget->is64Bit()) { 4244 // vastart just stores the address of the VarArgsFrameIndex slot into the 4245 // memory location argument. 4246 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4247 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 4248 Op.getOperand(1), Op.getOperand(2)); 4249 } 4250 4251 // __va_list_tag: 4252 // gp_offset (0 - 6 * 8) 4253 // fp_offset (48 - 48 + 8 * 16) 4254 // overflow_arg_area (point to parameters coming in memory). 
4255 // reg_save_area 4256 std::vector<SDOperand> MemOps; 4257 SDOperand FIN = Op.getOperand(1); 4258 // Store gp_offset 4259 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4260 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4261 FIN, Op.getOperand(2)); 4262 MemOps.push_back(Store); 4263 4264 // Store fp_offset 4265 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4266 DAG.getConstant(4, getPointerTy())); 4267 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4268 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4269 FIN, Op.getOperand(2)); 4270 MemOps.push_back(Store); 4271 4272 // Store ptr to overflow_arg_area 4273 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4274 DAG.getConstant(4, getPointerTy())); 4275 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4276 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4277 OVFIN, FIN, Op.getOperand(2)); 4278 MemOps.push_back(Store); 4279 4280 // Store ptr to reg_save_area. 4281 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4282 DAG.getConstant(8, getPointerTy())); 4283 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4284 Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 4285 RSFIN, FIN, Op.getOperand(2)); 4286 MemOps.push_back(Store); 4287 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4288} 4289 4290SDOperand 4291X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4292 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4293 switch (IntNo) { 4294 default: return SDOperand(); // Don't custom lower most intrinsics. 4295 // Comparison intrinsics. 4296 case Intrinsic::x86_sse_comieq_ss: 4297 case Intrinsic::x86_sse_comilt_ss: 4298 case Intrinsic::x86_sse_comile_ss: 4299 case Intrinsic::x86_sse_comigt_ss: 4300 case Intrinsic::x86_sse_comige_ss: 4301 case Intrinsic::x86_sse_comineq_ss: 4302 case Intrinsic::x86_sse_ucomieq_ss: 4303 case Intrinsic::x86_sse_ucomilt_ss: 4304 case Intrinsic::x86_sse_ucomile_ss: 4305 case Intrinsic::x86_sse_ucomigt_ss: 4306 case Intrinsic::x86_sse_ucomige_ss: 4307 case Intrinsic::x86_sse_ucomineq_ss: 4308 case Intrinsic::x86_sse2_comieq_sd: 4309 case Intrinsic::x86_sse2_comilt_sd: 4310 case Intrinsic::x86_sse2_comile_sd: 4311 case Intrinsic::x86_sse2_comigt_sd: 4312 case Intrinsic::x86_sse2_comige_sd: 4313 case Intrinsic::x86_sse2_comineq_sd: 4314 case Intrinsic::x86_sse2_ucomieq_sd: 4315 case Intrinsic::x86_sse2_ucomilt_sd: 4316 case Intrinsic::x86_sse2_ucomile_sd: 4317 case Intrinsic::x86_sse2_ucomigt_sd: 4318 case Intrinsic::x86_sse2_ucomige_sd: 4319 case Intrinsic::x86_sse2_ucomineq_sd: { 4320 unsigned Opc = 0; 4321 ISD::CondCode CC = ISD::SETCC_INVALID; 4322 switch (IntNo) { 4323 default: break; 4324 case Intrinsic::x86_sse_comieq_ss: 4325 case Intrinsic::x86_sse2_comieq_sd: 4326 Opc = X86ISD::COMI; 4327 CC = ISD::SETEQ; 4328 break; 4329 case Intrinsic::x86_sse_comilt_ss: 4330 case Intrinsic::x86_sse2_comilt_sd: 4331 Opc = X86ISD::COMI; 4332 CC = ISD::SETLT; 4333 break; 4334 case Intrinsic::x86_sse_comile_ss: 4335 case Intrinsic::x86_sse2_comile_sd: 4336 Opc = X86ISD::COMI; 4337 CC = ISD::SETLE; 4338 break; 4339 case Intrinsic::x86_sse_comigt_ss: 4340 case Intrinsic::x86_sse2_comigt_sd: 4341 Opc = X86ISD::COMI; 4342 CC = ISD::SETGT; 4343 break; 4344 case Intrinsic::x86_sse_comige_ss: 4345 case Intrinsic::x86_sse2_comige_sd: 4346 Opc = X86ISD::COMI; 4347 CC = ISD::SETGE; 4348 break; 4349 case Intrinsic::x86_sse_comineq_ss: 4350 case 
Intrinsic::x86_sse2_comineq_sd: 4351 Opc = X86ISD::COMI; 4352 CC = ISD::SETNE; 4353 break; 4354 case Intrinsic::x86_sse_ucomieq_ss: 4355 case Intrinsic::x86_sse2_ucomieq_sd: 4356 Opc = X86ISD::UCOMI; 4357 CC = ISD::SETEQ; 4358 break; 4359 case Intrinsic::x86_sse_ucomilt_ss: 4360 case Intrinsic::x86_sse2_ucomilt_sd: 4361 Opc = X86ISD::UCOMI; 4362 CC = ISD::SETLT; 4363 break; 4364 case Intrinsic::x86_sse_ucomile_ss: 4365 case Intrinsic::x86_sse2_ucomile_sd: 4366 Opc = X86ISD::UCOMI; 4367 CC = ISD::SETLE; 4368 break; 4369 case Intrinsic::x86_sse_ucomigt_ss: 4370 case Intrinsic::x86_sse2_ucomigt_sd: 4371 Opc = X86ISD::UCOMI; 4372 CC = ISD::SETGT; 4373 break; 4374 case Intrinsic::x86_sse_ucomige_ss: 4375 case Intrinsic::x86_sse2_ucomige_sd: 4376 Opc = X86ISD::UCOMI; 4377 CC = ISD::SETGE; 4378 break; 4379 case Intrinsic::x86_sse_ucomineq_ss: 4380 case Intrinsic::x86_sse2_ucomineq_sd: 4381 Opc = X86ISD::UCOMI; 4382 CC = ISD::SETNE; 4383 break; 4384 } 4385 bool Flip; 4386 unsigned X86CC; 4387 translateX86CC(CC, true, X86CC, Flip); 4388 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 4389 Op.getOperand(Flip?1:2)); 4390 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 4391 DAG.getConstant(X86CC, MVT::i8), Cond); 4392 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4393 } 4394 } 4395} 4396 4397/// LowerOperation - Provide custom lowering hooks for some operations. 4398/// 4399SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4400 switch (Op.getOpcode()) { 4401 default: assert(0 && "Should not custom lower this!"); 4402 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4403 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4404 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4405 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4406 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4407 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4408 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4409 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4410 case ISD::SHL_PARTS: 4411 case ISD::SRA_PARTS: 4412 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4413 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4414 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4415 case ISD::FABS: return LowerFABS(Op, DAG); 4416 case ISD::FNEG: return LowerFNEG(Op, DAG); 4417 case ISD::SETCC: return LowerSETCC(Op, DAG); 4418 case ISD::SELECT: return LowerSELECT(Op, DAG); 4419 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4420 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4421 case ISD::CALL: return LowerCALL(Op, DAG); 4422 case ISD::RET: return LowerRET(Op, DAG); 4423 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4424 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4425 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4426 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4427 case ISD::VASTART: return LowerVASTART(Op, DAG); 4428 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4429 } 4430} 4431 4432const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4433 switch (Opcode) { 4434 default: return NULL; 4435 case X86ISD::SHLD: return "X86ISD::SHLD"; 4436 case X86ISD::SHRD: return "X86ISD::SHRD"; 4437 case X86ISD::FAND: return "X86ISD::FAND"; 4438 case X86ISD::FXOR: return "X86ISD::FXOR"; 4439 case X86ISD::FILD: return "X86ISD::FILD"; 4440 case 
X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4441 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4442 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4443 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4444 case X86ISD::FLD: return "X86ISD::FLD"; 4445 case X86ISD::FST: return "X86ISD::FST"; 4446 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4447 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4448 case X86ISD::CALL: return "X86ISD::CALL"; 4449 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4450 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4451 case X86ISD::CMP: return "X86ISD::CMP"; 4452 case X86ISD::COMI: return "X86ISD::COMI"; 4453 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4454 case X86ISD::SETCC: return "X86ISD::SETCC"; 4455 case X86ISD::CMOV: return "X86ISD::CMOV"; 4456 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4457 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4458 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4459 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4460 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 4461 case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA"; 4462 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4463 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4464 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4465 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4466 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4467 } 4468} 4469 4470/// isLegalAddressImmediate - Return true if the integer value or 4471/// GlobalValue can be used as the offset of the target addressing mode. 4472bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 4473 // X86 allows a sign-extended 32-bit immediate field. 4474 return (V > -(1LL << 32) && V < (1LL << 32)-1); 4475} 4476 4477bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 4478 // GV is 64-bit but displacement field is 32-bit unless we are in small code 4479 // model. Mac OS X happens to support only small PIC code model. 4480 // FIXME: better support for other OS's. 4481 if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin()) 4482 return false; 4483 if (Subtarget->isTargetDarwin()) { 4484 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 4485 if (RModel == Reloc::Static) 4486 return true; 4487 else if (RModel == Reloc::DynamicNoPIC) 4488 return !DarwinGVRequiresExtraLoad(GV); 4489 else 4490 return false; 4491 } else 4492 return true; 4493} 4494 4495/// isShuffleMaskLegal - Targets can use this to indicate that they only 4496/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4497/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4498/// are assumed to be legal. 4499bool 4500X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4501 // Only do shuffles on 128-bit vector types for now. 4502 if (MVT::getSizeInBits(VT) == 64) return false; 4503 return (Mask.Val->getNumOperands() <= 4 || 4504 isSplatMask(Mask.Val) || 4505 isPSHUFHW_PSHUFLWMask(Mask.Val) || 4506 X86::isUNPCKLMask(Mask.Val) || 4507 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 4508 X86::isUNPCKHMask(Mask.Val)); 4509} 4510 4511bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 4512 MVT::ValueType EVT, 4513 SelectionDAG &DAG) const { 4514 unsigned NumElts = BVOps.size(); 4515 // Only do shuffles on 128-bit vector types for now. 
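// e.g. a build_vector of 2 x i32 (64 bits total, MMX-sized) is rejected
// by the size check below, while a 128-bit 4-element clear pattern is
// accepted when its mask matches MOVL or SHUFP (commuted or not).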
4516 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 4517 if (NumElts == 2) return true; 4518 if (NumElts == 4) { 4519 return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) || 4520 isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps)); 4521 } 4522 return false; 4523} 4524 4525//===----------------------------------------------------------------------===// 4526// X86 Scheduler Hooks 4527//===----------------------------------------------------------------------===// 4528 4529MachineBasicBlock * 4530X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 4531 MachineBasicBlock *BB) { 4532 switch (MI->getOpcode()) { 4533 default: assert(false && "Unexpected instr type to insert"); 4534 case X86::CMOV_FR32: 4535 case X86::CMOV_FR64: 4536 case X86::CMOV_V4F32: 4537 case X86::CMOV_V2F64: 4538 case X86::CMOV_V2I64: { 4539 // To "insert" a SELECT_CC instruction, we actually have to insert the 4540 // diamond control-flow pattern. The incoming instruction knows the 4541 // destination vreg to set, the condition code register to branch on, the 4542 // true/false values to select between, and a branch opcode to use. 4543 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4544 ilist<MachineBasicBlock>::iterator It = BB; 4545 ++It; 4546 4547 // thisMBB: 4548 // ... 4549 // TrueVal = ... 4550 // cmpTY ccX, r1, r2 4551 // bCC copy1MBB 4552 // fallthrough --> copy0MBB 4553 MachineBasicBlock *thisMBB = BB; 4554 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 4555 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 4556 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 4557 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 4558 MachineFunction *F = BB->getParent(); 4559 F->getBasicBlockList().insert(It, copy0MBB); 4560 F->getBasicBlockList().insert(It, sinkMBB); 4561 // Update machine-CFG edges by first adding all successors of the current 4562 // block to the new block which will contain the Phi node for the select. 4563 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 4564 e = BB->succ_end(); i != e; ++i) 4565 sinkMBB->addSuccessor(*i); 4566 // Next, remove all successors of the current block, and add the true 4567 // and fallthrough blocks as its successors. 4568 while(!BB->succ_empty()) 4569 BB->removeSuccessor(BB->succ_begin()); 4570 BB->addSuccessor(copy0MBB); 4571 BB->addSuccessor(sinkMBB); 4572 4573 // copy0MBB: 4574 // %FalseValue = ... 4575 // # fallthrough to sinkMBB 4576 BB = copy0MBB; 4577 4578 // Update machine-CFG edges 4579 BB->addSuccessor(sinkMBB); 4580 4581 // sinkMBB: 4582 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4583 // ... 4584 BB = sinkMBB; 4585 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 4586 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4587 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4588 4589 delete MI; // The pseudo instruction is gone now. 4590 return BB; 4591 } 4592 4593 case X86::FP_TO_INT16_IN_MEM: 4594 case X86::FP_TO_INT32_IN_MEM: 4595 case X86::FP_TO_INT64_IN_MEM: { 4596 // Change the floating point control register to use "round towards zero" 4597 // mode when truncating to an integer value. 4598 MachineFunction *F = BB->getParent(); 4599 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 4600 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 4601 4602 // Load the old value of the high byte of the control word... 
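// The complete sequence (sketch): fnstcw spills the current control word
// to CWFrameIdx; the old value is saved in a vreg below; a 0xC7F image is
// then stored and fldcw'd, setting RC (bits 11:10) to 11b = round toward
// zero while keeping all exceptions masked; after the fistp the original
// word is restored.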
    // Load the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the rounding mode to round toward zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the in-memory image of the control word to its original value.
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // SETCC produces either 0 or 1, so every bit above the low bit is known
    // to be zero.
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(),
                               DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (N->getOpcode() == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    // Both addresses are frame indices: they are consecutive if the objects
    // have the expected size and sit exactly Dist * Size bytes apart.
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV = NULL;
  int64_t Offset = 0;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}

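// Worked example (illustrative, not from the original comments): four f32
// loads from GV+0, GV+4, GV+8 and GV+12 shuffled together in order satisfy
// isConsecutiveLoad with Size == 4 and Dist == 1..3 against the first load,
// and if GV is 16-byte aligned, isBaseAlignment16 lets the combine below
// replace them with a single aligned v4f32 load.
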
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || Arg.getOpcode() != ISD::LOAD)
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  if (isAlign16)
    return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1),
                       Base->getOperand(2));
  else {
    // Just use movups, it's shorter.
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::v4f32);
    Tys.push_back(MVT::Other);
    SmallVector<SDOperand, 3> Ops;
    Ops.push_back(Base->getOperand(0));
    Ops.push_back(Base->getOperand(1));
    Ops.push_back(Base->getOperand(2));
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
  }
}

SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(char ConstraintLetter) const {
  switch (ConstraintLetter) {
  case 'A':
  case 'r':
  case 'R':
  case 'l':
  case 'q':
  case 'Q':
  case 'x':
  case 'Y':
    return C_RegisterClass;
  default: return TargetLowering::getConstraintType(ConstraintLetter);
  }
}

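// Illustrative note (not from the original sources): the 'A' constraint
// handled below is the classic way to read a 64-bit value out of the
// EDX:EAX register pair on i386, e.g.
//   unsigned long long t;
//   asm volatile("rdtsc" : "=A"(t));
// getRegClassForInlineAsmConstraint turns that 'A' into the {EAX, EDX}
// allocation order in the switch that follows.
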
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    // FIXME: not handling MMX registers yet ('y' constraint).
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'l':   // INDEX_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'x':   // SSE_REGS if SSE1 allowed
      if (Subtarget->hasSSE1())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (Subtarget->hasSSE2())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    }
  }

  return std::vector<unsigned>();
}

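// Note on make_vector (illustrative): it is the varargs helper from
// llvm/ADT/VectorExtras.h, which reads arguments until it sees a zero;
// that is why every register list above ends with a literal 0 sentinel.
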
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found?  Bail out.
  if (Res.second == 0) return Res;

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}; we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}
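
// Illustrative walk-through (not from the original comments): for an IR-level
// inline asm such as
//   %r = call i32 asm "...", "={ax}"()
// TargetLowering first resolves "{ax}" to (X86::AX, GR16); the hasType(VT)
// check above then fails for i32, and the remapping rewrites the pair to
// (X86::EAX, GR32) so the operand is allocated as a full 32-bit register.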