X86ISelLowering.cpp revision 9eca5e814da3490ae889a75c93c0191f510b9f1c
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// FIXME: temporary.
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
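  // As a rough illustration of what Promote means for these entries (this
  // describes the generic legalizer's behavior, not logic in this file): a
  // query such as
  //   getOperationAction(ISD::UINT_TO_FP, MVT::i16)
  // returns Promote, so an i16 unsigned convert is rewritten to operate on
  // a wider integer type; the zero-extended operand can never be negative,
  // so a signed convert on the wider type produces the same result.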

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }
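  // Sketch of why the FP_TO_UINT promotions above are sound: every u16 value
  // is exactly representable in the i32 range and every u32 value in the i64
  // range, so e.g. an f64 -> u32 conversion on a 64-bit target can be done
  // as an f64 -> i64 FP_TO_SINT whose low 32 bits are the answer.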

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool   , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable      , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress  , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol , MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
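  // Background on the *_PARTS nodes above: a 64-bit shift is split into two
  // 32-bit halves, which for a left shift by n (0 < n < 32) combine as
  //   hi' = (hi << n) | (lo >> (32 - n));  lo' = lo << n;
  // and map naturally onto the x86 SHLD/SHRD double-shift instructions;
  // shift amounts of 32 or more move the low word into the high word first.
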
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG            , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE        , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE     , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }
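  // Note on the addLegalFPImmediate calls above: a ConstantFP that matches
  // one of the registered values can be materialized without a memory access
  // (xorps/xorpd for +0.0 in the SSE case, FLD0/FLD1 plus an optional FCHS
  // on the x87 stack); every other FP constant is expanded into a
  // constant-pool load.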

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics.
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,  MVT::v4f32, Legal);
    setOperationAction(ISD::OR,   MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,  MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,  MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,  MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,  MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v2f64, Legal);
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2f64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
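    // A Custom entry like the ones above hands the node to this target's
    // LowerOperation() hook during legalization instead of the generic
    // expander; the v4f32 insert, for instance, is rebuilt from the v8i16
    // (PINSRW-style) operations as the comment above notes.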

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//               C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}
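// Typical use, as in the lowering routines below: mark an incoming physreg
// live-in and then read it through the new vreg, e.g.
//   unsigned VReg = AddLiveIn(MF, X86::XMM0, X86::VR128RegisterClass);
//   SDOperand Arg = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);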

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP]     -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in a XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = 0;          // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // If this is a struct return on Darwin/X86, the callee pops the hidden struct
  // pointer.
  if (MF.getFunction()->getCallingConv() == CallingConv::CSRet &&
      Subtarget->isTargetDarwin())
    BytesToPopOnReturn = 4;
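  // Worked example of the layout computed above: for
  //   void f(char a, double b, <4 x float> c)
  // 'a' is given the 4-byte slot at offset 0 (i.e. [ESP+4] at runtime),
  // 'b' the 8-byte slot at offset 4, and 'c' arrives in XMM0 rather than on
  // the stack, so BytesCallerReserves ends up as 12.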

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}


SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        ++NumXMMRegs;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));
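  // CALLSEQ_START and the matching CALLSEQ_END below bracket the
  // outgoing-argument area; they are eventually turned into the stack
  // adjustment pseudo-instructions that reserve and then release NumBytes
  // of stack around the call.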

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits. If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned ExtOp =
        dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  // If this is a call to a struct-return function on Darwin/X86, the callee
  // pops the hidden struct pointer, so we have to push it back.
  if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin())
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;
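  // MERGE_VALUES just rebundles the copied-in results with the chain so the
  // original result numbering survives lowering; the Res.getValue(Op.ResNo)
  // below then picks out whichever value the caller actually asked for.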

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// HowToPassX86_64CCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in integer or XMM registers, returns the number
/// of integer or XMM registers needed.
static void
HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
                           unsigned NumIntRegs, unsigned NumXMMRegs,
                           unsigned &ObjSize, unsigned &ObjIntRegs,
                           unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    if (NumIntRegs < 6)
      ObjIntRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::i8:  ObjSize = 1; break;
      case MVT::i16: ObjSize = 2; break;
      case MVT::i32: ObjSize = 4; break;
      case MVT::i64: ObjSize = 8; break;
      }
    }
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 8)
      ObjXMMRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::f32:  ObjSize = 4; break;
      case MVT::f64:  ObjSize = 8; break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: ObjSize = 16; break;
      }
      break;
    }
  }
}

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [RSP]     -- return address
  // [RSP + 8] -- first nonreg argument (leftmost lexically)
  // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
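  // Example of the register walk below: for a callee such as
  //   double g(int a, double b, long c)
  // 'a' arrives in EDI, 'b' in XMM0 and 'c' in RSI -- the integer and XMM
  // sequences advance independently, which is why two counters are kept.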

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 8;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    // FIXME: __int128 and long double support?
    HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                               ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 8)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::i64: {
        TargetRegisterClass *RC = NULL;
        switch (ObjectVT) {
        default: break;
        case MVT::i8:
          RC = X86::GR8RegisterClass;
          Reg = GPR8ArgRegs[NumIntRegs];
          break;
        case MVT::i16:
          RC = X86::GR16RegisterClass;
          Reg = GPR16ArgRegs[NumIntRegs];
          break;
        case MVT::i32:
          RC = X86::GR32RegisterClass;
          Reg = GPR32ArgRegs[NumIntRegs];
          break;
        case MVT::i64:
          RC = X86::GR64RegisterClass;
          Reg = GPR64ArgRegs[NumIntRegs];
          break;
        }
        Reg = AddLiveIn(MF, Reg, RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      case MVT::f32:
      case MVT::f64:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: {
        TargetRegisterClass *RC = (ObjectVT == MVT::f32) ?
          X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
          X86::FR64RegisterClass : X86::VR128RegisterClass);
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    } else if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }
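  // For varargs the AMD64 ABI prescribes a register save area: 6 GPRs x 8
  // bytes followed by 8 XMM registers x 16 bytes, 176 bytes in all. The
  // gp_offset/fp_offset fields of a va_list index into that area, which is
  // what the VarArgsGPOffset/VarArgsFPOffset values computed below encode.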

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    std::vector<SDOperand> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset,
                                                getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;      // No return address slot generated yet.
  BytesToPopOnReturn = 0;   // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6)
        ++NumIntRegs;
      else
        NumBytes += 8;
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8)
        NumXMMRegs++;
      else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
        NumBytes += 8;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6) {
        unsigned Reg = 0;
        switch (ArgVT) {
        default: break;
        case MVT::i8:  Reg = GPR8ArgRegs[NumIntRegs];  break;
        case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
        case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
        case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
        }
        RegsToPass.push_back(std::make_pair(Reg, Arg));
        ++NumIntRegs;
      } else {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += 8;
      }
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
          // XMM arguments have to be aligned on 16-byte boundary.
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
          ArgOffset += 8;
        else
          ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.
    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }
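  // For example, a varargs call such as printf("%f\n", x) passing one double
  // in XMM0 loads 1 into AL above; the callee only needs an upper bound on
  // how many of the eight SSE registers it must spill.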

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    if (Op.Val->getValueType(1) == MVT::i64) {
      // FIXME: __int128 support?
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i64);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // FIXME: long double support?
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                 Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in integer or XMM registers, returns the number
/// of integer or XMM registers needed.
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 1;
    break;
  case MVT::i16:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 2;
    break;
  case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 4;
    break;
  case MVT::i64:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 4;
    } else
#endif
      ObjSize = 8;
    break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues()-1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP]     -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far. This can be either
  // 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                            ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        }
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }

    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                                          NULL, 0);
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);

  // Make sure the argument area takes 8n+4 bytes so that the arguments stay
  // aligned both before and after the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    MF.addLiveOut(X86::XMM0);
    break;
  }
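  // The addLiveOut calls above are the return-value half of the convention:
  // an i64 fastcc result comes back in EAX:EDX, a floating-point result on
  // the x87 stack in ST0, and a vector result in XMM0, so those registers
  // must be marked live across the return.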
1512 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 1513 Op.Val->value_end()); 1514 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 1515} 1516 1517SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, 1518 bool isFastCall) { 1519 SDOperand Chain = Op.getOperand(0); 1520 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 1521 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1522 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1523 SDOperand Callee = Op.getOperand(4); 1524 MVT::ValueType RetVT= Op.Val->getValueType(0); 1525 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 1526 1527 // Count how many bytes are to be pushed on the stack. 1528 unsigned NumBytes = 0; 1529 1530 // Keep track of the number of integer regs passed so far. This can be either 1531 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 1532 // used). 1533 unsigned NumIntRegs = 0; 1534 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 1535 1536 static const unsigned GPRArgRegs[][2] = { 1537 { X86::AL, X86::DL }, 1538 { X86::AX, X86::DX }, 1539 { X86::EAX, X86::EDX } 1540 }; 1541 static const unsigned FastCallGPRArgRegs[][2] = { 1542 { X86::CL, X86::DL }, 1543 { X86::CX, X86::DX }, 1544 { X86::ECX, X86::EDX } 1545 }; 1546 static const unsigned XMMArgRegs[] = { 1547 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 1548 }; 1549 1550 for (unsigned i = 0; i != NumOps; ++i) { 1551 SDOperand Arg = Op.getOperand(5+2*i); 1552 1553 switch (Arg.getValueType()) { 1554 default: assert(0 && "Unknown value type!"); 1555 case MVT::i8: 1556 case MVT::i16: 1557 case MVT::i32: { 1558 unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS); 1559 if (NumIntRegs < MaxNumIntRegs) { 1560 ++NumIntRegs; 1561 break; 1562 } 1563 } // Fall through 1564 case MVT::f32: 1565 NumBytes += 4; 1566 break; 1567 case MVT::f64: 1568 NumBytes += 8; 1569 break; 1570 case MVT::v16i8: 1571 case MVT::v8i16: 1572 case MVT::v4i32: 1573 case MVT::v2i64: 1574 case MVT::v4f32: 1575 case MVT::v2f64: 1576 if (isFastCall) { 1577 assert(0 && "Unknown value type!"); 1578 } else { 1579 if (NumXMMRegs < 4) 1580 NumXMMRegs++; 1581 else { 1582 // XMM arguments have to be aligned on 16-byte boundary. 1583 NumBytes = ((NumBytes + 15) / 16) * 16; 1584 NumBytes += 16; 1585 } 1586 } 1587 break; 1588 } 1589 } 1590 1591 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1592 // arguments and the arguments after the retaddr has been pushed are aligned. 1593 if ((NumBytes & 7) == 0) 1594 NumBytes += 4; 1595 1596 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1597 1598 // Arguments go on the stack in reverse order, as specified by the ABI. 1599 unsigned ArgOffset = 0; 1600 NumIntRegs = 0; 1601 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 1602 std::vector<SDOperand> MemOpChains; 1603 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); 1604 for (unsigned i = 0; i != NumOps; ++i) { 1605 SDOperand Arg = Op.getOperand(5+2*i); 1606 1607 switch (Arg.getValueType()) { 1608 default: assert(0 && "Unexpected ValueType for argument!"); 1609 case MVT::i8: 1610 case MVT::i16: 1611 case MVT::i32: { 1612 unsigned MaxNumIntRegs = (isFastCall ? 
2 : FASTCC_NUM_INT_ARGS_INREGS); 1613 if (NumIntRegs < MaxNumIntRegs) { 1614 RegsToPass.push_back( 1615 std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs], 1616 Arg)); 1617 ++NumIntRegs; 1618 break; 1619 } 1620 } // Fall through 1621 case MVT::f32: { 1622 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1623 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1624 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1625 ArgOffset += 4; 1626 break; 1627 } 1628 case MVT::f64: { 1629 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1630 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1631 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1632 ArgOffset += 8; 1633 break; 1634 } 1635 case MVT::v16i8: 1636 case MVT::v8i16: 1637 case MVT::v4i32: 1638 case MVT::v2i64: 1639 case MVT::v4f32: 1640 case MVT::v2f64: 1641 if (isFastCall) { 1642 assert(0 && "Unexpected ValueType for argument!"); 1643 } else { 1644 if (NumXMMRegs < 4) { 1645 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 1646 NumXMMRegs++; 1647 } else { 1648 // XMM arguments have to be aligned on 16-byte boundary. 1649 ArgOffset = ((ArgOffset + 15) / 16) * 16; 1650 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1651 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1652 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1653 ArgOffset += 16; 1654 } 1655 } 1656 break; 1657 } 1658 } 1659 1660 if (!MemOpChains.empty()) 1661 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1662 &MemOpChains[0], MemOpChains.size()); 1663 1664 // Build a sequence of copy-to-reg nodes chained together with token chain 1665 // and flag operands which copy the outgoing args into registers. 1666 SDOperand InFlag; 1667 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1668 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1669 InFlag); 1670 InFlag = Chain.getValue(1); 1671 } 1672 1673 // If the callee is a GlobalAddress node (quite common, every direct call is) 1674 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1675 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1676 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1677 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1678 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1679 1680 std::vector<MVT::ValueType> NodeTys; 1681 NodeTys.push_back(MVT::Other); // Returns a chain 1682 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1683 std::vector<SDOperand> Ops; 1684 Ops.push_back(Chain); 1685 Ops.push_back(Callee); 1686 1687 // Add argument registers to the end of the list so that they are known live 1688 // into the call. 1689 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1690 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1691 RegsToPass[i].second.getValueType())); 1692 1693 if (InFlag.Val) 1694 Ops.push_back(InFlag); 1695 1696 // FIXME: Do not generate X86ISD::TAILCALL for now. 1697 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1698 NodeTys, &Ops[0], Ops.size()); 1699 InFlag = Chain.getValue(1); 1700 1701 NodeTys.clear(); 1702 NodeTys.push_back(MVT::Other); // Returns a chain 1703 if (RetVT != MVT::Other) 1704 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 
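// Note on the CALLSEQ_END operands built next (illustrative gloss, not
// original commentary): the node carries two byte counts. The first mirrors
// the CALLSEQ_START amount above (stack space the caller allocated); the
// second is how many bytes the callee pops on return. Fastcc callees pop all
// of their stack arguments, so both operands are NumBytes here; contrast
// LowerStdCallCCCallTo below, which passes a separate NumBytesForCalleeToPush
// that becomes 0 for variadic callees.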
1705 Ops.clear(); 1706 Ops.push_back(Chain); 1707 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1708 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1709 Ops.push_back(InFlag); 1710 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1711 if (RetVT != MVT::Other) 1712 InFlag = Chain.getValue(1); 1713 1714 std::vector<SDOperand> ResultVals; 1715 NodeTys.clear(); 1716 switch (RetVT) { 1717 default: assert(0 && "Unknown value type to return!"); 1718 case MVT::Other: break; 1719 case MVT::i8: 1720 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 1721 ResultVals.push_back(Chain.getValue(0)); 1722 NodeTys.push_back(MVT::i8); 1723 break; 1724 case MVT::i16: 1725 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 1726 ResultVals.push_back(Chain.getValue(0)); 1727 NodeTys.push_back(MVT::i16); 1728 break; 1729 case MVT::i32: 1730 if (Op.Val->getValueType(1) == MVT::i32) { 1731 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 1732 ResultVals.push_back(Chain.getValue(0)); 1733 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32, 1734 Chain.getValue(2)).getValue(1); 1735 ResultVals.push_back(Chain.getValue(0)); 1736 NodeTys.push_back(MVT::i32); 1737 } else { 1738 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 1739 ResultVals.push_back(Chain.getValue(0)); 1740 } 1741 NodeTys.push_back(MVT::i32); 1742 break; 1743 case MVT::v16i8: 1744 case MVT::v8i16: 1745 case MVT::v4i32: 1746 case MVT::v2i64: 1747 case MVT::v4f32: 1748 case MVT::v2f64: 1749 if (isFastCall) { 1750 assert(0 && "Unknown value type to return!"); 1751 } else { 1752 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); 1753 ResultVals.push_back(Chain.getValue(0)); 1754 NodeTys.push_back(RetVT); 1755 } 1756 break; 1757 case MVT::f32: 1758 case MVT::f64: { 1759 std::vector<MVT::ValueType> Tys; 1760 Tys.push_back(MVT::f64); 1761 Tys.push_back(MVT::Other); 1762 Tys.push_back(MVT::Flag); 1763 std::vector<SDOperand> Ops; 1764 Ops.push_back(Chain); 1765 Ops.push_back(InFlag); 1766 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, 1767 &Ops[0], Ops.size()); 1768 Chain = RetVal.getValue(1); 1769 InFlag = RetVal.getValue(2); 1770 if (X86ScalarSSE) { 1771 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 1772 // shouldn't be necessary except that RFP cannot be live across 1773 // multiple blocks. When stackifier is fixed, they can be uncoupled. 1774 MachineFunction &MF = DAG.getMachineFunction(); 1775 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 1776 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 1777 Tys.clear(); 1778 Tys.push_back(MVT::Other); 1779 Ops.clear(); 1780 Ops.push_back(Chain); 1781 Ops.push_back(RetVal); 1782 Ops.push_back(StackSlot); 1783 Ops.push_back(DAG.getValueType(RetVT)); 1784 Ops.push_back(InFlag); 1785 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 1786 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0); 1787 Chain = RetVal.getValue(1); 1788 } 1789 1790 if (RetVT == MVT::f32 && !X86ScalarSSE) 1791 // FIXME: we would really like to remember that this FP_ROUND 1792 // operation is okay to eliminate if we allow excess FP precision. 1793 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 1794 ResultVals.push_back(RetVal); 1795 NodeTys.push_back(RetVT); 1796 break; 1797 } 1798 } 1799 1800 1801 // If the function returns void, just return the chain. 
1802 if (ResultVals.empty())
1803 return Chain;
1804
1805 // Otherwise, merge everything together with a MERGE_VALUES node.
1806 NodeTys.push_back(MVT::Other);
1807 ResultVals.push_back(Chain);
1808 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1809 &ResultVals[0], ResultVals.size());
1810 return Res.getValue(Op.ResNo);
1811}
1812
1813//===----------------------------------------------------------------------===//
1814// StdCall Calling Convention implementation
1815//===----------------------------------------------------------------------===//
1816// The StdCall calling convention is the standard for many Windows API
1817// routines. It differs from the C calling convention just a little: the
1818// callee cleans up the stack, not the caller. Symbols should also be
1819// decorated in some fancy way :) It doesn't support any vector arguments.
1820
1821/// HowToPassStdCallCCArgument - Returns how a formal argument of the
1822/// specified type should be passed. Returns the size of the stack slot.
1823static void
1824HowToPassStdCallCCArgument(MVT::ValueType ObjectVT, unsigned &ObjSize) {
1825 switch (ObjectVT) {
1826 default: assert(0 && "Unhandled argument type!");
1827 case MVT::i8: ObjSize = 1; break;
1828 case MVT::i16: ObjSize = 2; break;
1829 case MVT::i32: ObjSize = 4; break;
1830 case MVT::i64: ObjSize = 8; break;
1831 case MVT::f32: ObjSize = 4; break;
1832 case MVT::f64: ObjSize = 8; break;
1833 }
1834}
1835
1836SDOperand X86TargetLowering::LowerStdCallCCArguments(SDOperand Op,
1837 SelectionDAG &DAG) {
1838 unsigned NumArgs = Op.Val->getNumValues() - 1;
1839 MachineFunction &MF = DAG.getMachineFunction();
1840 MachineFrameInfo *MFI = MF.getFrameInfo();
1841 SDOperand Root = Op.getOperand(0);
1842 std::vector<SDOperand> ArgValues;
1843
1844 // Add DAG nodes to load the arguments... On entry to a function on the X86,
1845 // the stack frame looks like this:
1846 //
1847 // [ESP] -- return address
1848 // [ESP + 4] -- first argument (leftmost lexically)
1849 // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
1850 // ...
1851 //
1852 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
1853 for (unsigned i = 0; i < NumArgs; ++i) {
1854 MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
1855 unsigned ArgIncrement = 4;
1856 unsigned ObjSize = 0;
1857 HowToPassStdCallCCArgument(ObjectVT, ObjSize);
1858 if (ObjSize > 4)
1859 ArgIncrement = ObjSize;
1860
1861 SDOperand ArgValue;
1862 // Create the frame index object for this incoming parameter...
1863 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1864 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
1865 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
1866 ArgValues.push_back(ArgValue);
1867 ArgOffset += ArgIncrement; // Move on to the next argument...
1868 }
1869
1870 ArgValues.push_back(Root);
1871
1872 // If the function takes a variable number of arguments, make a frame index
1873 // for the start of the first vararg value... for expansion of llvm.va_start.
1874 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1875 if (isVarArg) {
1876 BytesToPopOnReturn = 0; // Callee pops nothing.
1877 BytesCallerReserves = ArgOffset;
1878 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
1879 } else {
1880 BytesToPopOnReturn = ArgOffset; // Callee pops everything.
1881 BytesCallerReserves = 0;
1882 }
1883 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
1884 ReturnAddrIndex = 0; // No return address slot generated yet.
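// Worked example (hypothetical signature, illustration only): for a stdcall
// function f(int a, int b, double c), the loop above assigns a -> [ESP+4],
// b -> [ESP+8], c -> [ESP+12], leaving ArgOffset == 16. A non-variadic
// callee therefore returns with "ret 16" (BytesToPopOnReturn == 16); a
// variadic one pops nothing and the 16 bytes become BytesCallerReserves.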
1885 1886 MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn); 1887 1888 // Return the new list of results. 1889 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 1890 Op.Val->value_end()); 1891 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 1892} 1893 1894 1895SDOperand X86TargetLowering::LowerStdCallCCCallTo(SDOperand Op, 1896 SelectionDAG &DAG) { 1897 SDOperand Chain = Op.getOperand(0); 1898 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 1899 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1900 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1901 SDOperand Callee = Op.getOperand(4); 1902 MVT::ValueType RetVT= Op.Val->getValueType(0); 1903 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 1904 1905 // Count how many bytes are to be pushed on the stack. 1906 unsigned NumBytes = 0; 1907 for (unsigned i = 0; i != NumOps; ++i) { 1908 SDOperand Arg = Op.getOperand(5+2*i); 1909 1910 switch (Arg.getValueType()) { 1911 default: assert(0 && "Unexpected ValueType for argument!"); 1912 case MVT::i8: 1913 case MVT::i16: 1914 case MVT::i32: 1915 case MVT::f32: 1916 NumBytes += 4; 1917 break; 1918 case MVT::i64: 1919 case MVT::f64: 1920 NumBytes += 8; 1921 break; 1922 } 1923 } 1924 1925 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1926 1927 // Arguments go on the stack in reverse order, as specified by the ABI. 1928 unsigned ArgOffset = 0; 1929 std::vector<SDOperand> MemOpChains; 1930 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); 1931 for (unsigned i = 0; i != NumOps; ++i) { 1932 SDOperand Arg = Op.getOperand(5+2*i); 1933 1934 switch (Arg.getValueType()) { 1935 default: assert(0 && "Unexpected ValueType for argument!"); 1936 case MVT::i8: 1937 case MVT::i16: { 1938 // Promote the integer to 32 bits. If the input type is signed use a 1939 // sign extend, otherwise use a zero extend. 1940 unsigned ExtOp = 1941 dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ? 1942 ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 1943 Arg = DAG.getNode(ExtOp, MVT::i32, Arg); 1944 } 1945 // Fallthrough 1946 1947 case MVT::i32: 1948 case MVT::f32: { 1949 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1950 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1951 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1952 ArgOffset += 4; 1953 break; 1954 } 1955 case MVT::i64: 1956 case MVT::f64: { 1957 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1958 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1959 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1960 ArgOffset += 8; 1961 break; 1962 } 1963 } 1964 } 1965 1966 if (!MemOpChains.empty()) 1967 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1968 &MemOpChains[0], MemOpChains.size()); 1969 1970 // If the callee is a GlobalAddress node (quite common, every direct call is) 1971 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 
1972 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1973 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1974 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1975 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1976 1977 std::vector<MVT::ValueType> NodeTys; 1978 NodeTys.push_back(MVT::Other); // Returns a chain 1979 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 1980 std::vector<SDOperand> Ops; 1981 Ops.push_back(Chain); 1982 Ops.push_back(Callee); 1983 1984 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1985 NodeTys, &Ops[0], Ops.size()); 1986 SDOperand InFlag = Chain.getValue(1); 1987 1988 // Create the CALLSEQ_END node. 1989 unsigned NumBytesForCalleeToPush; 1990 1991 if (isVarArg) { 1992 NumBytesForCalleeToPush = 0; 1993 } else { 1994 NumBytesForCalleeToPush = NumBytes; 1995 } 1996 1997 NodeTys.clear(); 1998 NodeTys.push_back(MVT::Other); // Returns a chain 1999 if (RetVT != MVT::Other) 2000 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 2001 Ops.clear(); 2002 Ops.push_back(Chain); 2003 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 2004 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); 2005 Ops.push_back(InFlag); 2006 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 2007 if (RetVT != MVT::Other) 2008 InFlag = Chain.getValue(1); 2009 2010 std::vector<SDOperand> ResultVals; 2011 NodeTys.clear(); 2012 switch (RetVT) { 2013 default: assert(0 && "Unknown value type to return!"); 2014 case MVT::Other: break; 2015 case MVT::i8: 2016 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 2017 ResultVals.push_back(Chain.getValue(0)); 2018 NodeTys.push_back(MVT::i8); 2019 break; 2020 case MVT::i16: 2021 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 2022 ResultVals.push_back(Chain.getValue(0)); 2023 NodeTys.push_back(MVT::i16); 2024 break; 2025 case MVT::i32: 2026 if (Op.Val->getValueType(1) == MVT::i32) { 2027 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 2028 ResultVals.push_back(Chain.getValue(0)); 2029 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32, 2030 Chain.getValue(2)).getValue(1); 2031 ResultVals.push_back(Chain.getValue(0)); 2032 NodeTys.push_back(MVT::i32); 2033 } else { 2034 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 2035 ResultVals.push_back(Chain.getValue(0)); 2036 } 2037 NodeTys.push_back(MVT::i32); 2038 break; 2039 case MVT::f32: 2040 case MVT::f64: { 2041 std::vector<MVT::ValueType> Tys; 2042 Tys.push_back(MVT::f64); 2043 Tys.push_back(MVT::Other); 2044 Tys.push_back(MVT::Flag); 2045 std::vector<SDOperand> Ops; 2046 Ops.push_back(Chain); 2047 Ops.push_back(InFlag); 2048 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, 2049 &Ops[0], Ops.size()); 2050 Chain = RetVal.getValue(1); 2051 InFlag = RetVal.getValue(2); 2052 if (X86ScalarSSE) { 2053 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 2054 // shouldn't be necessary except that RFP cannot be live across 2055 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
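// In other words (illustrative summary, not original commentary): the x87
// unit returns the value in ST(0), but under X86ScalarSSE the rest of the
// function expects it in an XMM register. Since there is no direct
// ST(0)->XMM copy, the sequence below stores ST(0) to a fresh stack slot
// with X86ISD::FST and immediately reloads it, paying a round trip through
// memory.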
2056 MachineFunction &MF = DAG.getMachineFunction();
2057 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
2058 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2059 Tys.clear();
2060 Tys.push_back(MVT::Other);
2061 Ops.clear();
2062 Ops.push_back(Chain);
2063 Ops.push_back(RetVal);
2064 Ops.push_back(StackSlot);
2065 Ops.push_back(DAG.getValueType(RetVT));
2066 Ops.push_back(InFlag);
2067 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
2068 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
2069 Chain = RetVal.getValue(1);
2070 }
2071
2072 if (RetVT == MVT::f32 && !X86ScalarSSE)
2073 // FIXME: we would really like to remember that this FP_ROUND
2074 // operation is okay to eliminate if we allow excess FP precision.
2075 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
2076 ResultVals.push_back(RetVal);
2077 NodeTys.push_back(RetVT);
2078 break;
2079 }
2080 }
2081
2082 // If the function returns void, just return the chain.
2083 if (ResultVals.empty())
2084 return Chain;
2085
2086 // Otherwise, merge everything together with a MERGE_VALUES node.
2087 NodeTys.push_back(MVT::Other);
2088 ResultVals.push_back(Chain);
2089 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
2090 &ResultVals[0], ResultVals.size());
2091 return Res.getValue(Op.ResNo);
2092}
2093
2094//===----------------------------------------------------------------------===//
2095// FastCall Calling Convention implementation
2096//===----------------------------------------------------------------------===//
2097//
2098// The X86 'fastcall' calling convention passes up to two integer arguments in
2099// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
2100// requires that the callee pop its arguments off the stack (allowing proper
2101// tail calls), and has the same return value conventions as the C calling
2102// convention.
2103// This calling convention always arranges for the callee pop value to be 8n+4
2104// bytes, which is needed for tail recursion elimination and stack alignment
2105// reasons.
2106//
2107
2108/// HowToPassFastCallCCArgument - Returns how a formal argument of the
2109/// specified type should be passed. If it is passed through the stack, returns
2110/// the size of the stack slot; if it is passed through an integer register,
2111/// returns the number of integer registers needed.
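// For illustration, the same policy as a standalone sketch (hypothetical
// helper, not used by the lowering code): ECX and EDX are the two fastcall
// registers; an i64 takes both when free, else one register plus a 4-byte
// stack half, else 8 stack bytes; f32/f64 always live on the stack.
static unsigned FastCallStackBytesSketch(unsigned SizeInBytes, bool isInt,
                                         unsigned &RegsUsed) {
  if (!isInt)
    return SizeInBytes;                     // f32 -> 4 bytes, f64 -> 8 bytes.
  if (SizeInBytes <= 4)                     // i8/i16/i32: one register if free.
    return RegsUsed < 2 ? (++RegsUsed, 0u) : SizeInBytes;
  if (RegsUsed == 0) { RegsUsed = 2; return 0; }  // i64 in ECX:EDX.
  if (RegsUsed == 1) { RegsUsed = 2; return 4; }  // Low half in EDX.
  return 8;                                 // Whole i64 on the stack.
}
// E.g. f(int, long long, int): the int rides in ECX; the i64 gets EDX plus a
// 4-byte stack half; the trailing int takes 4 more stack bytes, and the
// callee pops all of the stack bytes on return.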
2112static void
2113HowToPassFastCallCCArgument(MVT::ValueType ObjectVT,
2114 unsigned NumIntRegs,
2115 unsigned &ObjSize,
2116 unsigned &ObjIntRegs)
2117{
2118 ObjSize = 0;
2119 ObjIntRegs = 0;
2120
2121 switch (ObjectVT) {
2122 default: assert(0 && "Unhandled argument type!");
2123 case MVT::i8:
2124 if (NumIntRegs < 2)
2125 ObjIntRegs = 1;
2126 else
2127 ObjSize = 1;
2128 break;
2129 case MVT::i16:
2130 if (NumIntRegs < 2)
2131 ObjIntRegs = 1;
2132 else
2133 ObjSize = 2;
2134 break;
2135 case MVT::i32:
2136 if (NumIntRegs < 2)
2137 ObjIntRegs = 1;
2138 else
2139 ObjSize = 4;
2140 break;
2141 case MVT::i64:
2142 if (NumIntRegs+2 <= 2) {
2143 ObjIntRegs = 2;
2144 } else if (NumIntRegs+1 <= 2) {
2145 ObjIntRegs = 1;
2146 ObjSize = 4;
2147 } else
2148 ObjSize = 8; break; // Without the break, an i64 falls into the f32 case and clobbers ObjSize.
2149 case MVT::f32:
2150 ObjSize = 4;
2151 break;
2152 case MVT::f64:
2153 ObjSize = 8;
2154 break;
2155 }
2156}
2157
2158SDOperand
2159X86TargetLowering::LowerFastCallCCArguments(SDOperand Op, SelectionDAG &DAG) {
2160 unsigned NumArgs = Op.Val->getNumValues()-1;
2161 MachineFunction &MF = DAG.getMachineFunction();
2162 MachineFrameInfo *MFI = MF.getFrameInfo();
2163 SDOperand Root = Op.getOperand(0);
2164 std::vector<SDOperand> ArgValues;
2165
2166 // Add DAG nodes to load the arguments... On entry to a function the stack
2167 // frame looks like this:
2168 //
2169 // [ESP] -- return address
2170 // [ESP + 4] -- first nonreg argument (leftmost lexically)
2171 // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
2172 // ...
2173 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
2174
2175 // Keep track of the number of integer regs passed so far. This can be either
2176 // 0 (neither ECX nor EDX used), 1 (ECX is used) or 2 (ECX and EDX are both
2177 // used).
2178 unsigned NumIntRegs = 0;
2179
2180 for (unsigned i = 0; i < NumArgs; ++i) {
2181 MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
2182 unsigned ArgIncrement = 4;
2183 unsigned ObjSize = 0;
2184 unsigned ObjIntRegs = 0;
2185
2186 HowToPassFastCallCCArgument(ObjectVT, NumIntRegs, ObjSize, ObjIntRegs);
2187 if (ObjSize > 4)
2188 ArgIncrement = ObjSize;
2189
2190 unsigned Reg = 0;
2191 SDOperand ArgValue;
2192 if (ObjIntRegs) {
2193 switch (ObjectVT) {
2194 default: assert(0 && "Unhandled argument type!");
2195 case MVT::i8:
2196 Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::CL,
2197 X86::GR8RegisterClass);
2198 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
2199 break;
2200 case MVT::i16:
2201 Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::CX,
2202 X86::GR16RegisterClass);
2203 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
2204 break;
2205 case MVT::i32:
2206 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::ECX,
2207 X86::GR32RegisterClass);
2208 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
2209 break;
2210 case MVT::i64:
2211 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::ECX,
2212 X86::GR32RegisterClass);
2213 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
2214 if (ObjIntRegs == 2) {
2215 Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
2216 SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
2217 ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
2218 }
2219 break;
2220 }
2221
2222 NumIntRegs += ObjIntRegs;
2223 }
2224
2225 if (ObjSize) {
2226 // Create the SelectionDAG nodes corresponding to a load from this
2227 // parameter.
2228 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 2229 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 2230 if (ObjectVT == MVT::i64 && ObjIntRegs) { 2231 SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, 2232 NULL, 0); 2233 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2); 2234 } else 2235 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0); 2236 ArgOffset += ArgIncrement; // Move on to the next argument. 2237 } 2238 2239 ArgValues.push_back(ArgValue); 2240 } 2241 2242 ArgValues.push_back(Root); 2243 2244 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 2245 // arguments and the arguments after the retaddr has been pushed are aligned. 2246 if ((ArgOffset & 7) == 0) 2247 ArgOffset += 4; 2248 2249 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 2250 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 2251 ReturnAddrIndex = 0; // No return address slot generated yet. 2252 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 2253 BytesCallerReserves = 0; 2254 2255 MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn); 2256 2257 // Finally, inform the code generator which regs we return values in. 2258 switch (getValueType(MF.getFunction()->getReturnType())) { 2259 default: assert(0 && "Unknown type!"); 2260 case MVT::isVoid: break; 2261 case MVT::i1: 2262 case MVT::i8: 2263 case MVT::i16: 2264 case MVT::i32: 2265 MF.addLiveOut(X86::ECX); 2266 break; 2267 case MVT::i64: 2268 MF.addLiveOut(X86::ECX); 2269 MF.addLiveOut(X86::EDX); 2270 break; 2271 case MVT::f32: 2272 case MVT::f64: 2273 MF.addLiveOut(X86::ST0); 2274 break; 2275 } 2276 2277 // Return the new list of results. 2278 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 2279 Op.Val->value_end()); 2280 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 2281} 2282 2283SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 2284 if (ReturnAddrIndex == 0) { 2285 // Set up a frame object for the return address. 2286 MachineFunction &MF = DAG.getMachineFunction(); 2287 if (Subtarget->is64Bit()) 2288 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 2289 else 2290 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 2291 } 2292 2293 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 2294} 2295 2296 2297 2298std::pair<SDOperand, SDOperand> X86TargetLowering:: 2299LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 2300 SelectionDAG &DAG) { 2301 SDOperand Result; 2302 if (Depth) // Depths > 0 not supported yet! 2303 Result = DAG.getConstant(0, getPointerTy()); 2304 else { 2305 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 2306 if (!isFrameAddress) 2307 // Just load the return address 2308 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, 2309 NULL, 0); 2310 else 2311 Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 2312 DAG.getConstant(4, getPointerTy())); 2313 } 2314 return std::make_pair(Result, Chain); 2315} 2316 2317/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 2318/// specific condition code. It returns a false if it cannot do a direct 2319/// translation. X86CC is the translated CondCode. LHS/RHS are modified as 2320/// needed. 
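// For example (illustration): integer SETGT/SETGE/SETLT/SETLE map to the
// signed condition codes COND_G/COND_GE/COND_L/COND_LE, while their unsigned
// counterparts map to the carry-based COND_A/COND_AE/COND_B/COND_BE. Two
// peepholes run first: (setgt X, -1) is rewritten as a sign test of X taken
// when the sign flag is clear (COND_NS), and (setlt X, 0) as one taken when
// the sign flag is set (COND_S).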
2321static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 2322 unsigned &X86CC, SDOperand &LHS, SDOperand &RHS, 2323 SelectionDAG &DAG) { 2324 X86CC = X86::COND_INVALID; 2325 if (!isFP) { 2326 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 2327 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 2328 // X > -1 -> X == 0, jump !sign. 2329 RHS = DAG.getConstant(0, RHS.getValueType()); 2330 X86CC = X86::COND_NS; 2331 return true; 2332 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 2333 // X < 0 -> X == 0, jump on sign. 2334 X86CC = X86::COND_S; 2335 return true; 2336 } 2337 } 2338 2339 switch (SetCCOpcode) { 2340 default: break; 2341 case ISD::SETEQ: X86CC = X86::COND_E; break; 2342 case ISD::SETGT: X86CC = X86::COND_G; break; 2343 case ISD::SETGE: X86CC = X86::COND_GE; break; 2344 case ISD::SETLT: X86CC = X86::COND_L; break; 2345 case ISD::SETLE: X86CC = X86::COND_LE; break; 2346 case ISD::SETNE: X86CC = X86::COND_NE; break; 2347 case ISD::SETULT: X86CC = X86::COND_B; break; 2348 case ISD::SETUGT: X86CC = X86::COND_A; break; 2349 case ISD::SETULE: X86CC = X86::COND_BE; break; 2350 case ISD::SETUGE: X86CC = X86::COND_AE; break; 2351 } 2352 } else { 2353 // On a floating point condition, the flags are set as follows: 2354 // ZF PF CF op 2355 // 0 | 0 | 0 | X > Y 2356 // 0 | 0 | 1 | X < Y 2357 // 1 | 0 | 0 | X == Y 2358 // 1 | 1 | 1 | unordered 2359 bool Flip = false; 2360 switch (SetCCOpcode) { 2361 default: break; 2362 case ISD::SETUEQ: 2363 case ISD::SETEQ: X86CC = X86::COND_E; break; 2364 case ISD::SETOLT: Flip = true; // Fallthrough 2365 case ISD::SETOGT: 2366 case ISD::SETGT: X86CC = X86::COND_A; break; 2367 case ISD::SETOLE: Flip = true; // Fallthrough 2368 case ISD::SETOGE: 2369 case ISD::SETGE: X86CC = X86::COND_AE; break; 2370 case ISD::SETUGT: Flip = true; // Fallthrough 2371 case ISD::SETULT: 2372 case ISD::SETLT: X86CC = X86::COND_B; break; 2373 case ISD::SETUGE: Flip = true; // Fallthrough 2374 case ISD::SETULE: 2375 case ISD::SETLE: X86CC = X86::COND_BE; break; 2376 case ISD::SETONE: 2377 case ISD::SETNE: X86CC = X86::COND_NE; break; 2378 case ISD::SETUO: X86CC = X86::COND_P; break; 2379 case ISD::SETO: X86CC = X86::COND_NP; break; 2380 } 2381 if (Flip) 2382 std::swap(LHS, RHS); 2383 } 2384 2385 return X86CC != X86::COND_INVALID; 2386} 2387 2388/// hasFPCMov - is there a floating point cmov for the specific X86 condition 2389/// code. Current x86 isa includes the following FP cmov instructions: 2390/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 2391static bool hasFPCMov(unsigned X86CC) { 2392 switch (X86CC) { 2393 default: 2394 return false; 2395 case X86::COND_B: 2396 case X86::COND_BE: 2397 case X86::COND_E: 2398 case X86::COND_P: 2399 case X86::COND_A: 2400 case X86::COND_AE: 2401 case X86::COND_NE: 2402 case X86::COND_NP: 2403 return true; 2404 } 2405} 2406 2407/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 2408/// load. For Darwin, external and weak symbols are indirect, loading the value 2409/// at address GV rather then the value of GV itself. This means that the 2410/// GlobalAddress must be in the base or index register of the address, not the 2411/// GV offset field. 2412static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 2413 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 2414 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 2415} 2416 2417/// WindowsGVRequiresExtraLoad - true if accessing the GV requires an extra 2418/// load. 
For Windows, dllimported symbols are indirect, loading the value at
2419/// address GV rather than the value of GV itself. This means that the
2420/// GlobalAddress must be in the base or index register of the address, not the
2421/// GV offset field.
2422static bool WindowsGVRequiresExtraLoad(GlobalValue *GV) {
2423 return (GV->hasDLLImportLinkage());
2424}
2425
2426/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
2427/// true if Op is undef or if its value falls within the half-open range [Low, Hi).
2428static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
2429 if (Op.getOpcode() == ISD::UNDEF)
2430 return true;
2431
2432 unsigned Val = cast<ConstantSDNode>(Op)->getValue();
2433 return (Val >= Low && Val < Hi);
2434}
2435
2436/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
2437/// true if Op is undef or if its value is equal to the specified value.
2438static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
2439 if (Op.getOpcode() == ISD::UNDEF)
2440 return true;
2441 return cast<ConstantSDNode>(Op)->getValue() == Val;
2442}
2443
2444/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
2445/// specifies a shuffle of elements that is suitable for input to PSHUFD.
2446bool X86::isPSHUFDMask(SDNode *N) {
2447 assert(N->getOpcode() == ISD::BUILD_VECTOR);
2448
2449 if (N->getNumOperands() != 4)
2450 return false;
2451
2452 // Check if the value doesn't reference the second vector.
2453 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2454 SDOperand Arg = N->getOperand(i);
2455 if (Arg.getOpcode() == ISD::UNDEF) continue;
2456 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2457 if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
2458 return false;
2459 }
2460
2461 return true;
2462}
2463
2464/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
2465/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
2466bool X86::isPSHUFHWMask(SDNode *N) {
2467 assert(N->getOpcode() == ISD::BUILD_VECTOR);
2468
2469 if (N->getNumOperands() != 8)
2470 return false;
2471
2472 // Lower quadword copied in order.
2473 for (unsigned i = 0; i != 4; ++i) {
2474 SDOperand Arg = N->getOperand(i);
2475 if (Arg.getOpcode() == ISD::UNDEF) continue;
2476 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2477 if (cast<ConstantSDNode>(Arg)->getValue() != i)
2478 return false;
2479 }
2480
2481 // Upper quadword shuffled.
2482 for (unsigned i = 4; i != 8; ++i) {
2483 SDOperand Arg = N->getOperand(i);
2484 if (Arg.getOpcode() == ISD::UNDEF) continue;
2485 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2486 unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2487 if (Val < 4 || Val > 7)
2488 return false;
2489 }
2490
2491 return true;
2492}
2493
2494/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
2495/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
2496bool X86::isPSHUFLWMask(SDNode *N) {
2497 assert(N->getOpcode() == ISD::BUILD_VECTOR);
2498
2499 if (N->getNumOperands() != 8)
2500 return false;
2501
2502 // Upper quadword copied in order.
2503 for (unsigned i = 4; i != 8; ++i)
2504 if (!isUndefOrEqual(N->getOperand(i), i))
2505 return false;
2506
2507 // Lower quadword shuffled.
2508 for (unsigned i = 0; i != 4; ++i) 2509 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2510 return false; 2511 2512 return true; 2513} 2514 2515/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2516/// specifies a shuffle of elements that is suitable for input to SHUFP*. 2517static bool isSHUFPMask(std::vector<SDOperand> &N) { 2518 unsigned NumElems = N.size(); 2519 if (NumElems != 2 && NumElems != 4) return false; 2520 2521 unsigned Half = NumElems / 2; 2522 for (unsigned i = 0; i < Half; ++i) 2523 if (!isUndefOrInRange(N[i], 0, NumElems)) 2524 return false; 2525 for (unsigned i = Half; i < NumElems; ++i) 2526 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 2527 return false; 2528 2529 return true; 2530} 2531 2532bool X86::isSHUFPMask(SDNode *N) { 2533 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2534 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2535 return ::isSHUFPMask(Ops); 2536} 2537 2538/// isCommutedSHUFP - Returns true if the shuffle mask is except 2539/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 2540/// half elements to come from vector 1 (which would equal the dest.) and 2541/// the upper half to come from vector 2. 2542static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 2543 unsigned NumElems = Ops.size(); 2544 if (NumElems != 2 && NumElems != 4) return false; 2545 2546 unsigned Half = NumElems / 2; 2547 for (unsigned i = 0; i < Half; ++i) 2548 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 2549 return false; 2550 for (unsigned i = Half; i < NumElems; ++i) 2551 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 2552 return false; 2553 return true; 2554} 2555 2556static bool isCommutedSHUFP(SDNode *N) { 2557 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2558 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2559 return isCommutedSHUFP(Ops); 2560} 2561 2562/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2563/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2564bool X86::isMOVHLPSMask(SDNode *N) { 2565 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2566 2567 if (N->getNumOperands() != 4) 2568 return false; 2569 2570 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2571 return isUndefOrEqual(N->getOperand(0), 6) && 2572 isUndefOrEqual(N->getOperand(1), 7) && 2573 isUndefOrEqual(N->getOperand(2), 2) && 2574 isUndefOrEqual(N->getOperand(3), 3); 2575} 2576 2577/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2578/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 2579bool X86::isMOVLPMask(SDNode *N) { 2580 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2581 2582 unsigned NumElems = N->getNumOperands(); 2583 if (NumElems != 2 && NumElems != 4) 2584 return false; 2585 2586 for (unsigned i = 0; i < NumElems/2; ++i) 2587 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 2588 return false; 2589 2590 for (unsigned i = NumElems/2; i < NumElems; ++i) 2591 if (!isUndefOrEqual(N->getOperand(i), i)) 2592 return false; 2593 2594 return true; 2595} 2596 2597/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 2598/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 2599/// and MOVLHPS. 
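// Illustration: for v4f32 the accepted mask is <0, 1, 4, 5> (undefs
// allowed), i.e. the low half of the result is V1's low half in order and
// the high half is V2's low half in order; for v2f64 it is <0, 2>.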
2600bool X86::isMOVHPMask(SDNode *N) { 2601 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2602 2603 unsigned NumElems = N->getNumOperands(); 2604 if (NumElems != 2 && NumElems != 4) 2605 return false; 2606 2607 for (unsigned i = 0; i < NumElems/2; ++i) 2608 if (!isUndefOrEqual(N->getOperand(i), i)) 2609 return false; 2610 2611 for (unsigned i = 0; i < NumElems/2; ++i) { 2612 SDOperand Arg = N->getOperand(i + NumElems/2); 2613 if (!isUndefOrEqual(Arg, i + NumElems)) 2614 return false; 2615 } 2616 2617 return true; 2618} 2619 2620/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 2621/// specifies a shuffle of elements that is suitable for input to UNPCKL. 2622bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2623 unsigned NumElems = N.size(); 2624 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2625 return false; 2626 2627 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2628 SDOperand BitI = N[i]; 2629 SDOperand BitI1 = N[i+1]; 2630 if (!isUndefOrEqual(BitI, j)) 2631 return false; 2632 if (V2IsSplat) { 2633 if (isUndefOrEqual(BitI1, NumElems)) 2634 return false; 2635 } else { 2636 if (!isUndefOrEqual(BitI1, j + NumElems)) 2637 return false; 2638 } 2639 } 2640 2641 return true; 2642} 2643 2644bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 2645 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2646 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2647 return ::isUNPCKLMask(Ops, V2IsSplat); 2648} 2649 2650/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 2651/// specifies a shuffle of elements that is suitable for input to UNPCKH. 2652bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2653 unsigned NumElems = N.size(); 2654 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2655 return false; 2656 2657 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2658 SDOperand BitI = N[i]; 2659 SDOperand BitI1 = N[i+1]; 2660 if (!isUndefOrEqual(BitI, j + NumElems/2)) 2661 return false; 2662 if (V2IsSplat) { 2663 if (isUndefOrEqual(BitI1, NumElems)) 2664 return false; 2665 } else { 2666 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 2667 return false; 2668 } 2669 } 2670 2671 return true; 2672} 2673 2674bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 2675 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2676 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2677 return ::isUNPCKHMask(Ops, V2IsSplat); 2678} 2679 2680/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 2681/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 2682/// <0, 0, 1, 1> 2683bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 2684 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2685 2686 unsigned NumElems = N->getNumOperands(); 2687 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 2688 return false; 2689 2690 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2691 SDOperand BitI = N->getOperand(i); 2692 SDOperand BitI1 = N->getOperand(i+1); 2693 2694 if (!isUndefOrEqual(BitI, j)) 2695 return false; 2696 if (!isUndefOrEqual(BitI1, j)) 2697 return false; 2698 } 2699 2700 return true; 2701} 2702 2703/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 2704/// specifies a shuffle of elements that is suitable for input to MOVSS, 2705/// MOVSD, and MOVD, i.e. setting the lowest element. 
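// Illustration: for v4i32 the canonical MOVL mask is <4, 1, 2, 3>: element 0
// of the result is V2's low element and the remaining elements come from V1
// unchanged, which is exactly what movss/movsd/movd do.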
2706static bool isMOVLMask(std::vector<SDOperand> &N) { 2707 unsigned NumElems = N.size(); 2708 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2709 return false; 2710 2711 if (!isUndefOrEqual(N[0], NumElems)) 2712 return false; 2713 2714 for (unsigned i = 1; i < NumElems; ++i) { 2715 SDOperand Arg = N[i]; 2716 if (!isUndefOrEqual(Arg, i)) 2717 return false; 2718 } 2719 2720 return true; 2721} 2722 2723bool X86::isMOVLMask(SDNode *N) { 2724 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2725 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2726 return ::isMOVLMask(Ops); 2727} 2728 2729/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 2730/// of what x86 movss want. X86 movs requires the lowest element to be lowest 2731/// element of vector 2 and the other elements to come from vector 1 in order. 2732static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false, 2733 bool V2IsUndef = false) { 2734 unsigned NumElems = Ops.size(); 2735 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2736 return false; 2737 2738 if (!isUndefOrEqual(Ops[0], 0)) 2739 return false; 2740 2741 for (unsigned i = 1; i < NumElems; ++i) { 2742 SDOperand Arg = Ops[i]; 2743 if (!(isUndefOrEqual(Arg, i+NumElems) || 2744 (V2IsUndef && isUndefOrInRange(Arg, NumElems, NumElems*2)) || 2745 (V2IsSplat && isUndefOrEqual(Arg, NumElems)))) 2746 return false; 2747 } 2748 2749 return true; 2750} 2751 2752static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 2753 bool V2IsUndef = false) { 2754 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2755 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2756 return isCommutedMOVL(Ops, V2IsSplat, V2IsUndef); 2757} 2758 2759/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2760/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 2761bool X86::isMOVSHDUPMask(SDNode *N) { 2762 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2763 2764 if (N->getNumOperands() != 4) 2765 return false; 2766 2767 // Expect 1, 1, 3, 3 2768 for (unsigned i = 0; i < 2; ++i) { 2769 SDOperand Arg = N->getOperand(i); 2770 if (Arg.getOpcode() == ISD::UNDEF) continue; 2771 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2772 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2773 if (Val != 1) return false; 2774 } 2775 2776 bool HasHi = false; 2777 for (unsigned i = 2; i < 4; ++i) { 2778 SDOperand Arg = N->getOperand(i); 2779 if (Arg.getOpcode() == ISD::UNDEF) continue; 2780 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2781 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2782 if (Val != 3) return false; 2783 HasHi = true; 2784 } 2785 2786 // Don't use movshdup if it can be done with a shufps. 2787 return HasHi; 2788} 2789 2790/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2791/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
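// Illustration: movshdup duplicates the odd elements (mask <1, 1, 3, 3>,
// checked above) and movsldup the even ones (mask <0, 0, 2, 2>, checked
// below). The HasHi flag rejects masks whose high half is entirely undef,
// since those can be done just as cheaply with shufps.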
2792bool X86::isMOVSLDUPMask(SDNode *N) { 2793 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2794 2795 if (N->getNumOperands() != 4) 2796 return false; 2797 2798 // Expect 0, 0, 2, 2 2799 for (unsigned i = 0; i < 2; ++i) { 2800 SDOperand Arg = N->getOperand(i); 2801 if (Arg.getOpcode() == ISD::UNDEF) continue; 2802 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2803 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2804 if (Val != 0) return false; 2805 } 2806 2807 bool HasHi = false; 2808 for (unsigned i = 2; i < 4; ++i) { 2809 SDOperand Arg = N->getOperand(i); 2810 if (Arg.getOpcode() == ISD::UNDEF) continue; 2811 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2812 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2813 if (Val != 2) return false; 2814 HasHi = true; 2815 } 2816 2817 // Don't use movshdup if it can be done with a shufps. 2818 return HasHi; 2819} 2820 2821/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2822/// a splat of a single element. 2823static bool isSplatMask(SDNode *N) { 2824 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2825 2826 // This is a splat operation if each element of the permute is the same, and 2827 // if the value doesn't reference the second vector. 2828 unsigned NumElems = N->getNumOperands(); 2829 SDOperand ElementBase; 2830 unsigned i = 0; 2831 for (; i != NumElems; ++i) { 2832 SDOperand Elt = N->getOperand(i); 2833 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) { 2834 ElementBase = Elt; 2835 break; 2836 } 2837 } 2838 2839 if (!ElementBase.Val) 2840 return false; 2841 2842 for (; i != NumElems; ++i) { 2843 SDOperand Arg = N->getOperand(i); 2844 if (Arg.getOpcode() == ISD::UNDEF) continue; 2845 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2846 if (Arg != ElementBase) return false; 2847 } 2848 2849 // Make sure it is a splat of the first vector operand. 2850 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 2851} 2852 2853/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2854/// a splat of a single element and it's a 2 or 4 element mask. 2855bool X86::isSplatMask(SDNode *N) { 2856 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2857 2858 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 2859 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2860 return false; 2861 return ::isSplatMask(N); 2862} 2863 2864/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2865/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2866/// instructions. 2867unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2868 unsigned NumOperands = N->getNumOperands(); 2869 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2870 unsigned Mask = 0; 2871 for (unsigned i = 0; i < NumOperands; ++i) { 2872 unsigned Val = 0; 2873 SDOperand Arg = N->getOperand(NumOperands-i-1); 2874 if (Arg.getOpcode() != ISD::UNDEF) 2875 Val = cast<ConstantSDNode>(Arg)->getValue(); 2876 if (Val >= NumOperands) Val -= NumOperands; 2877 Mask |= Val; 2878 if (i != NumOperands - 1) 2879 Mask <<= Shift; 2880 } 2881 2882 return Mask; 2883} 2884 2885/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2886/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2887/// instructions. 2888unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2889 unsigned Mask = 0; 2890 // 8 nodes, but we only care about the last 4. 
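  // Worked example (illustration): the immediate packs two bits per element,
  // element 4's selector in the low bits. For the mask <0,1,2,3,7,6,5,4>
  // (high quadword reversed) the loop below visits operands 7..4 and ORs in
  // Val - 4, shifting between elements:
  //   elem7 -> 0, elem6 -> 1, elem5 -> 2, elem4 -> 3
  // yielding Mask == 0b00011011 == 0x1B.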
2891 for (unsigned i = 7; i >= 4; --i) {
2892 unsigned Val = 4; // Treat undef as element 4 so (Val - 4) stays in range.
2893 SDOperand Arg = N->getOperand(i);
2894 if (Arg.getOpcode() != ISD::UNDEF)
2895 Val = cast<ConstantSDNode>(Arg)->getValue();
2896 Mask |= (Val - 4);
2897 if (i != 4)
2898 Mask <<= 2;
2899 }
2900
2901 return Mask;
2902}
2903
2904/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2905/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2906/// instructions.
2907unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2908 unsigned Mask = 0;
2909 // 8 nodes, but we only care about the first 4.
2910 for (int i = 3; i >= 0; --i) {
2911 unsigned Val = 0;
2912 SDOperand Arg = N->getOperand(i);
2913 if (Arg.getOpcode() != ISD::UNDEF)
2914 Val = cast<ConstantSDNode>(Arg)->getValue();
2915 Mask |= Val;
2916 if (i != 0)
2917 Mask <<= 2;
2918 }
2919
2920 return Mask;
2921}
2922
2923/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2924/// specifies an 8 element shuffle that can be broken into a pair of
2925/// PSHUFHW and PSHUFLW.
2926static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2927 assert(N->getOpcode() == ISD::BUILD_VECTOR);
2928
2929 if (N->getNumOperands() != 8)
2930 return false;
2931
2932 // Lower quadword shuffled.
2933 for (unsigned i = 0; i != 4; ++i) {
2934 SDOperand Arg = N->getOperand(i);
2935 if (Arg.getOpcode() == ISD::UNDEF) continue;
2936 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2937 unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2938 if (Val >= 4)
2939 return false;
2940 }
2941
2942 // Upper quadword shuffled.
2943 for (unsigned i = 4; i != 8; ++i) {
2944 SDOperand Arg = N->getOperand(i);
2945 if (Arg.getOpcode() == ISD::UNDEF) continue;
2946 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2947 unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2948 if (Val < 4 || Val > 7)
2949 return false;
2950 }
2951
2952 return true;
2953}
2954
2955/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
2956/// values in their permute mask.
2957static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
2958 SDOperand &V2, SDOperand &Mask,
2959 SelectionDAG &DAG) {
2960 MVT::ValueType VT = Op.getValueType();
2961 MVT::ValueType MaskVT = Mask.getValueType();
2962 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2963 unsigned NumElems = Mask.getNumOperands();
2964 std::vector<SDOperand> MaskVec;
2965
2966 for (unsigned i = 0; i != NumElems; ++i) {
2967 SDOperand Arg = Mask.getOperand(i);
2968 if (Arg.getOpcode() == ISD::UNDEF) {
2969 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2970 continue;
2971 }
2972 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2973 unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2974 if (Val < NumElems)
2975 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2976 else
2977 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2978 }
2979
2980 std::swap(V1, V2);
2981 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2982 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2983}
2984
2985/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2986/// match movhlps. The lower half elements should come from upper half of
2987/// V1 (and in order), and the upper half elements should come from the upper
2988/// half of V2 (and in order).
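// Illustration: a v4f32 shuffle with mask <2, 3, 6, 7> qualifies; commuting
// the operands (see CommuteVectorShuffle above) turns it into
// shuffle(V2, V1, <6, 7, 2, 3>), which is exactly the isMOVHLPSMask pattern.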
2989static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2990 unsigned NumElems = Mask->getNumOperands(); 2991 if (NumElems != 4) 2992 return false; 2993 for (unsigned i = 0, e = 2; i != e; ++i) 2994 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2995 return false; 2996 for (unsigned i = 2; i != 4; ++i) 2997 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2998 return false; 2999 return true; 3000} 3001 3002/// isScalarLoadToVector - Returns true if the node is a scalar load that 3003/// is promoted to a vector. 3004static inline bool isScalarLoadToVector(SDNode *N) { 3005 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 3006 N = N->getOperand(0).Val; 3007 return ISD::isNON_EXTLoad(N); 3008 } 3009 return false; 3010} 3011 3012/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 3013/// match movlp{s|d}. The lower half elements should come from lower half of 3014/// V1 (and in order), and the upper half elements should come from the upper 3015/// half of V2 (and in order). And since V1 will become the source of the 3016/// MOVLP, it must be either a vector load or a scalar load to vector. 3017static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 3018 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 3019 return false; 3020 // Is V2 is a vector load, don't do this transformation. We will try to use 3021 // load folding shufps op. 3022 if (ISD::isNON_EXTLoad(V2)) 3023 return false; 3024 3025 unsigned NumElems = Mask->getNumOperands(); 3026 if (NumElems != 2 && NumElems != 4) 3027 return false; 3028 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 3029 if (!isUndefOrEqual(Mask->getOperand(i), i)) 3030 return false; 3031 for (unsigned i = NumElems/2; i != NumElems; ++i) 3032 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 3033 return false; 3034 return true; 3035} 3036 3037/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 3038/// all the same. 3039static bool isSplatVector(SDNode *N) { 3040 if (N->getOpcode() != ISD::BUILD_VECTOR) 3041 return false; 3042 3043 SDOperand SplatValue = N->getOperand(0); 3044 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 3045 if (N->getOperand(i) != SplatValue) 3046 return false; 3047 return true; 3048} 3049 3050/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 3051/// to an undef. 3052static bool isUndefShuffle(SDNode *N) { 3053 if (N->getOpcode() != ISD::BUILD_VECTOR) 3054 return false; 3055 3056 SDOperand V1 = N->getOperand(0); 3057 SDOperand V2 = N->getOperand(1); 3058 SDOperand Mask = N->getOperand(2); 3059 unsigned NumElems = Mask.getNumOperands(); 3060 for (unsigned i = 0; i != NumElems; ++i) { 3061 SDOperand Arg = Mask.getOperand(i); 3062 if (Arg.getOpcode() != ISD::UNDEF) { 3063 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 3064 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 3065 return false; 3066 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 3067 return false; 3068 } 3069 } 3070 return true; 3071} 3072 3073/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 3074/// that point to V2 points to its first element. 
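// A minimal standalone sketch of the same canonicalization (plain arrays
// instead of BUILD_VECTOR nodes; illustration only): with V2 a splat, any
// mask index pointing past V2's first element collapses to NumElems.
static void NormalizeSplatMaskSketch(unsigned *Mask, unsigned NumElems) {
  // E.g. NumElems == 4: <0, 5, 1, 7> becomes <0, 4, 1, 4>.
  for (unsigned i = 0; i != NumElems; ++i)
    if (Mask[i] > NumElems)
      Mask[i] = NumElems;
}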
3075static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 3076 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 3077 3078 bool Changed = false; 3079 std::vector<SDOperand> MaskVec; 3080 unsigned NumElems = Mask.getNumOperands(); 3081 for (unsigned i = 0; i != NumElems; ++i) { 3082 SDOperand Arg = Mask.getOperand(i); 3083 if (Arg.getOpcode() != ISD::UNDEF) { 3084 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 3085 if (Val > NumElems) { 3086 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 3087 Changed = true; 3088 } 3089 } 3090 MaskVec.push_back(Arg); 3091 } 3092 3093 if (Changed) 3094 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 3095 &MaskVec[0], MaskVec.size()); 3096 return Mask; 3097} 3098 3099/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 3100/// operation of specified width. 3101static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 3102 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3103 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3104 3105 std::vector<SDOperand> MaskVec; 3106 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 3107 for (unsigned i = 1; i != NumElems; ++i) 3108 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3109 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3110} 3111 3112/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 3113/// of specified width. 3114static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 3115 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3116 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3117 std::vector<SDOperand> MaskVec; 3118 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 3119 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3120 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 3121 } 3122 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3123} 3124 3125/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 3126/// of specified width. 3127static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 3128 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3129 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3130 unsigned Half = NumElems/2; 3131 std::vector<SDOperand> MaskVec; 3132 for (unsigned i = 0; i != Half; ++i) { 3133 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 3134 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 3135 } 3136 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3137} 3138 3139/// getZeroVector - Returns a vector of specified type with all zero elements. 3140/// 3141static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 3142 assert(MVT::isVector(VT) && "Expected a vector type"); 3143 unsigned NumElems = getVectorNumElements(VT); 3144 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 3145 bool isFP = MVT::isFloatingPoint(EVT); 3146 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 3147 std::vector<SDOperand> ZeroVec(NumElems, Zero); 3148 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 3149} 3150 3151/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = Mask.getNumOperands();
  Mask = getUnpacklMask(NumElems, DAG);
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  Mask = getZeroVector(MaskVT, DAG);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and a zero or undef vector.
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                             unsigned NumElems, unsigned Idx,
                                             bool isZero, SelectionDAG &DAG) {
  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
  SDOperand Zero = DAG.getConstant(0, EVT);
  std::vector<SDOperand> MaskVec(NumElems, Zero);
  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDOperand ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.Val)
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getConstant(i/2, TLI.getPointerTy()));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
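/// For example (illustrative): <i16 C, i16 0, i16 0, i16 0, i16 D, undef,
/// i16 0, i16 0> starts from a zero vector and inserts C and D with two
/// INSERT_VECTOR_ELT (pinsrw) operations, leaving the zero lanes untouched.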
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, DAG);
        else
          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
                      DAG.getConstant(i, TLI.getPointerTy()));
    }
  }

  return V;
}

SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor.
  if (ISD::isBuildVectorAllZeros(Op.Val))
    return Op;

  // All ones are handled with pcmpeqd.
  if (ISD::isBuildVectorAllOnes(Op.Val))
    return Op;

  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  unsigned EVTBits = MVT::getSizeInBits(EVT);

  unsigned NumElems = Op.getNumOperands();
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  std::set<SDOperand> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDOperand Elt = Op.getOperand(i);
    if (Elt.getOpcode() != ISD::UNDEF) {
      Values.insert(Elt);
      if (isZeroNode(Elt))
        NumZero++;
      else {
        NonZeros |= (1 << i);
        NumNonZero++;
      }
    }
  }

  if (NumNonZero == 0)
    // Must be a mix of zero and undef. Return a zero vector.
    return getZeroVector(VT, DAG);

  // Splat is obviously ok. Let legalizer expand it to a shuffle.
  if (Values.size() == 1)
    return SDOperand();

  // Special case for single non-zero element.
  if (NumNonZero == 1) {
    unsigned Idx = CountTrailingZeros_32(NonZeros);
    SDOperand Item = Op.getOperand(Idx);
    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
    if (Idx == 0)
      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
                                         NumZero > 0, DAG);

    if (EVTBits == 32) {
      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
                                         DAG);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskVec[0], MaskVec.size());
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
                         DAG.getNode(ISD::UNDEF, VT), Mask);
    }
  }

  // Let legalizer expand 2-wide build_vectors.
  if (EVTBits == 64)
    return SDOperand();

  // If element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8) {
    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
                                        DAG, *this);
    if (V.Val) return V;
  }

  if (EVTBits == 16) {
    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
                                        DAG, *this);
    if (V.Val) return V;
  }

  // If element VT is 32 bits, turn it into a number of shuffles.
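  // For example (illustrative): v4i32 <a, b, 0, 0> becomes scalar_to_vector
  // nodes for a and b plus zero vectors, combined pairwise with MOVL/unpckl
  // masks; the integer case can then return the low-half result directly,
  // since movd zeroes the upper bits.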
3353 std::vector<SDOperand> V(NumElems); 3354 if (NumElems == 4 && NumZero > 0) { 3355 for (unsigned i = 0; i < 4; ++i) { 3356 bool isZero = !(NonZeros & (1 << i)); 3357 if (isZero) 3358 V[i] = getZeroVector(VT, DAG); 3359 else 3360 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3361 } 3362 3363 for (unsigned i = 0; i < 2; ++i) { 3364 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3365 default: break; 3366 case 0: 3367 V[i] = V[i*2]; // Must be a zero vector. 3368 break; 3369 case 1: 3370 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3371 getMOVLMask(NumElems, DAG)); 3372 break; 3373 case 2: 3374 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3375 getMOVLMask(NumElems, DAG)); 3376 break; 3377 case 3: 3378 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3379 getUnpacklMask(NumElems, DAG)); 3380 break; 3381 } 3382 } 3383 3384 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 3385 // clears the upper bits. 3386 // FIXME: we can do the same for v4f32 case when we know both parts of 3387 // the lower half come from scalar_to_vector (loadf32). We should do 3388 // that in post legalizer dag combiner with target specific hooks. 3389 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 3390 return V[0]; 3391 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3392 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 3393 std::vector<SDOperand> MaskVec; 3394 bool Reverse = (NonZeros & 0x3) == 2; 3395 for (unsigned i = 0; i < 2; ++i) 3396 if (Reverse) 3397 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3398 else 3399 MaskVec.push_back(DAG.getConstant(i, EVT)); 3400 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3401 for (unsigned i = 0; i < 2; ++i) 3402 if (Reverse) 3403 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3404 else 3405 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3406 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3407 &MaskVec[0], MaskVec.size()); 3408 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3409 } 3410 3411 if (Values.size() > 2) { 3412 // Expand into a number of unpckl*. 3413 // e.g. for v4f32 3414 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3415 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3416 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3417 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 3418 for (unsigned i = 0; i < NumElems; ++i) 3419 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3420 NumElems >>= 1; 3421 while (NumElems != 0) { 3422 for (unsigned i = 0; i < NumElems; ++i) 3423 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3424 UnpckMask); 3425 NumElems >>= 1; 3426 } 3427 return V[0]; 3428 } 3429 3430 return SDOperand(); 3431} 3432 3433SDOperand 3434X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 3435 SDOperand V1 = Op.getOperand(0); 3436 SDOperand V2 = Op.getOperand(1); 3437 SDOperand PermMask = Op.getOperand(2); 3438 MVT::ValueType VT = Op.getValueType(); 3439 unsigned NumElems = PermMask.getNumOperands(); 3440 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3441 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3442 bool V1IsSplat = false; 3443 bool V2IsSplat = false; 3444 3445 if (isUndefShuffle(Op.Val)) 3446 return DAG.getNode(ISD::UNDEF, VT); 3447 3448 if (isSplatMask(PermMask.Val)) { 3449 if (NumElems <= 4) return Op; 3450 // Promote it to a v4i32 splat. 
    return PromoteSplat(Op, DAG);
  }

  if (X86::isMOVLMask(PermMask.Val))
    return (V1IsUndef) ? V2 : Op;

  if (X86::isMOVSHDUPMask(PermMask.Val) ||
      X86::isMOVSLDUPMask(PermMask.Val) ||
      X86::isMOVHLPSMask(PermMask.Val) ||
      X86::isMOVHPMask(PermMask.Val) ||
      X86::isMOVLPMask(PermMask.Val))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.Val) ||
      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  bool Commuted = false;
  V1IsSplat = isSplatVector(V1.Val);
  V2IsSplat = isSplatVector(V2.Val);
  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    std::swap(V1IsSplat, V2IsSplat);
    std::swap(V1IsUndef, V2IsUndef);
    Commuted = true;
  }

  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDOperand NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.Val != PermMask.Val)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKLMask(PermMask.Val) ||
      X86::isUNPCKHMask(PermMask.Val))
    return Op;

  if (V2IsSplat) {
    // Normalize the mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If a match is found,
    // return a new vector_shuffle with the corrected mask.
    SDOperand NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.Val != PermMask.Val) {
      if (X86::isUNPCKLMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed.
  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (Commuted) {
    // Commute it back and try unpck* again.
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKLMask(PermMask.Val) ||
        X86::isUNPCKHMask(PermMask.Val))
      return Op;
  }

  // If VT is integer, try PSHUF* first, then SHUFP*.
  if (MVT::isInteger(VT)) {
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()),
                           PermMask);
      return Op;
    }

    if (X86::isSHUFPMask(PermMask.Val))
      return Op;

    // Handle v8i16 shuffle high / low shuffle node pair.
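    // For example (illustrative): the v8i16 mask <2,0,1,3,6,7,4,5> is
    // emitted as a PSHUFLW-style shuffle with mask <2,0,1,3,4,5,6,7>
    // followed by a PSHUFHW-style shuffle with mask <0,1,2,3,6,7,4,5>.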
    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskVec[0], MaskVec.size());
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
      MaskVec.clear();
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      for (unsigned i = 4; i != 8; ++i)
        MaskVec.push_back(PermMask.getOperand(i));
      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                         &MaskVec[0], MaskVec.size());
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
    }
  } else {
    // Floating point cases in the other order.
    if (X86::isSHUFPMask(PermMask.Val))
      return Op;
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()),
                           PermMask);
      return Op;
    }
  }

  if (NumElems == 4) {
    MVT::ValueType MaskVT = PermMask.getValueType();
    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
    std::vector<std::pair<int, int> > Locs;
    Locs.resize(NumElems);
    std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    unsigned NumHi = 0;
    unsigned NumLo = 0;
    // If no more than two elements come from either vector, this can be
    // implemented with two shuffles. The first shuffle gathers the elements;
    // the second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
    for (unsigned i = 0; i != NumElems; ++i) {
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else {
        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
        if (Val < NumElems) {
          Locs[i] = std::make_pair(0, NumLo);
          Mask1[NumLo] = Elt;
          NumLo++;
        } else {
          Locs[i] = std::make_pair(1, NumHi);
          if (2+NumHi < NumElems)
            Mask1[2+NumHi] = Elt;
          NumHi++;
        }
      }
    }
    if (NumLo <= 2 && NumHi <= 2) {
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &Mask1[0], Mask1.size()));
      for (unsigned i = 0; i != NumElems; ++i) {
        if (Locs[i].first == -1)
          continue;
        else {
          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
        }
      }

      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &Mask2[0], Mask2.size()));
    }

    // Break it into (shuffle shuffle_hi, shuffle_lo).
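    // For example (illustrative): mask <0,1,2,4> takes three elements from
    // V1, so the two-shuffle path above does not apply. Here LoShuffle
    // gathers with <0,1,u,u>, HiShuffle gathers with <2,u,4,u>, and the
    // final shuffle merges them with <0,1,4,6>.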
    Locs.clear();
    Locs.resize(NumElems);
    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    std::vector<SDOperand> *MaskPtr = &LoMask;
    unsigned MaskIdx = 0;
    unsigned LoIdx = 0;
    unsigned HiIdx = NumElems/2;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (i == NumElems/2) {
        MaskPtr = &HiMask;
        MaskIdx = 1;
        LoIdx = 0;
        HiIdx = NumElems/2;
      }
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
        Locs[i] = std::make_pair(MaskIdx, LoIdx);
        (*MaskPtr)[LoIdx] = Elt;
        LoIdx++;
      } else {
        Locs[i] = std::make_pair(MaskIdx, HiIdx);
        (*MaskPtr)[HiIdx] = Elt;
        HiIdx++;
      }
    }

    SDOperand LoShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                              &LoMask[0], LoMask.size()));
    SDOperand HiShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                              &HiMask[0], HiMask.size()));
    std::vector<SDOperand> MaskOps;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (Locs[i].first == -1) {
        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
      } else {
        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
      }
    }
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskOps[0], MaskOps.size()));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  MVT::ValueType VT = Op.getValueType();
  // TODO: handle v16i8.
  if (MVT::getSizeInBits(VT) == 16) {
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                   DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;
    // SHUFPS the element to the lowest double word, then movss.
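    // For example (illustrative): extracting element 2 of a v4f32 shuffles
    // with mask <2,u,u,u> to move lane 2 into lane 0, then extracts lane 0.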
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, Vec, Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  } else if (MVT::getSizeInBits(VT) == 64) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note that if the lower 64 bits of the result of the UNPCKHPD are then
    // stored to an f64mem, the whole operation is folded into a single
    // MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
  // as its second argument.
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);
  if (MVT::getSizeInBits(BaseVT) == 16) {
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  } else if (MVT::getSizeInBits(BaseVT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
    if (Idx == 0) {
      // Use a movss.
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &MaskVec[0], MaskVec.size()));
    } else {
      // Use two pinsrw instructions to insert a 32-bit value.
      Idx <<= 1;
      if (MVT::isFloatingPoint(N1.getValueType())) {
        if (ISD::isNON_EXTLoad(N1.Val)) {
          // Just load directly from f32mem to GR32.
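          // That is, reuse the original load's chain and address to reload
          // the same four bytes as an i32, so the f32 bits end up in a GR32
          // without a round trip through an XMM register.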
          LoadSDNode *LD = cast<LoadSDNode>(N1);
          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
                           LD->getSrcValue(), LD->getSrcValueOffset());
        } else {
          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                           DAG.getConstant(0, getPointerTy()));
        }
      }
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, getPointerTy()));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, getPointerTy()));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing modes. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetConstantPool(CP->getConstVal(),
                                                           getPointerTy(),
                                                           CP->getAlignment()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetGlobalAddress(GV,
                                                            getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);

    // For Darwin, external and weak symbols are indirect, so we want to load
    // the value at address GV, not the value of GV itself. This means that
    // the GlobalAddress must be in the base or index register of the address,
    // not the GV offset field.
    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
        DarwinGVRequiresExtraLoad(GV))
      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
  } else if (Subtarget->isTargetCygwin() || Subtarget->isTargetWindows()) {
    // FIXME: What about PIC?
3846 if (WindowsGVRequiresExtraLoad(GV)) 3847 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 3848 } 3849 3850 3851 return Result; 3852} 3853 3854SDOperand 3855X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 3856 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 3857 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 3858 DAG.getTargetExternalSymbol(Sym, 3859 getPointerTy())); 3860 if (Subtarget->isTargetDarwin()) { 3861 // With PIC, the address is actually $g + Offset. 3862 if (!Subtarget->is64Bit() && 3863 getTargetMachine().getRelocationModel() == Reloc::PIC_) 3864 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3865 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3866 Result); 3867 } 3868 3869 return Result; 3870} 3871 3872SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3873 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 3874 "Not an i64 shift!"); 3875 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 3876 SDOperand ShOpLo = Op.getOperand(0); 3877 SDOperand ShOpHi = Op.getOperand(1); 3878 SDOperand ShAmt = Op.getOperand(2); 3879 SDOperand Tmp1 = isSRA ? 3880 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 3881 DAG.getConstant(0, MVT::i32); 3882 3883 SDOperand Tmp2, Tmp3; 3884 if (Op.getOpcode() == ISD::SHL_PARTS) { 3885 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 3886 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 3887 } else { 3888 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 3889 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3890 } 3891 3892 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3893 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3894 DAG.getConstant(32, MVT::i8)); 3895 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3896 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3897 3898 SDOperand Hi, Lo; 3899 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3900 3901 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3902 SmallVector<SDOperand, 4> Ops; 3903 if (Op.getOpcode() == ISD::SHL_PARTS) { 3904 Ops.push_back(Tmp2); 3905 Ops.push_back(Tmp3); 3906 Ops.push_back(CC); 3907 Ops.push_back(InFlag); 3908 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3909 InFlag = Hi.getValue(1); 3910 3911 Ops.clear(); 3912 Ops.push_back(Tmp3); 3913 Ops.push_back(Tmp1); 3914 Ops.push_back(CC); 3915 Ops.push_back(InFlag); 3916 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3917 } else { 3918 Ops.push_back(Tmp2); 3919 Ops.push_back(Tmp3); 3920 Ops.push_back(CC); 3921 Ops.push_back(InFlag); 3922 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3923 InFlag = Lo.getValue(1); 3924 3925 Ops.clear(); 3926 Ops.push_back(Tmp3); 3927 Ops.push_back(Tmp1); 3928 Ops.push_back(CC); 3929 Ops.push_back(InFlag); 3930 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3931 } 3932 3933 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3934 Ops.clear(); 3935 Ops.push_back(Lo); 3936 Ops.push_back(Hi); 3937 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3938} 3939 3940SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3941 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3942 Op.getOperand(0).getValueType() >= MVT::i16 && 3943 "Unknown SINT_TO_FP to lower!"); 3944 3945 SDOperand Result; 3946 
MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3947 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3948 MachineFunction &MF = DAG.getMachineFunction(); 3949 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3950 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3951 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3952 StackSlot, NULL, 0); 3953 3954 // Build the FILD 3955 std::vector<MVT::ValueType> Tys; 3956 Tys.push_back(MVT::f64); 3957 Tys.push_back(MVT::Other); 3958 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 3959 std::vector<SDOperand> Ops; 3960 Ops.push_back(Chain); 3961 Ops.push_back(StackSlot); 3962 Ops.push_back(DAG.getValueType(SrcVT)); 3963 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3964 Tys, &Ops[0], Ops.size()); 3965 3966 if (X86ScalarSSE) { 3967 Chain = Result.getValue(1); 3968 SDOperand InFlag = Result.getValue(2); 3969 3970 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3971 // shouldn't be necessary except that RFP cannot be live across 3972 // multiple blocks. When stackifier is fixed, they can be uncoupled. 3973 MachineFunction &MF = DAG.getMachineFunction(); 3974 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3975 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3976 std::vector<MVT::ValueType> Tys; 3977 Tys.push_back(MVT::Other); 3978 std::vector<SDOperand> Ops; 3979 Ops.push_back(Chain); 3980 Ops.push_back(Result); 3981 Ops.push_back(StackSlot); 3982 Ops.push_back(DAG.getValueType(Op.getValueType())); 3983 Ops.push_back(InFlag); 3984 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3985 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3986 } 3987 3988 return Result; 3989} 3990 3991SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3992 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3993 "Unknown FP_TO_SINT to lower!"); 3994 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3995 // stack slot. 
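// (With SSE the value first takes a store/FLD round trip through memory to
// get onto the x87 stack, since the fistp family only operates there.)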
3996 MachineFunction &MF = DAG.getMachineFunction(); 3997 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3998 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3999 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4000 4001 unsigned Opc; 4002 switch (Op.getValueType()) { 4003 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4004 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4005 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4006 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4007 } 4008 4009 SDOperand Chain = DAG.getEntryNode(); 4010 SDOperand Value = Op.getOperand(0); 4011 if (X86ScalarSSE) { 4012 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4013 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 4014 std::vector<MVT::ValueType> Tys; 4015 Tys.push_back(MVT::f64); 4016 Tys.push_back(MVT::Other); 4017 std::vector<SDOperand> Ops; 4018 Ops.push_back(Chain); 4019 Ops.push_back(StackSlot); 4020 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 4021 Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size()); 4022 Chain = Value.getValue(1); 4023 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4024 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4025 } 4026 4027 // Build the FP_TO_INT*_IN_MEM 4028 std::vector<SDOperand> Ops; 4029 Ops.push_back(Chain); 4030 Ops.push_back(Value); 4031 Ops.push_back(StackSlot); 4032 SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size()); 4033 4034 // Load the result. 4035 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4036} 4037 4038SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 4039 MVT::ValueType VT = Op.getValueType(); 4040 const Type *OpNTy = MVT::getTypeForValueType(VT); 4041 std::vector<Constant*> CV; 4042 if (VT == MVT::f64) { 4043 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 4044 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4045 } else { 4046 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 4047 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4048 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4049 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4050 } 4051 Constant *CS = ConstantStruct::get(CV); 4052 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 4053 std::vector<MVT::ValueType> Tys; 4054 Tys.push_back(VT); 4055 Tys.push_back(MVT::Other); 4056 SmallVector<SDOperand, 3> Ops; 4057 Ops.push_back(DAG.getEntryNode()); 4058 Ops.push_back(CPIdx); 4059 Ops.push_back(DAG.getSrcValue(NULL)); 4060 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); 4061 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4062} 4063 4064SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 4065 MVT::ValueType VT = Op.getValueType(); 4066 const Type *OpNTy = MVT::getTypeForValueType(VT); 4067 std::vector<Constant*> CV; 4068 if (VT == MVT::f64) { 4069 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 4070 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4071 } else { 4072 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 4073 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4074 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4075 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 4076 } 4077 Constant *CS = ConstantStruct::get(CV); 4078 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 4079 std::vector<MVT::ValueType> Tys; 4080 Tys.push_back(VT); 4081 Tys.push_back(MVT::Other); 4082 
  SmallVector<SDOperand, 3> Ops;
  Ops.push_back(DAG.getEntryNode());
  Ops.push_back(CPIdx);
  Ops.push_back(DAG.getSrcValue(NULL));
  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
                                        SDOperand Chain) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  unsigned X86CC;

  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
                     Op0, Op1, DAG)) {
    SDOperand Ops1[] = { Chain, Op0, Op1 };
    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
  }

  assert(isFP && "Illegal integer SetCC!");

  SDOperand COps[] = { Chain, Op0, Op1 };
  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);

  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}

SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = DAG.getEntryNode();
  SDOperand Cond = Op.getOperand(0);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). Use it as the condition
    // setting operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    bool IllegalFPCMov = !X86ScalarSSE &&
      MVT::isFloatingPoint(Op.getValueType()) &&
      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
        !IllegalFPCMov) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }

  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond.getValue(1));
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond = Op.getOperand(1);
  SDOperand Dest = Op.getOperand(2);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). Use it as the condition
    // setting operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Cond, Op.getOperand(2), CC, Cond.getValue(1));
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetJumpTable(JT->getIndex(),
                                                        getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
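    // ($g is the PIC base register materialized by X86ISD::GlobalBaseReg,
    // as in LowerConstantPool and LowerGlobalAddress above.)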
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit())
    return LowerX86_64CCCCallTo(Op, DAG);
  else
    switch (CallingConv) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      if (EnableFastCC) {
        return LowerFastCCCallTo(Op, DAG, false);
      }
      // Falls through
    case CallingConv::C:
    case CallingConv::CSRet:
      return LowerCCCCallTo(Op, DAG);
    case CallingConv::X86_StdCall:
      return LowerStdCallCCCallTo(Op, DAG);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, true);
    }
}

SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;

  switch (Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:    // ret void.
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
  case 3: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

    if (MVT::isVector(ArgVT) ||
        (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) {
      // Integer or FP vector result -> XMM0.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::XMM0);
      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                              SDOperand());
    } else if (MVT::isInteger(ArgVT)) {
      // Integer result -> EAX / RAX.
      // The C calling convention guarantees the return value has been
      // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
      // value to be promoted to MVT::i64. So we don't have to extend it to
      // 64-bit. Return the value in EAX, but mark RAX as liveout.
      unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(Reg);

      Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
      Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1),
                              SDOperand());
    } else if (!X86ScalarSSE) {
      // FP return with fp-stack value.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Op.getOperand(0));
      Ops.push_back(Op.getOperand(1));
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
    } else {
      // FP return with ScalarSSE (return on fp-stack).
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      SDOperand MemLoc;
      SDOperand Chain = Op.getOperand(0);
      SDOperand Value = Op.getOperand(1);

      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
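        // (Illustrative example: an SSE f64 return value lives in an XMM
        // register but must be returned in ST0, so it is stored to a fresh
        // stack slot and FLDed from there.)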
4325 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 4326 MachineFunction &MF = DAG.getMachineFunction(); 4327 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4328 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 4329 Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0); 4330 } 4331 std::vector<MVT::ValueType> Tys; 4332 Tys.push_back(MVT::f64); 4333 Tys.push_back(MVT::Other); 4334 std::vector<SDOperand> Ops; 4335 Ops.push_back(Chain); 4336 Ops.push_back(MemLoc); 4337 Ops.push_back(DAG.getValueType(ArgVT)); 4338 Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size()); 4339 Tys.clear(); 4340 Tys.push_back(MVT::Other); 4341 Tys.push_back(MVT::Flag); 4342 Ops.clear(); 4343 Ops.push_back(Copy.getValue(1)); 4344 Ops.push_back(Copy); 4345 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size()); 4346 } 4347 break; 4348 } 4349 case 5: { 4350 unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX; 4351 unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX; 4352 if (DAG.getMachineFunction().liveout_empty()) { 4353 DAG.getMachineFunction().addLiveOut(Reg1); 4354 DAG.getMachineFunction().addLiveOut(Reg2); 4355 } 4356 4357 Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3), 4358 SDOperand()); 4359 Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1)); 4360 break; 4361 } 4362 } 4363 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 4364 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 4365 Copy.getValue(1)); 4366} 4367 4368SDOperand 4369X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 4370 MachineFunction &MF = DAG.getMachineFunction(); 4371 const Function* Fn = MF.getFunction(); 4372 if (Fn->hasExternalLinkage() && 4373 Subtarget->isTargetCygwin() && 4374 Fn->getName() == "main") 4375 MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true); 4376 4377 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 4378 if (Subtarget->is64Bit()) 4379 return LowerX86_64CCCArguments(Op, DAG); 4380 else 4381 switch(CC) { 4382 default: 4383 assert(0 && "Unsupported calling convention"); 4384 case CallingConv::Fast: 4385 if (EnableFastCC) { 4386 return LowerFastCCArguments(Op, DAG); 4387 } 4388 // Falls through 4389 case CallingConv::C: 4390 case CallingConv::CSRet: 4391 return LowerCCCArguments(Op, DAG); 4392 case CallingConv::X86_StdCall: 4393 MF.getInfo<X86FunctionInfo>()->setDecorationStyle(StdCall); 4394 return LowerStdCallCCArguments(Op, DAG); 4395 case CallingConv::X86_FastCall: 4396 MF.getInfo<X86FunctionInfo>()->setDecorationStyle(FastCall); 4397 return LowerFastCallCCArguments(Op, DAG); 4398 } 4399} 4400 4401SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 4402 SDOperand InFlag(0, 0); 4403 SDOperand Chain = Op.getOperand(0); 4404 unsigned Align = 4405 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 4406 if (Align == 0) Align = 1; 4407 4408 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 4409 // If not DWORD aligned, call memset if size is less than the threshold. 4410 // It knows how to align to the right boundary first. 4411 if ((Align & 3) != 0 || 4412 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 4413 MVT::ValueType IntPtr = getPointerTy(); 4414 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4415 std::vector<std::pair<SDOperand, const Type*> > Args; 4416 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 4417 // Extend the ubyte argument to be an int value for the call. 
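// (The C library memset takes its fill value as an int, even though only the
// low byte is used.)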
4418 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 4419 Args.push_back(std::make_pair(Val, IntPtrTy)); 4420 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 4421 std::pair<SDOperand,SDOperand> CallResult = 4422 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 4423 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 4424 return CallResult.second; 4425 } 4426 4427 MVT::ValueType AVT; 4428 SDOperand Count; 4429 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4430 unsigned BytesLeft = 0; 4431 bool TwoRepStos = false; 4432 if (ValC) { 4433 unsigned ValReg; 4434 uint64_t Val = ValC->getValue() & 255; 4435 4436 // If the value is a constant, then we can potentially use larger sets. 4437 switch (Align & 3) { 4438 case 2: // WORD aligned 4439 AVT = MVT::i16; 4440 ValReg = X86::AX; 4441 Val = (Val << 8) | Val; 4442 break; 4443 case 0: // DWORD aligned 4444 AVT = MVT::i32; 4445 ValReg = X86::EAX; 4446 Val = (Val << 8) | Val; 4447 Val = (Val << 16) | Val; 4448 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 4449 AVT = MVT::i64; 4450 ValReg = X86::RAX; 4451 Val = (Val << 32) | Val; 4452 } 4453 break; 4454 default: // Byte aligned 4455 AVT = MVT::i8; 4456 ValReg = X86::AL; 4457 Count = Op.getOperand(3); 4458 break; 4459 } 4460 4461 if (AVT > MVT::i8) { 4462 if (I) { 4463 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4464 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4465 BytesLeft = I->getValue() % UBytes; 4466 } else { 4467 assert(AVT >= MVT::i32 && 4468 "Do not use rep;stos if not at least DWORD aligned"); 4469 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4470 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4471 TwoRepStos = true; 4472 } 4473 } 4474 4475 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 4476 InFlag); 4477 InFlag = Chain.getValue(1); 4478 } else { 4479 AVT = MVT::i8; 4480 Count = Op.getOperand(3); 4481 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 4482 InFlag = Chain.getValue(1); 4483 } 4484 4485 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4486 Count, InFlag); 4487 InFlag = Chain.getValue(1); 4488 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4489 Op.getOperand(1), InFlag); 4490 InFlag = Chain.getValue(1); 4491 4492 std::vector<MVT::ValueType> Tys; 4493 Tys.push_back(MVT::Other); 4494 Tys.push_back(MVT::Flag); 4495 std::vector<SDOperand> Ops; 4496 Ops.push_back(Chain); 4497 Ops.push_back(DAG.getValueType(AVT)); 4498 Ops.push_back(InFlag); 4499 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4500 4501 if (TwoRepStos) { 4502 InFlag = Chain.getValue(1); 4503 Count = Op.getOperand(3); 4504 MVT::ValueType CVT = Count.getValueType(); 4505 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4506 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4507 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4508 Left, InFlag); 4509 InFlag = Chain.getValue(1); 4510 Tys.clear(); 4511 Tys.push_back(MVT::Other); 4512 Tys.push_back(MVT::Flag); 4513 Ops.clear(); 4514 Ops.push_back(Chain); 4515 Ops.push_back(DAG.getValueType(MVT::i8)); 4516 Ops.push_back(InFlag); 4517 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4518 } else if (BytesLeft) { 4519 // Issue stores for the last 1 - 7 bytes. 
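// For example (illustrative): a DWORD-aligned memset of 11 constant bytes
// uses rep;stos with ECX = 2 (8 bytes), leaving BytesLeft = 3, which is
// finished below with one 2-byte store and one 1-byte store.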
4520 SDOperand Value; 4521 unsigned Val = ValC->getValue() & 255; 4522 unsigned Offset = I->getValue() - BytesLeft; 4523 SDOperand DstAddr = Op.getOperand(1); 4524 MVT::ValueType AddrVT = DstAddr.getValueType(); 4525 if (BytesLeft >= 4) { 4526 Val = (Val << 8) | Val; 4527 Val = (Val << 16) | Val; 4528 Value = DAG.getConstant(Val, MVT::i32); 4529 Chain = DAG.getStore(Chain, Value, 4530 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4531 DAG.getConstant(Offset, AddrVT)), 4532 NULL, 0); 4533 BytesLeft -= 4; 4534 Offset += 4; 4535 } 4536 if (BytesLeft >= 2) { 4537 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4538 Chain = DAG.getStore(Chain, Value, 4539 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4540 DAG.getConstant(Offset, AddrVT)), 4541 NULL, 0); 4542 BytesLeft -= 2; 4543 Offset += 2; 4544 } 4545 if (BytesLeft == 1) { 4546 Value = DAG.getConstant(Val, MVT::i8); 4547 Chain = DAG.getStore(Chain, Value, 4548 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4549 DAG.getConstant(Offset, AddrVT)), 4550 NULL, 0); 4551 } 4552 } 4553 4554 return Chain; 4555} 4556 4557SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 4558 SDOperand Chain = Op.getOperand(0); 4559 unsigned Align = 4560 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 4561 if (Align == 0) Align = 1; 4562 4563 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 4564 // If not DWORD aligned, call memcpy if size is less than the threshold. 4565 // It knows how to align to the right boundary first. 4566 if ((Align & 3) != 0 || 4567 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 4568 MVT::ValueType IntPtr = getPointerTy(); 4569 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4570 std::vector<std::pair<SDOperand, const Type*> > Args; 4571 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 4572 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 4573 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 4574 std::pair<SDOperand,SDOperand> CallResult = 4575 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 4576 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 4577 return CallResult.second; 4578 } 4579 4580 MVT::ValueType AVT; 4581 SDOperand Count; 4582 unsigned BytesLeft = 0; 4583 bool TwoRepMovs = false; 4584 switch (Align & 3) { 4585 case 2: // WORD aligned 4586 AVT = MVT::i16; 4587 break; 4588 case 0: // DWORD aligned 4589 AVT = MVT::i32; 4590 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4591 AVT = MVT::i64; 4592 break; 4593 default: // Byte aligned 4594 AVT = MVT::i8; 4595 Count = Op.getOperand(3); 4596 break; 4597 } 4598 4599 if (AVT > MVT::i8) { 4600 if (I) { 4601 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4602 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4603 BytesLeft = I->getValue() % UBytes; 4604 } else { 4605 assert(AVT >= MVT::i32 && 4606 "Do not use rep;movs if not at least DWORD aligned"); 4607 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4608 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4609 TwoRepMovs = true; 4610 } 4611 } 4612 4613 SDOperand InFlag(0, 0); 4614 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4615 Count, InFlag); 4616 InFlag = Chain.getValue(1); 4617 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4618 Op.getOperand(1), InFlag); 4619 InFlag = Chain.getValue(1); 4620 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RSI : X86::ESI, 4621 Op.getOperand(2), InFlag); 4622 InFlag = Chain.getValue(1); 4623 4624 std::vector<MVT::ValueType> Tys; 4625 Tys.push_back(MVT::Other); 4626 Tys.push_back(MVT::Flag); 4627 std::vector<SDOperand> Ops; 4628 Ops.push_back(Chain); 4629 Ops.push_back(DAG.getValueType(AVT)); 4630 Ops.push_back(InFlag); 4631 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4632 4633 if (TwoRepMovs) { 4634 InFlag = Chain.getValue(1); 4635 Count = Op.getOperand(3); 4636 MVT::ValueType CVT = Count.getValueType(); 4637 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4638 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4639 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4640 Left, InFlag); 4641 InFlag = Chain.getValue(1); 4642 Tys.clear(); 4643 Tys.push_back(MVT::Other); 4644 Tys.push_back(MVT::Flag); 4645 Ops.clear(); 4646 Ops.push_back(Chain); 4647 Ops.push_back(DAG.getValueType(MVT::i8)); 4648 Ops.push_back(InFlag); 4649 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4650 } else if (BytesLeft) { 4651 // Issue loads and stores for the last 1 - 7 bytes. 4652 unsigned Offset = I->getValue() - BytesLeft; 4653 SDOperand DstAddr = Op.getOperand(1); 4654 MVT::ValueType DstVT = DstAddr.getValueType(); 4655 SDOperand SrcAddr = Op.getOperand(2); 4656 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4657 SDOperand Value; 4658 if (BytesLeft >= 4) { 4659 Value = DAG.getLoad(MVT::i32, Chain, 4660 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4661 DAG.getConstant(Offset, SrcVT)), 4662 NULL, 0); 4663 Chain = Value.getValue(1); 4664 Chain = DAG.getStore(Chain, Value, 4665 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4666 DAG.getConstant(Offset, DstVT)), 4667 NULL, 0); 4668 BytesLeft -= 4; 4669 Offset += 4; 4670 } 4671 if (BytesLeft >= 2) { 4672 Value = DAG.getLoad(MVT::i16, Chain, 4673 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4674 DAG.getConstant(Offset, SrcVT)), 4675 NULL, 0); 4676 Chain = Value.getValue(1); 4677 Chain = DAG.getStore(Chain, Value, 4678 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4679 DAG.getConstant(Offset, DstVT)), 4680 NULL, 0); 4681 BytesLeft -= 2; 4682 Offset += 2; 4683 } 4684 4685 if (BytesLeft == 1) { 4686 Value = DAG.getLoad(MVT::i8, Chain, 4687 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4688 DAG.getConstant(Offset, SrcVT)), 4689 NULL, 0); 4690 Chain = Value.getValue(1); 4691 Chain = DAG.getStore(Chain, Value, 4692 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4693 DAG.getConstant(Offset, DstVT)), 4694 NULL, 0); 4695 } 4696 } 4697 4698 return Chain; 4699} 4700 4701SDOperand 4702X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4703 std::vector<MVT::ValueType> Tys; 4704 Tys.push_back(MVT::Other); 4705 Tys.push_back(MVT::Flag); 4706 std::vector<SDOperand> Ops; 4707 Ops.push_back(Op.getOperand(0)); 4708 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size()); 4709 Ops.clear(); 4710 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 4711 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 4712 MVT::i32, Ops[0].getValue(2))); 4713 Ops.push_back(Ops[1].getValue(1)); 4714 Tys[0] = Tys[1] = MVT::i32; 4715 Tys.push_back(MVT::Other); 4716 return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size()); 4717} 4718 4719SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4720 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4721 4722 if (!Subtarget->is64Bit()) { 4723 // vastart just stores the address of the VarArgsFrameIndex slot into the 4724 // memory 
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));

  if (!Subtarget->is64Bit()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1),
                        SV->getValue(), SV->getOffset());
  }

  // __va_list_tag:
  //   i32 gp_offset         (0 .. 6*8: byte offset into reg_save_area)
  //   i32 fp_offset         (48 .. 48 + 8*16: byte offset into reg_save_area)
  //   i8* overflow_arg_area (points to parameters coming in memory)
  //   i8* reg_save_area
  std::vector<SDOperand> MemOps;
  SDOperand FIN = Op.getOperand(1);
  // Store gp_offset
  SDOperand Store = DAG.getStore(Op.getOperand(0),
                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
                                 FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  Store = DAG.getStore(Op.getOperand(0),
                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
                       FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(8, getPointerTy()));
  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);
  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}
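/// LowerINTRINSIC_WO_CHAIN - Custom lower chain-free target intrinsics.
/// Only the SSE/SSE2 scalar comparison intrinsics (comi*/ucomi*) need custom
/// lowering here; everything else takes the default path.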
SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    // Map the intrinsic to an ordered (COMI) or unordered (UCOMI) compare
    // plus the condition code it tests for.
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }

    unsigned X86CC;
    SDOperand LHS = Op.getOperand(1);
    SDOperand RHS = Op.getOperand(2);
    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);

    // Emit the compare, materialize the predicate with a SETCC reading the
    // compare's flag result, and widen the i8 result to i32.
    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}
4882/// 4883SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4884 switch (Op.getOpcode()) { 4885 default: assert(0 && "Should not custom lower this!"); 4886 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4887 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4888 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4889 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4890 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4891 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4892 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4893 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4894 case ISD::SHL_PARTS: 4895 case ISD::SRA_PARTS: 4896 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4897 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4898 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4899 case ISD::FABS: return LowerFABS(Op, DAG); 4900 case ISD::FNEG: return LowerFNEG(Op, DAG); 4901 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4902 case ISD::SELECT: return LowerSELECT(Op, DAG); 4903 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4904 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4905 case ISD::CALL: return LowerCALL(Op, DAG); 4906 case ISD::RET: return LowerRET(Op, DAG); 4907 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4908 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4909 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4910 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4911 case ISD::VASTART: return LowerVASTART(Op, DAG); 4912 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4913 } 4914} 4915 4916const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4917 switch (Opcode) { 4918 default: return NULL; 4919 case X86ISD::SHLD: return "X86ISD::SHLD"; 4920 case X86ISD::SHRD: return "X86ISD::SHRD"; 4921 case X86ISD::FAND: return "X86ISD::FAND"; 4922 case X86ISD::FXOR: return "X86ISD::FXOR"; 4923 case X86ISD::FILD: return "X86ISD::FILD"; 4924 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4925 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4926 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4927 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4928 case X86ISD::FLD: return "X86ISD::FLD"; 4929 case X86ISD::FST: return "X86ISD::FST"; 4930 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4931 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4932 case X86ISD::CALL: return "X86ISD::CALL"; 4933 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4934 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4935 case X86ISD::CMP: return "X86ISD::CMP"; 4936 case X86ISD::COMI: return "X86ISD::COMI"; 4937 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4938 case X86ISD::SETCC: return "X86ISD::SETCC"; 4939 case X86ISD::CMOV: return "X86ISD::CMOV"; 4940 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4941 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4942 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4943 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4944 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 4945 case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA"; 4946 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4947 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4948 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4949 case 
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  }
}

/// isLegalAddressImmediate - Return true if the integer value or
/// GlobalValue can be used as the offset of the target addressing mode.
bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
  // X86 allows a sign-extended 32-bit immediate field.  In 64-bit mode the
  // displacement must genuinely fit in a signed 32-bit immediate; in 32-bit
  // mode any 32-bit quantity (signed or unsigned) is representable.
  if (Subtarget->is64Bit())
    return V >= -(1LL << 31) && V <= (1LL << 31) - 1;
  return (V > -(1LL << 32) && V < (1LL << 32)-1);
}

bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
  // GV is 64-bit but displacement field is 32-bit unless we are in small code
  // model. Mac OS X happens to support only small PIC code model.
  // FIXME: better support for other OS's.
  if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin())
    return false;
  if (Subtarget->isTargetDarwin()) {
    Reloc::Model RModel = getTargetMachine().getRelocationModel();
    if (RModel == Reloc::Static)
      return true;
    else if (RModel == Reloc::DynamicNoPIC)
      return !DarwinGVRequiresExtraLoad(GV);
    else
      return false;
  } else
    return true;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) ||
            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;
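    // The code below builds this diamond in the machine CFG: the conditional
    // branch in thisMBB jumps straight to sinkMBB to keep the "true" value,
    // and falls through into copy0MBB otherwise:
    //
    //         thisMBB
    //         |     \
    //         |   copy0MBB
    //         |     /
    //         sinkMBB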
    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
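    // The x87 fist/fistp instructions round according to the current control
    // word, but C semantics require truncation towards zero.  There is no
    // truncating store before SSE3's fisttp, so force the rounding-control
    // bits to round-to-zero around the store and restore them afterwards.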
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the rounding-control bits to round towards zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    // Rebuild the memory operand from the pseudo instruction's address-mode
    // operands: base (register or frame index), scale, index, and
    // displacement (immediate or global), then store the FP value to it.
    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // SETCC produces 0 or 1, so every bit above the low one is known zero.
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(),
                               DAG);
  }
  return SDOperand();
}
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (N->getOpcode() == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  // The two loads must share the same chain.
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}
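/// isBaseAlignment16 - Return true if the load address Base is known to be
/// at least 16-byte aligned, either from a global's declared alignment or
/// from the stack frame layout.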
static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  if (isAlign16) {
    LoadSDNode *LD = cast<LoadSDNode>(Base);
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset());
  } else {
    // Just use movups, it's shorter.
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::v4f32);
    Tys.push_back(MVT::Other);
    SmallVector<SDOperand, 3> Ops;
    Ops.push_back(Base->getOperand(0));
    Ops.push_back(Base->getOperand(1));
    Ops.push_back(Base->getOperand(2));
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
  }
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
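/// With SSE2 support this turns, for example, (select (setolt X, Y), X, Y)
/// on f32 into a single minss via the x86_sse_min_ss intrinsic (and the
/// analogous patterns into maxss, minsd and maxsd).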
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned IntNo = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE:  // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_min_ss :
                  Intrinsic::x86_sse2_min_sd;
          break;

        case ISD::SETOGT:  // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_max_ss :
                  Intrinsic::x86_sse2_max_sd;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT:  // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_min_ss :
                  Intrinsic::x86_sse2_min_sd;
          break;

        case ISD::SETOLE:  // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_max_ss :
                  Intrinsic::x86_sse2_max_sd;
          break;
        }
      }

      // minss/maxss take a v4f32 operand.
      if (IntNo) {
        if (LHS.getValueType() == MVT::f32) {
          LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, LHS);
          RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, RHS);
        } else {
          LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, LHS);
          RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, RHS);
        }

        MVT::ValueType PtrTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
        SDOperand IntNoN = DAG.getConstant(IntNo, PtrTy);

        SDOperand Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, LHS.getValueType(),
                                    IntNoN, LHS, RHS);
        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getValueType(0), Val,
                           DAG.getConstant(0, PtrTy));
      }
    }
  }

  return SDOperand();
}

SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(char ConstraintLetter) const {
  switch (ConstraintLetter) {
  case 'A':
  case 'r':
  case 'R':
  case 'l':
  case 'q':
  case 'Q':
  case 'x':
  case 'Y':
    return C_RegisterClass;
  default: return TargetLowering::getConstraintType(ConstraintLetter);
  }
}
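/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
/// return a list of registers that can be used to satisfy the constraint.
/// This should only be used for C_RegisterClass constraints.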
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    // FIXME: not handling MMX registers yet ('y' constraint).
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'l':   // INDEX_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'x':   // SSE_REGS if SSE1 allowed
      if (Subtarget->hasSSE1())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (Subtarget->hasSSE2())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found?  Bail out.
  if (Res.second == 0) return Res;

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.
  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}