X86ISelLowering.cpp revision 5cd3e9f4b7caa5a79c6c05633b11144d0ae41771
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

// FIXME: temporary.
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

/// X86TargetLowering - Constructor.  Tells the SelectionDAG legalizer, for
/// every (operation, type) pair, whether it is Legal, should be Promoted to a
/// wider type, Expanded into other operations, or Custom-lowered by this
/// target.  Also registers the legal register classes for each value type,
/// based on the subtarget's feature set (64-bit mode, SSE1/SSE2/SSE3, MMX).
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  // X86ScalarSSE: do scalar FP in SSE registers (requires SSE2) instead of
  // on the x87 floating-point stack.
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  // Hardware masks shift amounts to the operand width: shl X, 32 == shl X, 0.
  setShiftAmountFlavor(Mask);
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    // NOTE(review): the comment and the guard look contradictory — the
    // underscore variants are enabled only when the target is NOT Darwin.
    // Confirm which polarity is intended.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  // No bit-counting instructions are assumed for i8/i16/i32 (and i64 in
  // 64-bit mode); let the legalizer expand them.
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool   , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable      , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress  , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol , MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG            , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Scalar FP lives in the XMM registers.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Scalar FP lives on the x87 stack.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      // fsin/fcos are inaccurate outside a limited range; only use them
      // when the user has opted into unsafe FP math.
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    // x87 can materialize these constants directly.
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,  MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,  MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,  MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,  MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,  MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,  MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,  MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,  MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,  MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,  MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//               C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is through stack, returns the size of the stack
/// slot; if it is through XMM register, returns the number of XMM registers
/// that are needed.
430static void 431HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs, 432 unsigned &ObjSize, unsigned &ObjXMMRegs) { 433 ObjXMMRegs = 0; 434 435 switch (ObjectVT) { 436 default: assert(0 && "Unhandled argument type!"); 437 case MVT::i8: ObjSize = 1; break; 438 case MVT::i16: ObjSize = 2; break; 439 case MVT::i32: ObjSize = 4; break; 440 case MVT::i64: ObjSize = 8; break; 441 case MVT::f32: ObjSize = 4; break; 442 case MVT::f64: ObjSize = 8; break; 443 case MVT::v16i8: 444 case MVT::v8i16: 445 case MVT::v4i32: 446 case MVT::v2i64: 447 case MVT::v4f32: 448 case MVT::v2f64: 449 if (NumXMMRegs < 4) 450 ObjXMMRegs = 1; 451 else 452 ObjSize = 16; 453 break; 454 } 455} 456 457SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) { 458 unsigned NumArgs = Op.Val->getNumValues() - 1; 459 MachineFunction &MF = DAG.getMachineFunction(); 460 MachineFrameInfo *MFI = MF.getFrameInfo(); 461 SDOperand Root = Op.getOperand(0); 462 std::vector<SDOperand> ArgValues; 463 464 // Add DAG nodes to load the arguments... On entry to a function on the X86, 465 // the stack frame looks like this: 466 // 467 // [ESP] -- return address 468 // [ESP + 4] -- first argument (leftmost lexically) 469 // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size 470 // ... 471 // 472 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 473 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 474 static const unsigned XMMArgRegs[] = { 475 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 476 }; 477 for (unsigned i = 0; i < NumArgs; ++i) { 478 MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); 479 unsigned ArgIncrement = 4; 480 unsigned ObjSize = 0; 481 unsigned ObjXMMRegs = 0; 482 HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs); 483 if (ObjSize > 4) 484 ArgIncrement = ObjSize; 485 486 SDOperand ArgValue; 487 if (ObjXMMRegs) { 488 // Passed in a XMM register. 
489 unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 490 X86::VR128RegisterClass); 491 ArgValue= DAG.getCopyFromReg(Root, Reg, ObjectVT); 492 ArgValues.push_back(ArgValue); 493 NumXMMRegs += ObjXMMRegs; 494 } else { 495 // XMM arguments have to be aligned on 16-byte boundary. 496 if (ObjSize == 16) 497 ArgOffset = ((ArgOffset + 15) / 16) * 16; 498 // Create the frame index object for this incoming parameter... 499 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 500 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 501 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0); 502 ArgValues.push_back(ArgValue); 503 ArgOffset += ArgIncrement; // Move on to the next argument... 504 } 505 } 506 507 ArgValues.push_back(Root); 508 509 // If the function takes variable number of arguments, make a frame index for 510 // the start of the first vararg value... for expansion of llvm.va_start. 511 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 512 if (isVarArg) 513 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 514 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 515 ReturnAddrIndex = 0; // No return address slot generated yet. 516 BytesToPopOnReturn = 0; // Callee pops nothing. 517 BytesCallerReserves = ArgOffset; 518 519 // If this is a struct return on, the callee pops the hidden struct 520 // pointer. This is common for Darwin/X86, Linux & Mingw32 targets. 521 if (MF.getFunction()->getCallingConv() == CallingConv::CSRet) 522 BytesToPopOnReturn = 4; 523 524 // Return the new list of results. 
525 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 526 Op.Val->value_end()); 527 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); 528} 529 530 531SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) { 532 SDOperand Chain = Op.getOperand(0); 533 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 534 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 535 SDOperand Callee = Op.getOperand(4); 536 MVT::ValueType RetVT= Op.Val->getValueType(0); 537 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 538 539 // Keep track of the number of XMM regs passed so far. 540 unsigned NumXMMRegs = 0; 541 static const unsigned XMMArgRegs[] = { 542 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 543 }; 544 545 // Count how many bytes are to be pushed on the stack. 546 unsigned NumBytes = 0; 547 for (unsigned i = 0; i != NumOps; ++i) { 548 SDOperand Arg = Op.getOperand(5+2*i); 549 550 switch (Arg.getValueType()) { 551 default: assert(0 && "Unexpected ValueType for argument!"); 552 case MVT::i8: 553 case MVT::i16: 554 case MVT::i32: 555 case MVT::f32: 556 NumBytes += 4; 557 break; 558 case MVT::i64: 559 case MVT::f64: 560 NumBytes += 8; 561 break; 562 case MVT::v16i8: 563 case MVT::v8i16: 564 case MVT::v4i32: 565 case MVT::v2i64: 566 case MVT::v4f32: 567 case MVT::v2f64: 568 if (NumXMMRegs < 4) 569 ++NumXMMRegs; 570 else { 571 // XMM arguments have to be aligned on 16-byte boundary. 572 NumBytes = ((NumBytes + 15) / 16) * 16; 573 NumBytes += 16; 574 } 575 break; 576 } 577 } 578 579 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 580 581 // Arguments go on the stack in reverse order, as specified by the ABI. 
582 unsigned ArgOffset = 0; 583 NumXMMRegs = 0; 584 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 585 std::vector<SDOperand> MemOpChains; 586 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); 587 for (unsigned i = 0; i != NumOps; ++i) { 588 SDOperand Arg = Op.getOperand(5+2*i); 589 590 switch (Arg.getValueType()) { 591 default: assert(0 && "Unexpected ValueType for argument!"); 592 case MVT::i8: 593 case MVT::i16: { 594 // Promote the integer to 32 bits. If the input type is signed use a 595 // sign extend, otherwise use a zero extend. 596 unsigned ExtOp = 597 dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ? 598 ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 599 Arg = DAG.getNode(ExtOp, MVT::i32, Arg); 600 } 601 // Fallthrough 602 603 case MVT::i32: 604 case MVT::f32: { 605 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 606 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 607 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 608 ArgOffset += 4; 609 break; 610 } 611 case MVT::i64: 612 case MVT::f64: { 613 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 614 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 615 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 616 ArgOffset += 8; 617 break; 618 } 619 case MVT::v16i8: 620 case MVT::v8i16: 621 case MVT::v4i32: 622 case MVT::v2i64: 623 case MVT::v4f32: 624 case MVT::v2f64: 625 if (NumXMMRegs < 4) { 626 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 627 NumXMMRegs++; 628 } else { 629 // XMM arguments have to be aligned on 16-byte boundary. 
630 ArgOffset = ((ArgOffset + 15) / 16) * 16; 631 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 632 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 633 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 634 ArgOffset += 16; 635 } 636 } 637 } 638 639 if (!MemOpChains.empty()) 640 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 641 &MemOpChains[0], MemOpChains.size()); 642 643 // Build a sequence of copy-to-reg nodes chained together with token chain 644 // and flag operands which copy the outgoing args into registers. 645 SDOperand InFlag; 646 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 647 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 648 InFlag); 649 InFlag = Chain.getValue(1); 650 } 651 652 // If the callee is a GlobalAddress node (quite common, every direct call is) 653 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 654 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 655 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 656 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 657 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 658 659 std::vector<MVT::ValueType> NodeTys; 660 NodeTys.push_back(MVT::Other); // Returns a chain 661 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 662 std::vector<SDOperand> Ops; 663 Ops.push_back(Chain); 664 Ops.push_back(Callee); 665 666 // Add argument registers to the end of the list so that they are known live 667 // into the call. 668 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 669 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 670 RegsToPass[i].second.getValueType())); 671 672 if (InFlag.Val) 673 Ops.push_back(InFlag); 674 675 Chain = DAG.getNode(isTailCall ? 
                                 X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  // If this is a call to a struct-return function, the callee
  // pops the hidden struct pointer, so we have to push it back.
  // This is common for Darwin/X86, Linux & Mingw32 targets.
  if (CallingConv == CallingConv::CSRet)
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  // Copy the result values out of the physical return registers dictated by
  // the C calling convention, threading the chain/flag through each copy so
  // scheduling cannot reorder them with respect to the call.
  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    // A second i32 result value means an i64 return split across EAX/EDX.
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    // FP results come back on the x87 stack; FP_GET_RESULT models the
    // implicit ST(0) return.
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // With scalar SSE the value must live in an XMM register, so spill the
      // x87 result to a stack slot and reload it.
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}


//===----------------------------------------------------------------------===//
//  X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// HowToPassX86_64CCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is through stack, returns the size of the stack
/// slot; if it is through integer or XMM register, returns the number of
/// integer or XMM registers that are needed.
static void
HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
                           unsigned NumIntRegs, unsigned NumXMMRegs,
                           unsigned &ObjSize, unsigned &ObjIntRegs,
                           unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // First six integer arguments go in GPRs; the rest go on the stack.
    if (NumIntRegs < 6)
      ObjIntRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::i8:  ObjSize = 1; break;
      case MVT::i16: ObjSize = 2; break;
      case MVT::i32: ObjSize = 4; break;
      case MVT::i64: ObjSize = 8; break;
      }
    }
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // First eight FP/vector arguments go in XMM registers.
    if (NumXMMRegs < 8)
      ObjXMMRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::f32:  ObjSize = 4; break;
      case MVT::f64:  ObjSize = 8; break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        ObjSize = 16; break;
      }
      break;
    }
  }
}

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [RSP] -- return address
  // [RSP + 8] -- first nonreg argument (leftmost lexically)
  // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  // X86-64 SysV argument registers, indexed by how many integer/XMM regs
  // have already been consumed.
  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 8;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    // FIXME: __int128 and long double support?
    HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                               ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 8)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      // Argument arrives in a register: mark it live-in and copy it out.
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::i64: {
        TargetRegisterClass *RC = NULL;
        switch (ObjectVT) {
        default: break;
        case MVT::i8:
          RC = X86::GR8RegisterClass;
          Reg = GPR8ArgRegs[NumIntRegs];
          break;
        case MVT::i16:
          RC = X86::GR16RegisterClass;
          Reg = GPR16ArgRegs[NumIntRegs];
          break;
        case MVT::i32:
          RC = X86::GR32RegisterClass;
          Reg = GPR32ArgRegs[NumIntRegs];
          break;
        case MVT::i64:
          RC = X86::GR64RegisterClass;
          Reg = GPR64ArgRegs[NumIntRegs];
          break;
        }
        Reg = AddLiveIn(MF, Reg, RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      case MVT::f32:
      case MVT::f64:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: {
        TargetRegisterClass *RC= (ObjectVT == MVT::f32) ?
          X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
                                    X86::FR64RegisterClass : X86::VR128RegisterClass);
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    } else if (ObjSize) {
      // Argument arrives on the stack: load it from a fixed frame slot.
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    std::vector<SDOperand> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      // Spilled as v4f32; the reload reinterprets the 16 bytes as needed.
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // First pass: just compute the total stack size needed.
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6)
        ++NumIntRegs;
      else
        NumBytes += 8;
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8)
        NumXMMRegs++;
      else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
        NumBytes += 8;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Second pass: assign each argument to its register or stack slot.
  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6) {
        unsigned Reg = 0;
        switch (ArgVT) {
        default: break;
        case MVT::i8:  Reg = GPR8ArgRegs[NumIntRegs];  break;
        case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
        case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
        case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
        }
        RegsToPass.push_back(std::make_pair(Reg, Arg));
        ++NumIntRegs;
      } else {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += 8;
      }
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
          // XMM arguments have to be aligned on 16-byte boundary.
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
          ArgOffset += 8;
        else
          ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.
    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  // Callee pops nothing in the X86-64 C convention.
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  // Copy result values out of the ABI-dictated physical return registers.
  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    // A second i64 result value means a 128-bit return split across RAX/RDX.
    if (Op.Val->getValueType(1) == MVT::i64) {
      // FIXME: __int128 support?
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i64);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // FIXME: long double support?
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is through stack, returns the size of the stack
/// slot; if it is through integer or XMM register, returns the number of
/// integer or XMM registers that are needed.
1311static void 1312HowToPassFastCCArgument(MVT::ValueType ObjectVT, 1313 unsigned NumIntRegs, unsigned NumXMMRegs, 1314 unsigned &ObjSize, unsigned &ObjIntRegs, 1315 unsigned &ObjXMMRegs) { 1316 ObjSize = 0; 1317 ObjIntRegs = 0; 1318 ObjXMMRegs = 0; 1319 1320 switch (ObjectVT) { 1321 default: assert(0 && "Unhandled argument type!"); 1322 case MVT::i8: 1323#if FASTCC_NUM_INT_ARGS_INREGS > 0 1324 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 1325 ObjIntRegs = 1; 1326 else 1327#endif 1328 ObjSize = 1; 1329 break; 1330 case MVT::i16: 1331#if FASTCC_NUM_INT_ARGS_INREGS > 0 1332 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 1333 ObjIntRegs = 1; 1334 else 1335#endif 1336 ObjSize = 2; 1337 break; 1338 case MVT::i32: 1339#if FASTCC_NUM_INT_ARGS_INREGS > 0 1340 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 1341 ObjIntRegs = 1; 1342 else 1343#endif 1344 ObjSize = 4; 1345 break; 1346 case MVT::i64: 1347#if FASTCC_NUM_INT_ARGS_INREGS > 0 1348 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 1349 ObjIntRegs = 2; 1350 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 1351 ObjIntRegs = 1; 1352 ObjSize = 4; 1353 } else 1354#endif 1355 ObjSize = 8; 1356 case MVT::f32: 1357 ObjSize = 4; 1358 break; 1359 case MVT::f64: 1360 ObjSize = 8; 1361 break; 1362 case MVT::v16i8: 1363 case MVT::v8i16: 1364 case MVT::v4i32: 1365 case MVT::v2i64: 1366 case MVT::v4f32: 1367 case MVT::v2f64: 1368 if (NumXMMRegs < 4) 1369 ObjXMMRegs = 1; 1370 else 1371 ObjSize = 16; 1372 break; 1373 } 1374} 1375 1376SDOperand 1377X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 1378 unsigned NumArgs = Op.Val->getNumValues()-1; 1379 MachineFunction &MF = DAG.getMachineFunction(); 1380 MachineFrameInfo *MFI = MF.getFrameInfo(); 1381 SDOperand Root = Op.getOperand(0); 1382 std::vector<SDOperand> ArgValues; 1383 1384 // Add DAG nodes to load the arguments... 
  // On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                            ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      // Register-passed (part of the) argument: mark the physreg live-in
      // and copy its value out.
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        // Low half in EAX (or EDX if EAX is taken); if two regs are
        // available the high half comes in EDX and the two halves are
        // glued back together with BUILD_PAIR.
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        }
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }

    if (ObjSize) {
      // Stack-passed (part of the) argument.
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        // Split i64: combine the register low half with the stack high half.
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                                          NULL, 0);
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    MF.addLiveOut(X86::XMM0);
    break;
  }

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               bool isFastCall) {
  SDOperand Chain     = Op.getOperand(0);
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  // Rows indexed by (value type - MVT::i8): i8, i16, i32 register pairs.
  static const unsigned GPRArgRegs[][2] = {
    { X86::AL,  X86::DL },
    { X86::AX,  X86::DX },
    { X86::EAX, X86::EDX }
  };
#if 0
  static const unsigned FastCallGPRArgRegs[][2] = {
    { X86::CL,  X86::DL },
    { X86::CX,  X86::DX },
    { X86::ECX, X86::EDX }
  };
#endif
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // First pass: just compute the total stack size needed.
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32: {
      unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
      if (NumIntRegs < MaxNumIntRegs) {
        ++NumIntRegs;
        break;
      }
    } // Fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (isFastCall) {
        assert(0 && "Unknown value type!");
      } else {
        if (NumXMMRegs < 4)
          NumXMMRegs++;
        else {
          // XMM arguments have to be aligned on 16-byte boundary.
          NumBytes = ((NumBytes + 15) / 16) * 16;
          NumBytes += 16;
        }
      }
      break;
    }
  }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Second pass: assign each argument to its register or stack slot.
  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32: {
      unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
      if (NumIntRegs < MaxNumIntRegs) {
        RegsToPass.push_back(
          std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs],
                         Arg));
        ++NumIntRegs;
        break;
      }
    } // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 4;
      break;
    }
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (isFastCall) {
        assert(0 && "Unexpected ValueType for argument!");
      } else {
        if (NumXMMRegs < 4) {
          RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
          NumXMMRegs++;
        } else {
          // XMM arguments have to be aligned on 16-byte boundary.
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
          MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
          ArgOffset += 16;
        }
      }
      break;
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  // fastcc: the callee pops all of the stack argument bytes.
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  // Copy result values out of the ABI-dictated physical return registers.
  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    // A second i32 result value means an i64 return split across EAX/EDX.
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (isFastCall) {
      assert(0 && "Unknown value type to return!");
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(RetVT);
    }
    break;
  case MVT::f32:
  case MVT::f64: {
    // FP results come back on the x87 stack; FP_GET_RESULT models the
    // implicit ST(0) return.
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // With scalar SSE the value must live in an XMM register, so spill the
      // x87 result to a stack slot and reload it.
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }


  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
1815 NodeTys.push_back(MVT::Other);
1816 ResultVals.push_back(Chain);
1817 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1818 &ResultVals[0], ResultVals.size());
1819 return Res.getValue(Op.ResNo);
1820}
1821
1822//===----------------------------------------------------------------------===//
1823// StdCall Calling Convention implementation
1824//===----------------------------------------------------------------------===//
1825// StdCall calling convention seems to be standard for many Windows' API
1826// routines and around. It differs from C calling convention just a little:
1827// callee should clean up the stack, not caller. Symbols should be also
1828// decorated in some fancy way :) It doesn't support any vector arguments.
1829
1830/// HowToPassStdCallCCArgument - Returns how an formal argument of the specified
1831/// type should be passed. Returns the size of the stack slot
// All stdcall arguments are passed on the stack; ObjSize is set to the number
// of bytes the argument occupies there (1/2/4/8 depending on type).
1832static void
1833HowToPassStdCallCCArgument(MVT::ValueType ObjectVT, unsigned &ObjSize) {
1834 switch (ObjectVT) {
1835 default: assert(0 && "Unhandled argument type!");
1836 case MVT::i8: ObjSize = 1; break;
1837 case MVT::i16: ObjSize = 2; break;
1838 case MVT::i32: ObjSize = 4; break;
1839 case MVT::i64: ObjSize = 8; break;
1840 case MVT::f32: ObjSize = 4; break;
1841 case MVT::f64: ObjSize = 8; break;
1842 }
1843}
1844
// LowerStdCallCCArguments - Lower FORMAL_ARGUMENTS for the stdcall calling
// convention.  Op's operand 0 is the input chain, operand 2 the isVarArg
// flag; Op produces one result per formal argument plus an output chain.
// Unless the function is vararg, the callee pops its own arguments on return
// (BytesToPopOnReturn is set to the full argument area size).
1845 SDOperand X86TargetLowering::LowerStdCallCCArguments(SDOperand Op,
1846 SelectionDAG &DAG) {
1847 unsigned NumArgs = Op.Val->getNumValues() - 1;
1848 MachineFunction &MF = DAG.getMachineFunction();
1849 MachineFrameInfo *MFI = MF.getFrameInfo();
1850 SDOperand Root = Op.getOperand(0);
1851 std::vector<SDOperand> ArgValues;
1852
1853 // Add DAG nodes to load the arguments... On entry to a function on the X86,
1854 // the stack frame looks like this:
1855 //
1856 // [ESP] -- return address
1857 // [ESP + 4] -- first argument (leftmost lexically)
1858 // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
1859 // ...
1860 //
1861 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
1862 for (unsigned i = 0; i < NumArgs; ++i) {
1863 MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
1864 unsigned ArgIncrement = 4;
1865 unsigned ObjSize = 0;
1866 HowToPassStdCallCCArgument(ObjectVT, ObjSize);
// Arguments smaller than 4 bytes still consume a full 4-byte slot.
1867 if (ObjSize > 4)
1868 ArgIncrement = ObjSize;
1869
1870 SDOperand ArgValue;
1871 // Create the frame index object for this incoming parameter...
1872 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1873 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
1874 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
1875 ArgValues.push_back(ArgValue);
1876 ArgOffset += ArgIncrement; // Move on to the next argument...
1877 }
1878
1879 ArgValues.push_back(Root);
1880
1881 // If the function takes variable number of arguments, make a frame index for
1882 // the start of the first vararg value... for expansion of llvm.va_start.
1883 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1884 if (isVarArg) {
1885 BytesToPopOnReturn = 0; // Callee pops nothing.
1886 BytesCallerReserves = ArgOffset;
1887 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
1888 } else {
1889 BytesToPopOnReturn = ArgOffset; // Callee pops everything..
1890 BytesCallerReserves = 0;
1891 }
1892 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
1893 ReturnAddrIndex = 0; // No return address slot generated yet.
1894
1895 MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);
1896
1897 // Return the new list of results.
1898 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
1899 Op.Val->value_end());
1900 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
1901}
1902
1903
// LowerStdCallCCCallTo - Lower an outgoing stdcall call.  Operand layout:
// 0 = chain, 2 = isVarArg, 3 = isTailCall, 4 = callee, then (value,
// signedness-flag) pairs starting at operand 5.  Returns the chain, or a
// MERGE_VALUES of the call results plus the chain.
1904 SDOperand X86TargetLowering::LowerStdCallCCCallTo(SDOperand Op,
1905 SelectionDAG &DAG) {
1906 SDOperand Chain = Op.getOperand(0);
1907 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1908 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1909 SDOperand Callee = Op.getOperand(4);
1910 MVT::ValueType RetVT= Op.Val->getValueType(0);
1911 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1912
1913 // Count how many bytes are to be pushed on the stack.
1914 unsigned NumBytes = 0;
1915 for (unsigned i = 0; i != NumOps; ++i) {
1916 SDOperand Arg = Op.getOperand(5+2*i);
1917
1918 switch (Arg.getValueType()) {
1919 default: assert(0 && "Unexpected ValueType for argument!");
1920 case MVT::i8:
1921 case MVT::i16:
1922 case MVT::i32:
1923 case MVT::f32:
1924 NumBytes += 4;
1925 break;
1926 case MVT::i64:
1927 case MVT::f64:
1928 NumBytes += 8;
1929 break;
1930 }
1931 }
1932
1933 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1934
1935 // Arguments go on the stack in reverse order, as specified by the ABI.
1936 unsigned ArgOffset = 0;
1937 std::vector<SDOperand> MemOpChains;
1938 SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
1939 for (unsigned i = 0; i != NumOps; ++i) {
1940 SDOperand Arg = Op.getOperand(5+2*i);
1941
1942 switch (Arg.getValueType()) {
1943 default: assert(0 && "Unexpected ValueType for argument!");
1944 case MVT::i8:
1945 case MVT::i16: {
1946 // Promote the integer to 32 bits. If the input type is signed use a
1947 // sign extend, otherwise use a zero extend.
// The per-argument flag operand (5+2*i+1) records signedness of the formal.
1948 unsigned ExtOp =
1949 dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
1950 ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1951 Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
1952 }
1953 // Fallthrough
1954
1955 case MVT::i32:
1956 case MVT::f32: {
1957 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1958 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1959 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1960 ArgOffset += 4;
1961 break;
1962 }
1963 case MVT::i64:
1964 case MVT::f64: {
1965 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1966 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1967 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1968 ArgOffset += 8;
1969 break;
1970 }
1971 }
1972 }
1973
1974 if (!MemOpChains.empty())
1975 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1976 &MemOpChains[0], MemOpChains.size());
1977
1978 // If the callee is a GlobalAddress node (quite common, every direct call is)
1979 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1980 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1981 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1982 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1983 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1984
1985 std::vector<MVT::ValueType> NodeTys;
1986 NodeTys.push_back(MVT::Other); // Returns a chain
1987 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1988 std::vector<SDOperand> Ops;
1989 Ops.push_back(Chain);
1990 Ops.push_back(Callee);
1991
1992 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1993 NodeTys, &Ops[0], Ops.size());
1994 SDOperand InFlag = Chain.getValue(1);
1995
1996 // Create the CALLSEQ_END node.
1997 unsigned NumBytesForCalleeToPush;
1998
// stdcall: the callee pops its own arguments, except for vararg functions
// where the caller must clean up (it alone knows how much was pushed).
1999 if (isVarArg) {
2000 NumBytesForCalleeToPush = 0;
2001 } else {
2002 NumBytesForCalleeToPush = NumBytes;
2003 }
2004
2005 NodeTys.clear();
2006 NodeTys.push_back(MVT::Other); // Returns a chain
2007 if (RetVT != MVT::Other)
2008 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
2009 Ops.clear();
2010 Ops.push_back(Chain);
// CALLSEQ_END carries both the bytes the caller pushed and the bytes the
// callee pops on return.
2011 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
2012 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
2013 Ops.push_back(InFlag);
2014 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
2015 if (RetVT != MVT::Other)
2016 InFlag = Chain.getValue(1);
2017
2018 std::vector<SDOperand> ResultVals;
2019 NodeTys.clear();
2020 switch (RetVT) {
2021 default: assert(0 && "Unknown value type to return!");
2022 case MVT::Other: break;
2023 case MVT::i8:
2024 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
2025 ResultVals.push_back(Chain.getValue(0));
2026 NodeTys.push_back(MVT::i8);
2027 break;
2028 case MVT::i16:
2029 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
2030 ResultVals.push_back(Chain.getValue(0));
2031 NodeTys.push_back(MVT::i16);
2032 break;
2033 case MVT::i32:
// A second i32 result means an i64 return split across EAX:EDX.
2034 if (Op.Val->getValueType(1) == MVT::i32) {
2035 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
2036 ResultVals.push_back(Chain.getValue(0));
2037 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
2038 Chain.getValue(2)).getValue(1);
2039 ResultVals.push_back(Chain.getValue(0));
2040 NodeTys.push_back(MVT::i32);
2041 } else {
2042 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
2043 ResultVals.push_back(Chain.getValue(0));
2044 }
2045 NodeTys.push_back(MVT::i32);
2046 break;
2047 case MVT::f32:
2048 case MVT::f64: {
// FP values come back on the x87 stack; FP_GET_RESULT fetches ST(0).
2049 std::vector<MVT::ValueType> Tys;
2050 Tys.push_back(MVT::f64);
2051 Tys.push_back(MVT::Other);
2052 Tys.push_back(MVT::Flag);
2053 std::vector<SDOperand> Ops;
2054 Ops.push_back(Chain);
2055 Ops.push_back(InFlag);
2056 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
2057 &Ops[0], Ops.size());
2058 Chain = RetVal.getValue(1);
2059 InFlag = RetVal.getValue(2);
2060 if (X86ScalarSSE) {
2061 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
2062 // shouldn't be necessary except that RFP cannot be live across
2063 // multiple blocks. When stackifier is fixed, they can be uncoupled.
// Round-trip through a stack slot to move the value from the x87 stack
// into an SSE register.
2064 MachineFunction &MF = DAG.getMachineFunction();
2065 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
2066 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2067 Tys.clear();
2068 Tys.push_back(MVT::Other);
2069 Ops.clear();
2070 Ops.push_back(Chain);
2071 Ops.push_back(RetVal);
2072 Ops.push_back(StackSlot);
2073 Ops.push_back(DAG.getValueType(RetVT));
2074 Ops.push_back(InFlag);
2075 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
2076 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
2077 Chain = RetVal.getValue(1);
2078 }
2079
2080 if (RetVT == MVT::f32 && !X86ScalarSSE)
2081 // FIXME: we would really like to remember that this FP_ROUND
2082 // operation is okay to eliminate if we allow excess FP precision.
2083 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
2084 ResultVals.push_back(RetVal);
2085 NodeTys.push_back(RetVT);
2086 break;
2087 }
2088 }
2089
2090 // If the function returns void, just return the chain.
2091 if (ResultVals.empty())
2092 return Chain;
2093
2094 // Otherwise, merge everything together with a MERGE_VALUES node.
2095 NodeTys.push_back(MVT::Other);
2096 ResultVals.push_back(Chain);
2097 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
2098 &ResultVals[0], ResultVals.size());
2099 return Res.getValue(Op.ResNo);
2100}
2101
2102//===----------------------------------------------------------------------===//
2103// FastCall Calling Convention implementation
2104//===----------------------------------------------------------------------===//
2105//
2106// The X86 'fastcall' calling convention passes up to two integer arguments in
2107// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
2108// and requires that the callee pop its arguments off the stack (allowing proper
2109// tail calls), and has the same return value conventions as C calling convs.
2110//
2111// This calling convention always arranges for the callee pop value to be 8n+4
2112// bytes, which is needed for tail recursion elimination and stack alignment
2113// reasons.
2114//
2115
2116/// HowToPassFastCallCCArgument - Returns how an formal argument of the
2117/// specified type should be passed. If it is through stack, returns the size of
2118/// the stack slot; if it is through integer register, returns the number of
2119/// integer registers are needed.
2120static void 2121HowToPassFastCallCCArgument(MVT::ValueType ObjectVT, 2122 unsigned NumIntRegs, 2123 unsigned &ObjSize, 2124 unsigned &ObjIntRegs) 2125{ 2126 ObjSize = 0; 2127 ObjIntRegs = 0; 2128 2129 switch (ObjectVT) { 2130 default: assert(0 && "Unhandled argument type!"); 2131 case MVT::i8: 2132 if (NumIntRegs < 2) 2133 ObjIntRegs = 1; 2134 else 2135 ObjSize = 1; 2136 break; 2137 case MVT::i16: 2138 if (NumIntRegs < 2) 2139 ObjIntRegs = 1; 2140 else 2141 ObjSize = 2; 2142 break; 2143 case MVT::i32: 2144 if (NumIntRegs < 2) 2145 ObjIntRegs = 1; 2146 else 2147 ObjSize = 4; 2148 break; 2149 case MVT::i64: 2150 if (NumIntRegs+2 <= 2) { 2151 ObjIntRegs = 2; 2152 } else if (NumIntRegs+1 <= 2) { 2153 ObjIntRegs = 1; 2154 ObjSize = 4; 2155 } else 2156 ObjSize = 8; 2157 case MVT::f32: 2158 ObjSize = 4; 2159 break; 2160 case MVT::f64: 2161 ObjSize = 8; 2162 break; 2163 } 2164} 2165 2166SDOperand 2167X86TargetLowering::LowerFastCallCCArguments(SDOperand Op, SelectionDAG &DAG) { 2168 unsigned NumArgs = Op.Val->getNumValues()-1; 2169 MachineFunction &MF = DAG.getMachineFunction(); 2170 MachineFrameInfo *MFI = MF.getFrameInfo(); 2171 SDOperand Root = Op.getOperand(0); 2172 std::vector<SDOperand> ArgValues; 2173 2174 // Add DAG nodes to load the arguments... On entry to a function the stack 2175 // frame looks like this: 2176 // 2177 // [ESP] -- return address 2178 // [ESP + 4] -- first nonreg argument (leftmost lexically) 2179 // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size 2180 // ... 2181 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 2182 2183 // Keep track of the number of integer regs passed so far. This can be either 2184 // 0 (neither ECX or EDX used), 1 (ECX is used) or 2 (ECX and EDX are both 2185 // used). 
2186 unsigned NumIntRegs = 0;
2187
2188 for (unsigned i = 0; i < NumArgs; ++i) {
2189 MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
2190 unsigned ArgIncrement = 4;
2191 unsigned ObjSize = 0;
2192 unsigned ObjIntRegs = 0;
2193
2194 HowToPassFastCallCCArgument(ObjectVT, NumIntRegs, ObjSize, ObjIntRegs);
2195 if (ObjSize > 4)
2196 ArgIncrement = ObjSize;
2197
2198 unsigned Reg = 0;
2199 SDOperand ArgValue;
2200 if (ObjIntRegs) {
// Argument (or its low part) arrives in a register: first argument register
// is ECX (or a sub-register of it), second is EDX.
2201 switch (ObjectVT) {
2202 default: assert(0 && "Unhandled argument type!");
2203 case MVT::i8:
2204 Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::CL,
2205 X86::GR8RegisterClass);
2206 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
2207 break;
2208 case MVT::i16:
2209 Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::CX,
2210 X86::GR16RegisterClass);
2211 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
2212 break;
2213 case MVT::i32:
2214 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::ECX,
2215 X86::GR32RegisterClass);
2216 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
2217 break;
2218 case MVT::i64:
// Low half in ECX or EDX; when both registers are free (ObjIntRegs == 2)
// the high half always lands in EDX.
2219 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::ECX,
2220 X86::GR32RegisterClass);
2221 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
2222 if (ObjIntRegs == 2) {
2223 Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
2224 SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
2225 ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
2226 }
2227 break;
2228 }
2229
2230 NumIntRegs += ObjIntRegs;
2231 }
2232
2233 if (ObjSize) {
2234 // Create the SelectionDAG nodes corresponding to a load from this
2235 // parameter.
2236 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
2237 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
// An i64 split between a register and the stack: pair the register copy made
// above with a load of the remaining half.
2238 if (ObjectVT == MVT::i64 && ObjIntRegs) {
2239 SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
2240 NULL, 0);
2241 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
2242 } else
2243 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
2244 ArgOffset += ArgIncrement; // Move on to the next argument.
2245 }
2246
2247 ArgValues.push_back(ArgValue);
2248 }
2249
2250 ArgValues.push_back(Root);
2251
2252 // Make sure the instruction takes 8n+4 bytes to make sure the start of the
2253 // arguments and the arguments after the retaddr has been pushed are aligned.
2254 if ((ArgOffset & 7) == 0)
2255 ArgOffset += 4;
2256
2257 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
2258 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
2259 ReturnAddrIndex = 0; // No return address slot generated yet.
2260 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments.
2261 BytesCallerReserves = 0;
2262
2263 MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);
2264
2265 // Finally, inform the code generator which regs we return values in.
// NOTE(review): the call-result lowering in this file copies integer returns
// out of AL/AX/EAX (and EDX for the high half of i64), yet ECX is marked
// live-out here -- looks like it should be EAX; confirm against the return
// lowering before relying on this.
2266 switch (getValueType(MF.getFunction()->getReturnType())) {
2267 default: assert(0 && "Unknown type!");
2268 case MVT::isVoid: break;
2269 case MVT::i1:
2270 case MVT::i8:
2271 case MVT::i16:
2272 case MVT::i32:
2273 MF.addLiveOut(X86::ECX);
2274 break;
2275 case MVT::i64:
2276 MF.addLiveOut(X86::ECX);
2277 MF.addLiveOut(X86::EDX);
2278 break;
2279 case MVT::f32:
2280 case MVT::f64:
2281 MF.addLiveOut(X86::ST0);
2282 break;
2283 }
2284
2285 // Return the new list of results.
2286 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
2287 Op.Val->value_end());
2288 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
2289}
2290
// getReturnAddressFrameIndex - Lazily create (at most once per function) a
// fixed frame object addressing the return-address slot just below the frame
// (offset -4 on x86-32, -8 on x86-64), and return a FrameIndex node for it.
2291 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
2292 if (ReturnAddrIndex == 0) {
2293 // Set up a frame object for the return address.
2294 MachineFunction &MF = DAG.getMachineFunction();
2295 if (Subtarget->is64Bit())
2296 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
2297 else
2298 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
2299 }
2300
2301 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
2302}
2303
2304
2305
// LowerFrameReturnAddress - Lower llvm.returnaddress / llvm.frameaddress.
// Only Depth == 0 is supported; deeper queries fold to constant 0.
2306 std::pair<SDOperand, SDOperand> X86TargetLowering::
2307 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
2308 SelectionDAG &DAG) {
2309 SDOperand Result;
2310 if (Depth) // Depths > 0 not supported yet!
2311 Result = DAG.getConstant(0, getPointerTy());
2312 else {
2313 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
2314 if (!isFrameAddress)
2315 // Just load the return address
2316 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI,
2317 NULL, 0);
2318 else
// Frame address = address 4 bytes below the return-address slot.
// NOTE(review): hard-coded 4 even when Subtarget->is64Bit() -- confirm.
2319 Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
2320 DAG.getConstant(4, getPointerTy()));
2321 }
2322 return std::make_pair(Result, Chain);
2323}
2324
2325/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
2326/// specific condition code. It returns a false if it cannot do a direct
2327/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
2328/// needed.
2329 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
2330 unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
2331 SelectionDAG &DAG) {
2332 X86CC = X86::COND_INVALID;
2333 if (!isFP) {
2334 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2335 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
2336 // X > -1 -> X == 0, jump !sign.
2337 RHS = DAG.getConstant(0, RHS.getValueType());
2338 X86CC = X86::COND_NS;
2339 return true;
2340 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
2341 // X < 0 -> X == 0, jump on sign.
// RHS is already the constant 0, so no replacement constant is needed.
2342 X86CC = X86::COND_S;
2343 return true;
2344 }
2345 }
2346
2347 switch (SetCCOpcode) {
2348 default: break;
2349 case ISD::SETEQ: X86CC = X86::COND_E; break;
2350 case ISD::SETGT: X86CC = X86::COND_G; break;
2351 case ISD::SETGE: X86CC = X86::COND_GE; break;
2352 case ISD::SETLT: X86CC = X86::COND_L; break;
2353 case ISD::SETLE: X86CC = X86::COND_LE; break;
2354 case ISD::SETNE: X86CC = X86::COND_NE; break;
2355 case ISD::SETULT: X86CC = X86::COND_B; break;
2356 case ISD::SETUGT: X86CC = X86::COND_A; break;
2357 case ISD::SETULE: X86CC = X86::COND_BE; break;
2358 case ISD::SETUGE: X86CC = X86::COND_AE; break;
2359 }
2360 } else {
2361 // On a floating point condition, the flags are set as follows:
2362 // ZF PF CF op
2363 // 0 | 0 | 0 | X > Y
2364 // 0 | 0 | 1 | X < Y
2365 // 1 | 0 | 0 | X == Y
2366 // 1 | 1 | 1 | unordered
// Only unsigned-style conditions are directly encodable after FUCOM; "Flip"
// swaps the operands so e.g. OLT becomes OGT of the swapped pair.
2367 bool Flip = false;
2368 switch (SetCCOpcode) {
2369 default: break;
2370 case ISD::SETUEQ:
2371 case ISD::SETEQ: X86CC = X86::COND_E; break;
2372 case ISD::SETOLT: Flip = true; // Fallthrough
2373 case ISD::SETOGT:
2374 case ISD::SETGT: X86CC = X86::COND_A; break;
2375 case ISD::SETOLE: Flip = true; // Fallthrough
2376 case ISD::SETOGE:
2377 case ISD::SETGE: X86CC = X86::COND_AE; break;
2378 case ISD::SETUGT: Flip = true; // Fallthrough
2379 case ISD::SETULT:
2380 case ISD::SETLT: X86CC = X86::COND_B; break;
2381 case ISD::SETUGE: Flip = true; // Fallthrough
2382 case ISD::SETULE:
2383 case ISD::SETLE: X86CC = X86::COND_BE; break;
2384 case ISD::SETONE:
2385 case ISD::SETNE: X86CC = X86::COND_NE; break;
2386 case ISD::SETUO: X86CC = X86::COND_P; break;
2387 case ISD::SETO: X86CC = X86::COND_NP; break;
2388 }
2389 if (Flip)
2390 std::swap(LHS, RHS);
2391 }
2392
2393 return X86CC != X86::COND_INVALID;
2394}
2395
2396/// hasFPCMov - is there a floating point cmov for the specific X86 condition
2397/// code. Current x86 isa includes the following FP cmov instructions:
2398/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
2399 static bool hasFPCMov(unsigned X86CC) {
2400 switch (X86CC) {
2401 default:
2402 return false;
2403 case X86::COND_B:
2404 case X86::COND_BE:
2405 case X86::COND_E:
2406 case X86::COND_P:
2407 case X86::COND_A:
2408 case X86::COND_AE:
2409 case X86::COND_NE:
2410 case X86::COND_NP:
2411 return true;
2412 }
2413}
2414
2415/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
2416/// load. For Darwin, external and weak symbols are indirect, loading the value
2417/// at address GV rather then the value of GV itself. This means that the
2418/// GlobalAddress must be in the base or index register of the address, not the
2419/// GV offset field.
2420 static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
2421 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
2422 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
2423}
2424
2425/// WindowsGVRequiresExtraLoad - true if accessing the GV requires an extra
2426/// load. For Windows, dllimported symbols are indirect, loading the value at
2427/// address GV rather then the value of GV itself. This means that the
2428/// GlobalAddress must be in the base or index register of the address, not the
2429/// GV offset field.
2430 static bool WindowsGVRequiresExtraLoad(GlobalValue *GV) {
2431 return (GV->hasDLLImportLinkage());
2432}
2433
2434/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
2435/// true if Op is undef or if its value falls within the half-open range [Low, Hi).
2436static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 2437 if (Op.getOpcode() == ISD::UNDEF) 2438 return true; 2439 2440 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 2441 return (Val >= Low && Val < Hi); 2442} 2443 2444/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 2445/// true if Op is undef or if its value equal to the specified value. 2446static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 2447 if (Op.getOpcode() == ISD::UNDEF) 2448 return true; 2449 return cast<ConstantSDNode>(Op)->getValue() == Val; 2450} 2451 2452/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 2453/// specifies a shuffle of elements that is suitable for input to PSHUFD. 2454bool X86::isPSHUFDMask(SDNode *N) { 2455 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2456 2457 if (N->getNumOperands() != 4) 2458 return false; 2459 2460 // Check if the value doesn't reference the second vector. 2461 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 2462 SDOperand Arg = N->getOperand(i); 2463 if (Arg.getOpcode() == ISD::UNDEF) continue; 2464 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2465 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 2466 return false; 2467 } 2468 2469 return true; 2470} 2471 2472/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 2473/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 2474bool X86::isPSHUFHWMask(SDNode *N) { 2475 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2476 2477 if (N->getNumOperands() != 8) 2478 return false; 2479 2480 // Lower quadword copied in order. 2481 for (unsigned i = 0; i != 4; ++i) { 2482 SDOperand Arg = N->getOperand(i); 2483 if (Arg.getOpcode() == ISD::UNDEF) continue; 2484 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2485 if (cast<ConstantSDNode>(Arg)->getValue() != i) 2486 return false; 2487 } 2488 2489 // Upper quadword shuffled. 
2490 for (unsigned i = 4; i != 8; ++i) { 2491 SDOperand Arg = N->getOperand(i); 2492 if (Arg.getOpcode() == ISD::UNDEF) continue; 2493 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2494 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2495 if (Val < 4 || Val > 7) 2496 return false; 2497 } 2498 2499 return true; 2500} 2501 2502/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 2503/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 2504bool X86::isPSHUFLWMask(SDNode *N) { 2505 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2506 2507 if (N->getNumOperands() != 8) 2508 return false; 2509 2510 // Upper quadword copied in order. 2511 for (unsigned i = 4; i != 8; ++i) 2512 if (!isUndefOrEqual(N->getOperand(i), i)) 2513 return false; 2514 2515 // Lower quadword shuffled. 2516 for (unsigned i = 0; i != 4; ++i) 2517 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2518 return false; 2519 2520 return true; 2521} 2522 2523/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2524/// specifies a shuffle of elements that is suitable for input to SHUFP*. 2525static bool isSHUFPMask(std::vector<SDOperand> &N) { 2526 unsigned NumElems = N.size(); 2527 if (NumElems != 2 && NumElems != 4) return false; 2528 2529 unsigned Half = NumElems / 2; 2530 for (unsigned i = 0; i < Half; ++i) 2531 if (!isUndefOrInRange(N[i], 0, NumElems)) 2532 return false; 2533 for (unsigned i = Half; i < NumElems; ++i) 2534 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 2535 return false; 2536 2537 return true; 2538} 2539 2540bool X86::isSHUFPMask(SDNode *N) { 2541 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2542 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2543 return ::isSHUFPMask(Ops); 2544} 2545 2546/// isCommutedSHUFP - Returns true if the shuffle mask is except 2547/// the reverse of what x86 shuffles want. 
x86 shuffles requires the lower 2548/// half elements to come from vector 1 (which would equal the dest.) and 2549/// the upper half to come from vector 2. 2550static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 2551 unsigned NumElems = Ops.size(); 2552 if (NumElems != 2 && NumElems != 4) return false; 2553 2554 unsigned Half = NumElems / 2; 2555 for (unsigned i = 0; i < Half; ++i) 2556 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 2557 return false; 2558 for (unsigned i = Half; i < NumElems; ++i) 2559 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 2560 return false; 2561 return true; 2562} 2563 2564static bool isCommutedSHUFP(SDNode *N) { 2565 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2566 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2567 return isCommutedSHUFP(Ops); 2568} 2569 2570/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2571/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2572bool X86::isMOVHLPSMask(SDNode *N) { 2573 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2574 2575 if (N->getNumOperands() != 4) 2576 return false; 2577 2578 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2579 return isUndefOrEqual(N->getOperand(0), 6) && 2580 isUndefOrEqual(N->getOperand(1), 7) && 2581 isUndefOrEqual(N->getOperand(2), 2) && 2582 isUndefOrEqual(N->getOperand(3), 3); 2583} 2584 2585/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 2586/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, 2587/// <2, 3, 2, 3> 2588bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 2589 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2590 2591 if (N->getNumOperands() != 4) 2592 return false; 2593 2594 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 2595 return isUndefOrEqual(N->getOperand(0), 2) && 2596 isUndefOrEqual(N->getOperand(1), 3) && 2597 isUndefOrEqual(N->getOperand(2), 2) && 2598 isUndefOrEqual(N->getOperand(3), 3); 2599} 2600 2601/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2602/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 2603bool X86::isMOVLPMask(SDNode *N) { 2604 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2605 2606 unsigned NumElems = N->getNumOperands(); 2607 if (NumElems != 2 && NumElems != 4) 2608 return false; 2609 2610 for (unsigned i = 0; i < NumElems/2; ++i) 2611 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 2612 return false; 2613 2614 for (unsigned i = NumElems/2; i < NumElems; ++i) 2615 if (!isUndefOrEqual(N->getOperand(i), i)) 2616 return false; 2617 2618 return true; 2619} 2620 2621/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 2622/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 2623/// and MOVLHPS. 2624bool X86::isMOVHPMask(SDNode *N) { 2625 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2626 2627 unsigned NumElems = N->getNumOperands(); 2628 if (NumElems != 2 && NumElems != 4) 2629 return false; 2630 2631 for (unsigned i = 0; i < NumElems/2; ++i) 2632 if (!isUndefOrEqual(N->getOperand(i), i)) 2633 return false; 2634 2635 for (unsigned i = 0; i < NumElems/2; ++i) { 2636 SDOperand Arg = N->getOperand(i + NumElems/2); 2637 if (!isUndefOrEqual(Arg, i + NumElems)) 2638 return false; 2639 } 2640 2641 return true; 2642} 2643 2644/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 2645/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
2646bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2647 unsigned NumElems = N.size(); 2648 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2649 return false; 2650 2651 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2652 SDOperand BitI = N[i]; 2653 SDOperand BitI1 = N[i+1]; 2654 if (!isUndefOrEqual(BitI, j)) 2655 return false; 2656 if (V2IsSplat) { 2657 if (isUndefOrEqual(BitI1, NumElems)) 2658 return false; 2659 } else { 2660 if (!isUndefOrEqual(BitI1, j + NumElems)) 2661 return false; 2662 } 2663 } 2664 2665 return true; 2666} 2667 2668bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 2669 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2670 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2671 return ::isUNPCKLMask(Ops, V2IsSplat); 2672} 2673 2674/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 2675/// specifies a shuffle of elements that is suitable for input to UNPCKH. 2676bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 2677 unsigned NumElems = N.size(); 2678 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2679 return false; 2680 2681 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2682 SDOperand BitI = N[i]; 2683 SDOperand BitI1 = N[i+1]; 2684 if (!isUndefOrEqual(BitI, j + NumElems/2)) 2685 return false; 2686 if (V2IsSplat) { 2687 if (isUndefOrEqual(BitI1, NumElems)) 2688 return false; 2689 } else { 2690 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 2691 return false; 2692 } 2693 } 2694 2695 return true; 2696} 2697 2698bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 2699 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2700 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2701 return ::isUNPCKHMask(Ops, V2IsSplat); 2702} 2703 2704/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 2705/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, 2706/// <0, 0, 1, 1> 2707bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 2708 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2709 2710 unsigned NumElems = N->getNumOperands(); 2711 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 2712 return false; 2713 2714 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2715 SDOperand BitI = N->getOperand(i); 2716 SDOperand BitI1 = N->getOperand(i+1); 2717 2718 if (!isUndefOrEqual(BitI, j)) 2719 return false; 2720 if (!isUndefOrEqual(BitI1, j)) 2721 return false; 2722 } 2723 2724 return true; 2725} 2726 2727/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 2728/// specifies a shuffle of elements that is suitable for input to MOVSS, 2729/// MOVSD, and MOVD, i.e. setting the lowest element. 2730static bool isMOVLMask(std::vector<SDOperand> &N) { 2731 unsigned NumElems = N.size(); 2732 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2733 return false; 2734 2735 if (!isUndefOrEqual(N[0], NumElems)) 2736 return false; 2737 2738 for (unsigned i = 1; i < NumElems; ++i) { 2739 SDOperand Arg = N[i]; 2740 if (!isUndefOrEqual(Arg, i)) 2741 return false; 2742 } 2743 2744 return true; 2745} 2746 2747bool X86::isMOVLMask(SDNode *N) { 2748 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2749 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2750 return ::isMOVLMask(Ops); 2751} 2752 2753/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 2754/// of what x86 movss want. X86 movs requires the lowest element to be lowest 2755/// element of vector 2 and the other elements to come from vector 1 in order. 
2756static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false, 2757 bool V2IsUndef = false) { 2758 unsigned NumElems = Ops.size(); 2759 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2760 return false; 2761 2762 if (!isUndefOrEqual(Ops[0], 0)) 2763 return false; 2764 2765 for (unsigned i = 1; i < NumElems; ++i) { 2766 SDOperand Arg = Ops[i]; 2767 if (!(isUndefOrEqual(Arg, i+NumElems) || 2768 (V2IsUndef && isUndefOrInRange(Arg, NumElems, NumElems*2)) || 2769 (V2IsSplat && isUndefOrEqual(Arg, NumElems)))) 2770 return false; 2771 } 2772 2773 return true; 2774} 2775 2776static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 2777 bool V2IsUndef = false) { 2778 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2779 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 2780 return isCommutedMOVL(Ops, V2IsSplat, V2IsUndef); 2781} 2782 2783/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2784/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 2785bool X86::isMOVSHDUPMask(SDNode *N) { 2786 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2787 2788 if (N->getNumOperands() != 4) 2789 return false; 2790 2791 // Expect 1, 1, 3, 3 2792 for (unsigned i = 0; i < 2; ++i) { 2793 SDOperand Arg = N->getOperand(i); 2794 if (Arg.getOpcode() == ISD::UNDEF) continue; 2795 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2796 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2797 if (Val != 1) return false; 2798 } 2799 2800 bool HasHi = false; 2801 for (unsigned i = 2; i < 4; ++i) { 2802 SDOperand Arg = N->getOperand(i); 2803 if (Arg.getOpcode() == ISD::UNDEF) continue; 2804 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2805 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2806 if (Val != 3) return false; 2807 HasHi = true; 2808 } 2809 2810 // Don't use movshdup if it can be done with a shufps. 
2811 return HasHi; 2812} 2813 2814/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2815/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 2816bool X86::isMOVSLDUPMask(SDNode *N) { 2817 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2818 2819 if (N->getNumOperands() != 4) 2820 return false; 2821 2822 // Expect 0, 0, 2, 2 2823 for (unsigned i = 0; i < 2; ++i) { 2824 SDOperand Arg = N->getOperand(i); 2825 if (Arg.getOpcode() == ISD::UNDEF) continue; 2826 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2827 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2828 if (Val != 0) return false; 2829 } 2830 2831 bool HasHi = false; 2832 for (unsigned i = 2; i < 4; ++i) { 2833 SDOperand Arg = N->getOperand(i); 2834 if (Arg.getOpcode() == ISD::UNDEF) continue; 2835 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2836 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2837 if (Val != 2) return false; 2838 HasHi = true; 2839 } 2840 2841 // Don't use movshdup if it can be done with a shufps. 2842 return HasHi; 2843} 2844 2845/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2846/// a splat of a single element. 2847static bool isSplatMask(SDNode *N) { 2848 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2849 2850 // This is a splat operation if each element of the permute is the same, and 2851 // if the value doesn't reference the second vector. 
2852 unsigned NumElems = N->getNumOperands(); 2853 SDOperand ElementBase; 2854 unsigned i = 0; 2855 for (; i != NumElems; ++i) { 2856 SDOperand Elt = N->getOperand(i); 2857 if (isa<ConstantSDNode>(Elt)) { 2858 ElementBase = Elt; 2859 break; 2860 } 2861 } 2862 2863 if (!ElementBase.Val) 2864 return false; 2865 2866 for (; i != NumElems; ++i) { 2867 SDOperand Arg = N->getOperand(i); 2868 if (Arg.getOpcode() == ISD::UNDEF) continue; 2869 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2870 if (Arg != ElementBase) return false; 2871 } 2872 2873 // Make sure it is a splat of the first vector operand. 2874 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 2875} 2876 2877/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2878/// a splat of a single element and it's a 2 or 4 element mask. 2879bool X86::isSplatMask(SDNode *N) { 2880 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2881 2882 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 2883 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2884 return false; 2885 return ::isSplatMask(N); 2886} 2887 2888/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand 2889/// specifies a splat of zero element. 2890bool X86::isSplatLoMask(SDNode *N) { 2891 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2892 2893 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 2894 if (!isUndefOrEqual(N->getOperand(i), 0)) 2895 return false; 2896 return true; 2897} 2898 2899/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2900/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2901/// instructions. 2902unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2903 unsigned NumOperands = N->getNumOperands(); 2904 unsigned Shift = (NumOperands == 4) ? 
2 : 1; 2905 unsigned Mask = 0; 2906 for (unsigned i = 0; i < NumOperands; ++i) { 2907 unsigned Val = 0; 2908 SDOperand Arg = N->getOperand(NumOperands-i-1); 2909 if (Arg.getOpcode() != ISD::UNDEF) 2910 Val = cast<ConstantSDNode>(Arg)->getValue(); 2911 if (Val >= NumOperands) Val -= NumOperands; 2912 Mask |= Val; 2913 if (i != NumOperands - 1) 2914 Mask <<= Shift; 2915 } 2916 2917 return Mask; 2918} 2919 2920/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2921/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2922/// instructions. 2923unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2924 unsigned Mask = 0; 2925 // 8 nodes, but we only care about the last 4. 2926 for (unsigned i = 7; i >= 4; --i) { 2927 unsigned Val = 0; 2928 SDOperand Arg = N->getOperand(i); 2929 if (Arg.getOpcode() != ISD::UNDEF) 2930 Val = cast<ConstantSDNode>(Arg)->getValue(); 2931 Mask |= (Val - 4); 2932 if (i != 4) 2933 Mask <<= 2; 2934 } 2935 2936 return Mask; 2937} 2938 2939/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2940/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2941/// instructions. 2942unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2943 unsigned Mask = 0; 2944 // 8 nodes, but we only care about the first 4. 2945 for (int i = 3; i >= 0; --i) { 2946 unsigned Val = 0; 2947 SDOperand Arg = N->getOperand(i); 2948 if (Arg.getOpcode() != ISD::UNDEF) 2949 Val = cast<ConstantSDNode>(Arg)->getValue(); 2950 Mask |= Val; 2951 if (i != 0) 2952 Mask <<= 2; 2953 } 2954 2955 return Mask; 2956} 2957 2958/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2959/// specifies a 8 element shuffle that can be broken into a pair of 2960/// PSHUFHW and PSHUFLW. 2961static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2962 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2963 2964 if (N->getNumOperands() != 8) 2965 return false; 2966 2967 // Lower quadword shuffled. 
2968 for (unsigned i = 0; i != 4; ++i) { 2969 SDOperand Arg = N->getOperand(i); 2970 if (Arg.getOpcode() == ISD::UNDEF) continue; 2971 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2972 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2973 if (Val > 4) 2974 return false; 2975 } 2976 2977 // Upper quadword shuffled. 2978 for (unsigned i = 4; i != 8; ++i) { 2979 SDOperand Arg = N->getOperand(i); 2980 if (Arg.getOpcode() == ISD::UNDEF) continue; 2981 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2982 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2983 if (Val < 4 || Val > 7) 2984 return false; 2985 } 2986 2987 return true; 2988} 2989 2990/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 2991/// values in ther permute mask. 2992static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, 2993 SDOperand &V2, SDOperand &Mask, 2994 SelectionDAG &DAG) { 2995 MVT::ValueType VT = Op.getValueType(); 2996 MVT::ValueType MaskVT = Mask.getValueType(); 2997 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 2998 unsigned NumElems = Mask.getNumOperands(); 2999 std::vector<SDOperand> MaskVec; 3000 3001 for (unsigned i = 0; i != NumElems; ++i) { 3002 SDOperand Arg = Mask.getOperand(i); 3003 if (Arg.getOpcode() == ISD::UNDEF) { 3004 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 3005 continue; 3006 } 3007 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 3008 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 3009 if (Val < NumElems) 3010 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 3011 else 3012 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 3013 } 3014 3015 std::swap(V1, V2); 3016 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3017 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3018} 3019 3020/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 3021/// match movhlps. 
The lower half elements should come from upper half of 3022/// V1 (and in order), and the upper half elements should come from the upper 3023/// half of V2 (and in order). 3024static bool ShouldXformToMOVHLPS(SDNode *Mask) { 3025 unsigned NumElems = Mask->getNumOperands(); 3026 if (NumElems != 4) 3027 return false; 3028 for (unsigned i = 0, e = 2; i != e; ++i) 3029 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 3030 return false; 3031 for (unsigned i = 2; i != 4; ++i) 3032 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 3033 return false; 3034 return true; 3035} 3036 3037/// isScalarLoadToVector - Returns true if the node is a scalar load that 3038/// is promoted to a vector. 3039static inline bool isScalarLoadToVector(SDNode *N) { 3040 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 3041 N = N->getOperand(0).Val; 3042 return ISD::isNON_EXTLoad(N); 3043 } 3044 return false; 3045} 3046 3047/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 3048/// match movlp{s|d}. The lower half elements should come from lower half of 3049/// V1 (and in order), and the upper half elements should come from the upper 3050/// half of V2 (and in order). And since V1 will become the source of the 3051/// MOVLP, it must be either a vector load or a scalar load to vector. 3052static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 3053 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 3054 return false; 3055 // Is V2 is a vector load, don't do this transformation. We will try to use 3056 // load folding shufps op. 
3057 if (ISD::isNON_EXTLoad(V2)) 3058 return false; 3059 3060 unsigned NumElems = Mask->getNumOperands(); 3061 if (NumElems != 2 && NumElems != 4) 3062 return false; 3063 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 3064 if (!isUndefOrEqual(Mask->getOperand(i), i)) 3065 return false; 3066 for (unsigned i = NumElems/2; i != NumElems; ++i) 3067 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 3068 return false; 3069 return true; 3070} 3071 3072/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 3073/// all the same. 3074static bool isSplatVector(SDNode *N) { 3075 if (N->getOpcode() != ISD::BUILD_VECTOR) 3076 return false; 3077 3078 SDOperand SplatValue = N->getOperand(0); 3079 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 3080 if (N->getOperand(i) != SplatValue) 3081 return false; 3082 return true; 3083} 3084 3085/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 3086/// to an undef. 3087static bool isUndefShuffle(SDNode *N) { 3088 if (N->getOpcode() != ISD::BUILD_VECTOR) 3089 return false; 3090 3091 SDOperand V1 = N->getOperand(0); 3092 SDOperand V2 = N->getOperand(1); 3093 SDOperand Mask = N->getOperand(2); 3094 unsigned NumElems = Mask.getNumOperands(); 3095 for (unsigned i = 0; i != NumElems; ++i) { 3096 SDOperand Arg = Mask.getOperand(i); 3097 if (Arg.getOpcode() != ISD::UNDEF) { 3098 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 3099 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 3100 return false; 3101 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 3102 return false; 3103 } 3104 } 3105 return true; 3106} 3107 3108/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 3109/// that point to V2 points to its first element. 
3110static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 3111 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 3112 3113 bool Changed = false; 3114 std::vector<SDOperand> MaskVec; 3115 unsigned NumElems = Mask.getNumOperands(); 3116 for (unsigned i = 0; i != NumElems; ++i) { 3117 SDOperand Arg = Mask.getOperand(i); 3118 if (Arg.getOpcode() != ISD::UNDEF) { 3119 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 3120 if (Val > NumElems) { 3121 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 3122 Changed = true; 3123 } 3124 } 3125 MaskVec.push_back(Arg); 3126 } 3127 3128 if (Changed) 3129 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 3130 &MaskVec[0], MaskVec.size()); 3131 return Mask; 3132} 3133 3134/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 3135/// operation of specified width. 3136static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 3137 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3138 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3139 3140 std::vector<SDOperand> MaskVec; 3141 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 3142 for (unsigned i = 1; i != NumElems; ++i) 3143 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3144 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3145} 3146 3147/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 3148/// of specified width. 
3149static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 3150 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3151 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3152 std::vector<SDOperand> MaskVec; 3153 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 3154 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3155 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 3156 } 3157 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3158} 3159 3160/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 3161/// of specified width. 3162static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 3163 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3164 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3165 unsigned Half = NumElems/2; 3166 std::vector<SDOperand> MaskVec; 3167 for (unsigned i = 0; i != Half; ++i) { 3168 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 3169 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 3170 } 3171 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3172} 3173 3174/// getZeroVector - Returns a vector of specified type with all zero elements. 3175/// 3176static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 3177 assert(MVT::isVector(VT) && "Expected a vector type"); 3178 unsigned NumElems = getVectorNumElements(VT); 3179 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 3180 bool isFP = MVT::isFloatingPoint(EVT); 3181 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 3182 std::vector<SDOperand> ZeroVec(NumElems, Zero); 3183 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 3184} 3185 3186/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = Mask.getNumOperands();
  // Repeatedly unpackl V1 with itself.  NOTE(review): NumElems only counts
  // the remaining halving steps here (16 -> two steps, 8 -> one step); the
  // vector type VT itself is unchanged by the loop.
  Mask = getUnpacklMask(NumElems, DAG);
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  // Splat the 32-bit lane with an all-zeroes v4i32 shuffle mask, then
  // convert back to the original vector type.
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  Mask = getZeroVector(MaskVT, DAG);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and zero or undef vector.  The result takes V2's first element
/// into position Idx; every other result element is element 0 of the
/// zero (isZero true) or undef vector.
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                             unsigned NumElems, unsigned Idx,
                                             bool isZero, SelectionDAG &DAG) {
  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
  SDOperand Zero = DAG.getConstant(0, EVT);
  // Mask is all zeroes (select V1[0]) except position Idx, which selects
  // V2's first element (index NumElems).
  std::vector<SDOperand> MaskVec(NumElems, Zero);
  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  // Bail out if more than 8 of the 16 bytes are non-zero.
  if (NumNonZero > 8)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      // Start from a zero vector only when some element must be cleared;
      // otherwise an undef starting point is enough.
      if (NumZero)
        V = getZeroVector(MVT::v8i16, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    // Process bytes in pairs: each odd index combines itself with the
    // preceding even byte into one 16-bit element insert.  The odd byte is
    // shifted into the high 8 bits and OR'd with the zero-extended even
    // byte; all-zero pairs are skipped entirely.
    if ((i & 1) != 0) {
      SDOperand ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.Val)
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getConstant(i/2, TLI.getPointerTy()));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
3277/// 3278static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 3279 unsigned NumNonZero, unsigned NumZero, 3280 SelectionDAG &DAG, TargetLowering &TLI) { 3281 if (NumNonZero > 4) 3282 return SDOperand(); 3283 3284 SDOperand V(0, 0); 3285 bool First = true; 3286 for (unsigned i = 0; i < 8; ++i) { 3287 bool isNonZero = (NonZeros & (1 << i)) != 0; 3288 if (isNonZero) { 3289 if (First) { 3290 if (NumZero) 3291 V = getZeroVector(MVT::v8i16, DAG); 3292 else 3293 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3294 First = false; 3295 } 3296 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 3297 DAG.getConstant(i, TLI.getPointerTy())); 3298 } 3299 } 3300 3301 return V; 3302} 3303 3304SDOperand 3305X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3306 // All zero's are handled with pxor. 3307 if (ISD::isBuildVectorAllZeros(Op.Val)) 3308 return Op; 3309 3310 // All one's are handled with pcmpeqd. 3311 if (ISD::isBuildVectorAllOnes(Op.Val)) 3312 return Op; 3313 3314 MVT::ValueType VT = Op.getValueType(); 3315 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 3316 unsigned EVTBits = MVT::getSizeInBits(EVT); 3317 3318 unsigned NumElems = Op.getNumOperands(); 3319 unsigned NumZero = 0; 3320 unsigned NumNonZero = 0; 3321 unsigned NonZeros = 0; 3322 std::set<SDOperand> Values; 3323 for (unsigned i = 0; i < NumElems; ++i) { 3324 SDOperand Elt = Op.getOperand(i); 3325 if (Elt.getOpcode() != ISD::UNDEF) { 3326 Values.insert(Elt); 3327 if (isZeroNode(Elt)) 3328 NumZero++; 3329 else { 3330 NonZeros |= (1 << i); 3331 NumNonZero++; 3332 } 3333 } 3334 } 3335 3336 if (NumNonZero == 0) 3337 // Must be a mix of zero and undef. Return a zero vector. 3338 return getZeroVector(VT, DAG); 3339 3340 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3341 if (Values.size() == 1) 3342 return SDOperand(); 3343 3344 // Special case for single non-zero element. 
3345 if (NumNonZero == 1) { 3346 unsigned Idx = CountTrailingZeros_32(NonZeros); 3347 SDOperand Item = Op.getOperand(Idx); 3348 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3349 if (Idx == 0) 3350 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 3351 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 3352 NumZero > 0, DAG); 3353 3354 if (EVTBits == 32) { 3355 // Turn it into a shuffle of zero and zero-extended scalar to vector. 3356 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 3357 DAG); 3358 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3359 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3360 std::vector<SDOperand> MaskVec; 3361 for (unsigned i = 0; i < NumElems; i++) 3362 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3363 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3364 &MaskVec[0], MaskVec.size()); 3365 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3366 DAG.getNode(ISD::UNDEF, VT), Mask); 3367 } 3368 } 3369 3370 // Let legalizer expand 2-wide build_vector's. 3371 if (EVTBits == 64) 3372 return SDOperand(); 3373 3374 // If element VT is < 32 bits, convert it to inserts into a zero vector. 3375 if (EVTBits == 8) { 3376 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 3377 *this); 3378 if (V.Val) return V; 3379 } 3380 3381 if (EVTBits == 16) { 3382 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 3383 *this); 3384 if (V.Val) return V; 3385 } 3386 3387 // If element VT is == 32 bits, turn it into a number of shuffles. 
3388 std::vector<SDOperand> V(NumElems); 3389 if (NumElems == 4 && NumZero > 0) { 3390 for (unsigned i = 0; i < 4; ++i) { 3391 bool isZero = !(NonZeros & (1 << i)); 3392 if (isZero) 3393 V[i] = getZeroVector(VT, DAG); 3394 else 3395 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3396 } 3397 3398 for (unsigned i = 0; i < 2; ++i) { 3399 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3400 default: break; 3401 case 0: 3402 V[i] = V[i*2]; // Must be a zero vector. 3403 break; 3404 case 1: 3405 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3406 getMOVLMask(NumElems, DAG)); 3407 break; 3408 case 2: 3409 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3410 getMOVLMask(NumElems, DAG)); 3411 break; 3412 case 3: 3413 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3414 getUnpacklMask(NumElems, DAG)); 3415 break; 3416 } 3417 } 3418 3419 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 3420 // clears the upper bits. 3421 // FIXME: we can do the same for v4f32 case when we know both parts of 3422 // the lower half come from scalar_to_vector (loadf32). We should do 3423 // that in post legalizer dag combiner with target specific hooks. 
3424 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 3425 return V[0]; 3426 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3427 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 3428 std::vector<SDOperand> MaskVec; 3429 bool Reverse = (NonZeros & 0x3) == 2; 3430 for (unsigned i = 0; i < 2; ++i) 3431 if (Reverse) 3432 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3433 else 3434 MaskVec.push_back(DAG.getConstant(i, EVT)); 3435 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3436 for (unsigned i = 0; i < 2; ++i) 3437 if (Reverse) 3438 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3439 else 3440 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3441 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3442 &MaskVec[0], MaskVec.size()); 3443 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3444 } 3445 3446 if (Values.size() > 2) { 3447 // Expand into a number of unpckl*. 3448 // e.g. for v4f32 3449 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3450 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3451 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3452 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 3453 for (unsigned i = 0; i < NumElems; ++i) 3454 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3455 NumElems >>= 1; 3456 while (NumElems != 0) { 3457 for (unsigned i = 0; i < NumElems; ++i) 3458 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3459 UnpckMask); 3460 NumElems >>= 1; 3461 } 3462 return V[0]; 3463 } 3464 3465 return SDOperand(); 3466} 3467 3468SDOperand 3469X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 3470 SDOperand V1 = Op.getOperand(0); 3471 SDOperand V2 = Op.getOperand(1); 3472 SDOperand PermMask = Op.getOperand(2); 3473 MVT::ValueType VT = Op.getValueType(); 3474 unsigned NumElems = PermMask.getNumOperands(); 3475 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3476 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3477 bool V1IsSplat = 
false; 3478 bool V2IsSplat = false; 3479 3480 if (isUndefShuffle(Op.Val)) 3481 return DAG.getNode(ISD::UNDEF, VT); 3482 3483 if (isSplatMask(PermMask.Val)) { 3484 if (NumElems <= 4) return Op; 3485 // Promote it to a v4i32 splat. 3486 return PromoteSplat(Op, DAG); 3487 } 3488 3489 if (X86::isMOVLMask(PermMask.Val)) 3490 return (V1IsUndef) ? V2 : Op; 3491 3492 if (X86::isMOVSHDUPMask(PermMask.Val) || 3493 X86::isMOVSLDUPMask(PermMask.Val) || 3494 X86::isMOVHLPSMask(PermMask.Val) || 3495 X86::isMOVHPMask(PermMask.Val) || 3496 X86::isMOVLPMask(PermMask.Val)) 3497 return Op; 3498 3499 if (ShouldXformToMOVHLPS(PermMask.Val) || 3500 ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val)) 3501 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3502 3503 bool Commuted = false; 3504 V1IsSplat = isSplatVector(V1.Val); 3505 V2IsSplat = isSplatVector(V2.Val); 3506 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 3507 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3508 std::swap(V1IsSplat, V2IsSplat); 3509 std::swap(V1IsUndef, V2IsUndef); 3510 Commuted = true; 3511 } 3512 3513 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 3514 if (V2IsUndef) return V1; 3515 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3516 if (V2IsSplat) { 3517 // V2 is a splat, so the mask may be malformed. That is, it may point 3518 // to any V2 element. The instruction selectior won't like this. Get 3519 // a corrected mask and commute to form a proper MOVS{S|D}. 3520 SDOperand NewMask = getMOVLMask(NumElems, DAG); 3521 if (NewMask.Val != PermMask.Val) 3522 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3523 } 3524 return Op; 3525 } 3526 3527 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 3528 X86::isUNPCKLMask(PermMask.Val) || 3529 X86::isUNPCKHMask(PermMask.Val)) 3530 return Op; 3531 3532 if (V2IsSplat) { 3533 // Normalize mask so all entries that point to V2 points to its first 3534 // element then try to match unpck{h|l} again. 
If match, return a 3535 // new vector_shuffle with the corrected mask. 3536 SDOperand NewMask = NormalizeMask(PermMask, DAG); 3537 if (NewMask.Val != PermMask.Val) { 3538 if (X86::isUNPCKLMask(PermMask.Val, true)) { 3539 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 3540 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3541 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 3542 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 3543 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 3544 } 3545 } 3546 } 3547 3548 // Normalize the node to match x86 shuffle ops if needed 3549 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val)) 3550 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3551 3552 if (Commuted) { 3553 // Commute is back and try unpck* again. 3554 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3555 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 3556 X86::isUNPCKLMask(PermMask.Val) || 3557 X86::isUNPCKHMask(PermMask.Val)) 3558 return Op; 3559 } 3560 3561 // If VT is integer, try PSHUF* first, then SHUFP*. 3562 if (MVT::isInteger(VT)) { 3563 if (X86::isPSHUFDMask(PermMask.Val) || 3564 X86::isPSHUFHWMask(PermMask.Val) || 3565 X86::isPSHUFLWMask(PermMask.Val)) { 3566 if (V2.getOpcode() != ISD::UNDEF) 3567 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3568 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3569 return Op; 3570 } 3571 3572 if (X86::isSHUFPMask(PermMask.Val)) 3573 return Op; 3574 3575 // Handle v8i16 shuffle high / low shuffle node pair. 
3576 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 3577 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3578 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 3579 std::vector<SDOperand> MaskVec; 3580 for (unsigned i = 0; i != 4; ++i) 3581 MaskVec.push_back(PermMask.getOperand(i)); 3582 for (unsigned i = 4; i != 8; ++i) 3583 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3584 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3585 &MaskVec[0], MaskVec.size()); 3586 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3587 MaskVec.clear(); 3588 for (unsigned i = 0; i != 4; ++i) 3589 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3590 for (unsigned i = 4; i != 8; ++i) 3591 MaskVec.push_back(PermMask.getOperand(i)); 3592 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 3593 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3594 } 3595 } else { 3596 // Floating point cases in the other order. 3597 if (X86::isSHUFPMask(PermMask.Val)) 3598 return Op; 3599 if (X86::isPSHUFDMask(PermMask.Val) || 3600 X86::isPSHUFHWMask(PermMask.Val) || 3601 X86::isPSHUFLWMask(PermMask.Val)) { 3602 if (V2.getOpcode() != ISD::UNDEF) 3603 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 3604 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 3605 return Op; 3606 } 3607 } 3608 3609 if (NumElems == 4) { 3610 MVT::ValueType MaskVT = PermMask.getValueType(); 3611 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 3612 std::vector<std::pair<int, int> > Locs; 3613 Locs.reserve(NumElems); 3614 std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3615 std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3616 unsigned NumHi = 0; 3617 unsigned NumLo = 0; 3618 // If no more than two elements come from either vector. This can be 3619 // implemented with two shuffles. First shuffle gather the elements. 
3620 // The second shuffle, which takes the first shuffle as both of its 3621 // vector operands, put the elements into the right order. 3622 for (unsigned i = 0; i != NumElems; ++i) { 3623 SDOperand Elt = PermMask.getOperand(i); 3624 if (Elt.getOpcode() == ISD::UNDEF) { 3625 Locs[i] = std::make_pair(-1, -1); 3626 } else { 3627 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 3628 if (Val < NumElems) { 3629 Locs[i] = std::make_pair(0, NumLo); 3630 Mask1[NumLo] = Elt; 3631 NumLo++; 3632 } else { 3633 Locs[i] = std::make_pair(1, NumHi); 3634 if (2+NumHi < NumElems) 3635 Mask1[2+NumHi] = Elt; 3636 NumHi++; 3637 } 3638 } 3639 } 3640 if (NumLo <= 2 && NumHi <= 2) { 3641 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3642 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3643 &Mask1[0], Mask1.size())); 3644 for (unsigned i = 0; i != NumElems; ++i) { 3645 if (Locs[i].first == -1) 3646 continue; 3647 else { 3648 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 3649 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 3650 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3651 } 3652 } 3653 3654 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3655 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3656 &Mask2[0], Mask2.size())); 3657 } 3658 3659 // Break it into (shuffle shuffle_hi, shuffle_lo). 
3660 Locs.clear(); 3661 std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3662 std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3663 std::vector<SDOperand> *MaskPtr = &LoMask; 3664 unsigned MaskIdx = 0; 3665 unsigned LoIdx = 0; 3666 unsigned HiIdx = NumElems/2; 3667 for (unsigned i = 0; i != NumElems; ++i) { 3668 if (i == NumElems/2) { 3669 MaskPtr = &HiMask; 3670 MaskIdx = 1; 3671 LoIdx = 0; 3672 HiIdx = NumElems/2; 3673 } 3674 SDOperand Elt = PermMask.getOperand(i); 3675 if (Elt.getOpcode() == ISD::UNDEF) { 3676 Locs[i] = std::make_pair(-1, -1); 3677 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3678 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3679 (*MaskPtr)[LoIdx] = Elt; 3680 LoIdx++; 3681 } else { 3682 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3683 (*MaskPtr)[HiIdx] = Elt; 3684 HiIdx++; 3685 } 3686 } 3687 3688 SDOperand LoShuffle = 3689 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3690 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3691 &LoMask[0], LoMask.size())); 3692 SDOperand HiShuffle = 3693 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3694 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3695 &HiMask[0], HiMask.size())); 3696 std::vector<SDOperand> MaskOps; 3697 for (unsigned i = 0; i != NumElems; ++i) { 3698 if (Locs[i].first == -1) { 3699 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3700 } else { 3701 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3702 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3703 } 3704 } 3705 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3706 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3707 &MaskOps[0], MaskOps.size())); 3708 } 3709 3710 return SDOperand(); 3711} 3712 3713SDOperand 3714X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3715 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3716 return SDOperand(); 3717 3718 MVT::ValueType VT = Op.getValueType(); 3719 // TODO: handle v16i8. 
3720 if (MVT::getSizeInBits(VT) == 16) { 3721 // Transform it so it match pextrw which produces a 32-bit result. 3722 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 3723 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 3724 Op.getOperand(0), Op.getOperand(1)); 3725 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 3726 DAG.getValueType(VT)); 3727 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3728 } else if (MVT::getSizeInBits(VT) == 32) { 3729 SDOperand Vec = Op.getOperand(0); 3730 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3731 if (Idx == 0) 3732 return Op; 3733 // SHUFPS the element to the lowest double word, then movss. 3734 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3735 std::vector<SDOperand> IdxVec; 3736 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 3737 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3738 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3739 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3740 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3741 &IdxVec[0], IdxVec.size()); 3742 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3743 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3744 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3745 DAG.getConstant(0, getPointerTy())); 3746 } else if (MVT::getSizeInBits(VT) == 64) { 3747 SDOperand Vec = Op.getOperand(0); 3748 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3749 if (Idx == 0) 3750 return Op; 3751 3752 // UNPCKHPD the element to the lowest double word, then movsd. 3753 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 3754 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
3755 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3756 std::vector<SDOperand> IdxVec; 3757 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 3758 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 3759 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3760 &IdxVec[0], IdxVec.size()); 3761 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3762 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3763 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3764 DAG.getConstant(0, getPointerTy())); 3765 } 3766 3767 return SDOperand(); 3768} 3769 3770SDOperand 3771X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3772 // Transform it so it match pinsrw which expects a 16-bit value in a GR32 3773 // as its second argument. 3774 MVT::ValueType VT = Op.getValueType(); 3775 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 3776 SDOperand N0 = Op.getOperand(0); 3777 SDOperand N1 = Op.getOperand(1); 3778 SDOperand N2 = Op.getOperand(2); 3779 if (MVT::getSizeInBits(BaseVT) == 16) { 3780 if (N1.getValueType() != MVT::i32) 3781 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3782 if (N2.getValueType() != MVT::i32) 3783 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 3784 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3785 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3786 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3787 if (Idx == 0) { 3788 // Use a movss. 
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      // Shuffle mask <4,1,2,3>: take the new scalar (element 0 of N1, which
      // is index 4 in the concatenated <N0,N1> numbering) plus the original
      // upper three elements of N0 — i.e. exactly a movss.
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);  // shadows outer BaseVT intentionally
      std::vector<SDOperand> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &MaskVec[0], MaskVec.size()));
    } else {
      // Use two pinsrw instructions to insert a 32 bit value.
      Idx <<= 1;  // Convert 32-bit element index to 16-bit word index.
      if (MVT::isFloatingPoint(N1.getValueType())) {
        // Get the 32-bit scalar into an integer register first.
        if (ISD::isNON_EXTLoad(N1.Val)) {
          // Just load directly from f32mem to GR32.
          LoadSDNode *LD = cast<LoadSDNode>(N1);
          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
                           LD->getSrcValue(), LD->getSrcValueOffset());
        } else {
          // Round-trip through a vector bitcast to reinterpret f32 as i32.
          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                           DAG.getConstant(0, getPointerTy()));
        }
      }
      // Insert the low 16 bits, then shift and insert the high 16 bits.
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, getPointerTy()));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, getPointerTy()));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  // Unsupported element size (e.g. 8-bit): let the caller fall back.
  return SDOperand();
}

// LowerSCALAR_TO_VECTOR - Lower SCALAR_TO_VECTOR by any-extending the scalar
// to i32 and wrapping it in the target-specific S2VEC node.
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetConstantPool(CP->getConstVal(),
                                                           getPointerTy(),
                                                           CP->getAlignment()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
  }

  return Result;
}

// LowerGlobalAddress - Wrap the global's target address; on Darwin PIC add
// the global base register, and add an indirection load where the ABI
// requires it (see comments below).
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetGlobalAddress(GV,
                                                            getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);

    // For Darwin, external and weak symbols are indirect, so we want to load
    // the value at address GV, not the value of GV itself. This means that
    // the GlobalAddress must be in the base or index register of the address,
    // not the GV offset field.
    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
        DarwinGVRequiresExtraLoad(GV))
      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
  } else if (Subtarget->isTargetCygwin() || Subtarget->isTargetWindows()) {
    // FIXME: What about PIC?
    if (WindowsGVRequiresExtraLoad(GV))
      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
  }


  return Result;
}

// LowerExternalSymbol - Wrap the symbol's target address; on 32-bit Darwin
// PIC the address is formed relative to the global base register.
SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetExternalSymbol(Sym,
                                                             getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

// LowerShift - Lower SRA_PARTS/SRL_PARTS/SHL_PARTS (an i64 shift expressed
// on two i32 halves) using SHLD/SHRD plus a CMOV on bit 5 of the shift
// amount: if the amount is >= 32, the "shifted across" half is selected
// instead. Returns a MERGE_VALUES of (Lo, Hi).
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
         "Not an i64 shift!");
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDOperand ShOpLo = Op.getOperand(0);
  SDOperand ShOpHi = Op.getOperand(1);
  SDOperand ShAmt  = Op.getOperand(2);
  // Tmp1 is the value that fills the vacated half when the amount is >= 32:
  // the sign of the high word for SRA, zero otherwise.
  SDOperand Tmp1 = isSRA ?
    DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
    DAG.getConstant(0, MVT::i32);

  // Tmp2/Tmp3 are the two candidate results for amounts < 32.
  SDOperand Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
  }

  // Test bit 5 of the shift amount (amount & 32) to see whether the shift
  // crosses the 32-bit boundary; the CMOVs below key off COND_NE.
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
                                  DAG.getConstant(32, MVT::i8));
  SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
  SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);

  SDOperand Hi, Lo;
  SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);

  VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // Note: the flag result of the first CMOV feeds the second so the two
  // selects share the same EFLAGS value; ordering here matters.
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
    InFlag = Hi.getValue(1);

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
  } else {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
    InFlag = Lo.getValue(1);

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(InFlag);
    Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
  }

  VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
  Ops.clear();
  Ops.push_back(Lo);
  Ops.push_back(Hi);
  return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}

// LowerSINT_TO_FP - Lower SINT_TO_FP by spilling the integer to a stack slot
// and loading it with FILD. With scalar SSE the x87 result is additionally
// stored back to memory and reloaded into an SSE register (see FIXME below).
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
         Op.getOperand(0).getValueType() >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  SDOperand Result;
  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  // Spill the integer operand so FILD can read it from memory.
  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
                                 StackSlot, NULL, 0);

  // Build the FILD
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::f64);
  Tys.push_back(MVT::Other);
  if (X86ScalarSSE) Tys.push_back(MVT::Flag);  // Flag couples FILD to FST below.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(StackSlot);
  Ops.push_back(DAG.getValueType(SrcVT));
  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
                       Tys, &Ops[0], Ops.size());

  if (X86ScalarSSE) {
    Chain = Result.getValue(1);
    SDOperand InFlag = Result.getValue(2);

    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When stackifier is fixed, they can be uncoupled.
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Result);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getValueType()));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
    // Reload from the slot; this load lives in an SSE register.
    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
  }

  return Result;
}

// LowerFP_TO_SINT - Lower FP_TO_SINT via the FP_TO_INT*_IN_MEM pseudo
// (x87 fistp) and a reload of the stored integer.
SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
         "Unknown FP_TO_SINT to lower!");
  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
  // stack slot.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  // Pick the pseudo matching the destination integer width.
  unsigned Opc;
  switch (Op.getValueType()) {
  default: assert(0 && "Invalid FP_TO_SINT to lower!");
  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
  }

  SDOperand Chain = DAG.getEntryNode();
  SDOperand Value = Op.getOperand(0);
  if (X86ScalarSSE) {
    // The value lives in an SSE register; bounce it through memory into the
    // x87 stack (FLD) since fistp only operates on x87 values.
    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
    Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
    Chain = Value.getValue(1);
    // Fresh slot for the integer result (the first one now holds the FP spill).
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Value);
  Ops.push_back(StackSlot);
  SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size());

  // Load the result.
  return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}

// LowerFABS - Lower FABS by ANDing away the sign bit with a constant-pool
// mask (the mask is padded out to 16 bytes for a packed load).
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    // All bits set except the sign bit of the low double.
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    // All bits set except the sign bit of the low float.
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(VT);
  Tys.push_back(MVT::Other);
  SmallVector<SDOperand, 3> Ops;
  Ops.push_back(DAG.getEntryNode());
  Ops.push_back(CPIdx);
  Ops.push_back(DAG.getSrcValue(NULL));
  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}

// LowerFNEG - Lower FNEG by XORing the sign bit with a constant-pool mask;
// mirrors LowerFABS but with only the sign bit set.
SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    // Only the sign bit of the low double is set.
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    // Only the sign bit of the low float is set.
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(VT);
  Tys.push_back(MVT::Other);
  SmallVector<SDOperand, 3> Ops;
  Ops.push_back(DAG.getEntryNode());
  Ops.push_back(CPIdx);
  Ops.push_back(DAG.getSrcValue(NULL));
  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}

// LowerSETCC - Lower SETCC to X86ISD::CMP + X86ISD::SETCC. Most condition
// codes translate directly (translateX86CC); the FP ordered-equal and
// unordered-not-equal cases need two SETCCs combined, since they depend on
// both PF and ZF.
SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
                                        SDOperand Chain) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  unsigned X86CC;

  // Fast path: condition maps to a single x86 condition code.
  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
                     Op0, Op1, DAG)) {
    SDOperand Ops1[] = { Chain, Op0, Op1 };
    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
  }

  assert(isFP && "Illegal integer SetCC!");

  SDOperand COps[] = { Chain, Op0, Op1 };
  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);

  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    // Chain the second SETCC through the first's flag so both read the
    // same EFLAGS value.
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}

// LowerSELECT - Lower SELECT to X86ISD::CMOV, reusing the comparison that
// produced the condition when possible instead of testing the i8 value.
SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = DAG.getEntryNode();
  SDOperand Cond = Op.getOperand(0);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since flag operand cannot be shared). Use it as the condition setting
    // operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reason)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    // Without SSE, FP cmov only supports a subset of condition codes;
    // in that case fall back to the explicit test below.
    bool IllegalFPCMov = !X86ScalarSSE &&
      MVT::isFloatingPoint(Op.getValueType()) &&
      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
        !IllegalFPCMov) {
      // Re-emit the compare (flag results cannot be shared between users).
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    // Condition is a plain i8 value: compare it against zero.
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }

  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true. Hence the operands are (false-val, true-val, cc, flag).
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond.getValue(1));
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}

// LowerBRCOND - Lower BRCOND to X86ISD::BRCOND, reusing the comparison that
// produced the condition when possible (same pattern as LowerSELECT).
SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond = Op.getOperand(1);
  SDOperand Dest = Op.getOperand(2);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since flag operand cannot be shared). Use it as the condition setting
    // operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reason)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }
  // Op.getOperand(2) is the destination BB (same value as Dest above).
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Cond, Op.getOperand(2), CC, Cond.getValue(1));
}

// LowerJumpTable - Wrap the jump-table index in X86ISD::Wrapper; on 32-bit
// Darwin PIC the address is formed relative to the global base register.
SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetJumpTable(JT->getIndex(),
                                                        getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

// LowerCALL - Dispatch a call node to the lowering routine for its calling
// convention (x86-64 uses a single C convention path).
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit())
    return LowerX86_64CCCCallTo(Op, DAG);
  else
    switch (CallingConv) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      if (EnableFastCC) {
        return LowerFastCCCallTo(Op, DAG, false);
      }
      // Falls through
    case CallingConv::C:
    case CallingConv::CSRet:
      return LowerCCCCallTo(Op, DAG);
    case CallingConv::X86_StdCall:
      return LowerStdCallCCCallTo(Op, DAG);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, true);
    }
}

// LowerRET - Lower a RET node: place the return value(s) in the ABI
// registers (EAX/RAX, XMM0, ST0, or the EAX:EDX pair) and emit
// X86ISD::RET_FLAG with the callee-pops byte count.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;

  switch(Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:    // ret void.
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
  case 3: {  // One return value: operands are (chain, value, signness).
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

    if (MVT::isVector(ArgVT) ||
        (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) {
      // Integer or FP vector result -> XMM0.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::XMM0);
      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                              SDOperand());
    } else if (MVT::isInteger(ArgVT)) {
      // Integer result -> EAX / RAX.
      // The C calling convention guarantees the return value has been
      // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
      // value to be promoted MVT::i64. So we don't have to extend it to
      // 64-bit. Return the value in EAX, but mark RAX as liveout.
      unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(Reg);

      Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
      Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1),
                              SDOperand());
    } else if (!X86ScalarSSE) {
      // FP return with fp-stack value.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Op.getOperand(0));
      Ops.push_back(Op.getOperand(1));
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
    } else {
      // FP return with ScalarSSE (return on fp-stack).
      // The value lives in an SSE register but must be returned in ST0,
      // so it is routed through memory into the x87 stack.
      if (DAG.getMachineFunction().liveout_empty())
        DAG.getMachineFunction().addLiveOut(X86::ST0);

      SDOperand MemLoc;
      SDOperand Chain = Op.getOperand(0);
      SDOperand Value = Op.getOperand(1);

      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        // The value is already in memory: FLD straight from the load's
        // address instead of spilling again.
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(ArgVT)/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      // FLD the value onto the x87 stack, then mark it as the FP result.
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(MemLoc);
      Ops.push_back(DAG.getValueType(ArgVT));
      Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Copy.getValue(1));
      Ops.push_back(Copy);
      Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
    }
    break;
  }
  case 5: {  // Two integer return values -> EAX:EDX (RAX:RDX on x86-64).
    unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
    unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX;
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(Reg1);
      DAG.getMachineFunction().addLiveOut(Reg2);
    }

    // Copy the high part first, then chain the low part behind it.
    Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1));
    break;
  }
  }
  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                     Copy.getValue(1));
}

// LowerFORMAL_ARGUMENTS - Dispatch incoming-argument lowering to the routine
// for the function's calling convention, recording the name-decoration style
// for StdCall/FastCall. Cygwin "main" forces a frame pointer.
SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygwin() &&
      Fn->getName() == "main")
    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);

  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (Subtarget->is64Bit())
    return LowerX86_64CCCArguments(Op, DAG);
  else
    switch(CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      if (EnableFastCC) {
        return LowerFastCCArguments(Op, DAG);
      }
      // Falls through
    case CallingConv::C:
    case CallingConv::CSRet:
      return LowerCCCArguments(Op, DAG);
    case CallingConv::X86_StdCall:
      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(StdCall);
      return LowerStdCallCCArguments(Op, DAG);
    case CallingConv::X86_FastCall:
      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(FastCall);
      return LowerFastCallCCArguments(Op, DAG);
    }
}

// LowerMEMSET - Lower a memset intrinsic either to a libc call (when
// unaligned or small) or to rep;stos sequences.
SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;  // Treat unknown alignment as byte-aligned.

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, call memset if size is less than the threshold.
  // It knows how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    // Extend the ubyte argument to be an int value for the call.
4453 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 4454 Args.push_back(std::make_pair(Val, IntPtrTy)); 4455 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 4456 std::pair<SDOperand,SDOperand> CallResult = 4457 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 4458 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 4459 return CallResult.second; 4460 } 4461 4462 MVT::ValueType AVT; 4463 SDOperand Count; 4464 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4465 unsigned BytesLeft = 0; 4466 bool TwoRepStos = false; 4467 if (ValC) { 4468 unsigned ValReg; 4469 uint64_t Val = ValC->getValue() & 255; 4470 4471 // If the value is a constant, then we can potentially use larger sets. 4472 switch (Align & 3) { 4473 case 2: // WORD aligned 4474 AVT = MVT::i16; 4475 ValReg = X86::AX; 4476 Val = (Val << 8) | Val; 4477 break; 4478 case 0: // DWORD aligned 4479 AVT = MVT::i32; 4480 ValReg = X86::EAX; 4481 Val = (Val << 8) | Val; 4482 Val = (Val << 16) | Val; 4483 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 4484 AVT = MVT::i64; 4485 ValReg = X86::RAX; 4486 Val = (Val << 32) | Val; 4487 } 4488 break; 4489 default: // Byte aligned 4490 AVT = MVT::i8; 4491 ValReg = X86::AL; 4492 Count = Op.getOperand(3); 4493 break; 4494 } 4495 4496 if (AVT > MVT::i8) { 4497 if (I) { 4498 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4499 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4500 BytesLeft = I->getValue() % UBytes; 4501 } else { 4502 assert(AVT >= MVT::i32 && 4503 "Do not use rep;stos if not at least DWORD aligned"); 4504 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4505 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4506 TwoRepStos = true; 4507 } 4508 } 4509 4510 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 4511 InFlag); 4512 InFlag = Chain.getValue(1); 4513 } else { 4514 AVT = MVT::i8; 4515 Count = Op.getOperand(3); 4516 Chain = 
DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 4517 InFlag = Chain.getValue(1); 4518 } 4519 4520 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4521 Count, InFlag); 4522 InFlag = Chain.getValue(1); 4523 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4524 Op.getOperand(1), InFlag); 4525 InFlag = Chain.getValue(1); 4526 4527 std::vector<MVT::ValueType> Tys; 4528 Tys.push_back(MVT::Other); 4529 Tys.push_back(MVT::Flag); 4530 std::vector<SDOperand> Ops; 4531 Ops.push_back(Chain); 4532 Ops.push_back(DAG.getValueType(AVT)); 4533 Ops.push_back(InFlag); 4534 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4535 4536 if (TwoRepStos) { 4537 InFlag = Chain.getValue(1); 4538 Count = Op.getOperand(3); 4539 MVT::ValueType CVT = Count.getValueType(); 4540 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4541 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4542 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4543 Left, InFlag); 4544 InFlag = Chain.getValue(1); 4545 Tys.clear(); 4546 Tys.push_back(MVT::Other); 4547 Tys.push_back(MVT::Flag); 4548 Ops.clear(); 4549 Ops.push_back(Chain); 4550 Ops.push_back(DAG.getValueType(MVT::i8)); 4551 Ops.push_back(InFlag); 4552 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4553 } else if (BytesLeft) { 4554 // Issue stores for the last 1 - 7 bytes. 
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    // Store the remainder with at most one i32, one i16 and one i8 store,
    // each at the running Offset past the rep;stos-covered region.
    if (BytesLeft >= 4) {
      Val = (Val << 8) | Val;
      Val = (Val << 16) | Val;
      Value = DAG.getConstant(Val, MVT::i32);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      BytesLeft -= 2;
      Offset += 2;
    }
    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
    }
  }

  return Chain;
}

/// LowerMEMCPY - Lower a MEMCPY node either to a library call (unaligned or
/// small) or to rep;movs with scalar load/store pairs for the remainder.
SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  // I is non-null iff the size (operand 3) is a compile-time constant.
  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, call memcpy if size is less than the threshold.
  // It knows how to align to the right boundary first.
4601 if ((Align & 3) != 0 || 4602 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 4603 MVT::ValueType IntPtr = getPointerTy(); 4604 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4605 std::vector<std::pair<SDOperand, const Type*> > Args; 4606 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 4607 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 4608 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 4609 std::pair<SDOperand,SDOperand> CallResult = 4610 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 4611 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 4612 return CallResult.second; 4613 } 4614 4615 MVT::ValueType AVT; 4616 SDOperand Count; 4617 unsigned BytesLeft = 0; 4618 bool TwoRepMovs = false; 4619 switch (Align & 3) { 4620 case 2: // WORD aligned 4621 AVT = MVT::i16; 4622 break; 4623 case 0: // DWORD aligned 4624 AVT = MVT::i32; 4625 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4626 AVT = MVT::i64; 4627 break; 4628 default: // Byte aligned 4629 AVT = MVT::i8; 4630 Count = Op.getOperand(3); 4631 break; 4632 } 4633 4634 if (AVT > MVT::i8) { 4635 if (I) { 4636 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4637 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4638 BytesLeft = I->getValue() % UBytes; 4639 } else { 4640 assert(AVT >= MVT::i32 && 4641 "Do not use rep;movs if not at least DWORD aligned"); 4642 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4643 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4644 TwoRepMovs = true; 4645 } 4646 } 4647 4648 SDOperand InFlag(0, 0); 4649 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4650 Count, InFlag); 4651 InFlag = Chain.getValue(1); 4652 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4653 Op.getOperand(1), InFlag); 4654 InFlag = Chain.getValue(1); 4655 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RSI : X86::ESI, 4656 Op.getOperand(2), InFlag); 4657 InFlag = Chain.getValue(1); 4658 4659 std::vector<MVT::ValueType> Tys; 4660 Tys.push_back(MVT::Other); 4661 Tys.push_back(MVT::Flag); 4662 std::vector<SDOperand> Ops; 4663 Ops.push_back(Chain); 4664 Ops.push_back(DAG.getValueType(AVT)); 4665 Ops.push_back(InFlag); 4666 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4667 4668 if (TwoRepMovs) { 4669 InFlag = Chain.getValue(1); 4670 Count = Op.getOperand(3); 4671 MVT::ValueType CVT = Count.getValueType(); 4672 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4673 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4674 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4675 Left, InFlag); 4676 InFlag = Chain.getValue(1); 4677 Tys.clear(); 4678 Tys.push_back(MVT::Other); 4679 Tys.push_back(MVT::Flag); 4680 Ops.clear(); 4681 Ops.push_back(Chain); 4682 Ops.push_back(DAG.getValueType(MVT::i8)); 4683 Ops.push_back(InFlag); 4684 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4685 } else if (BytesLeft) { 4686 // Issue loads and stores for the last 1 - 7 bytes. 
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Op.getOperand(2);
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    // Copy the remainder with at most one i32, one i16 and one i8
    // load/store pair, each at the running Offset.
    if (BytesLeft >= 4) {
      Value = DAG.getLoad(MVT::i32, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
    }
  }

  return Chain;
}

/// LowerREADCYCLCECOUNTER - Lower READCYCLECOUNTER to RDTSC, returning the
/// 64-bit counter as two i32 values (EAX = low, EDX = high) plus a chain.
/// (Name spelling is historical; callers reference it as-is.)
SDOperand
X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Op.getOperand(0));
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size());
  Ops.clear();
  // Read EAX then EDX, chaining the copies through the flag values so they
  // stay adjacent to the RDTSC.
  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
                                   MVT::i32, Ops[0].getValue(2)));
  Ops.push_back(Ops[1].getValue(1));
  // Result types: { i32, i32, Other }.
  Tys[0] = Tys[1] = MVT::i32;
  Tys.push_back(MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size());
}

/// LowerVASTART - Lower VASTART. On x86-32 this just stores the address of
/// the VarArgsFrameIndex slot; on x86-64 it initializes the four-field
/// __va_list_tag structure.
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));

  if (!Subtarget->is64Bit()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
    return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
                        SV->getOffset());
  }

  // __va_list_tag:
  //   gp_offset         (0 - 6 * 8)
  //   fp_offset         (48 - 48 + 8 * 16)
  //   overflow_arg_area (point to parameters coming in memory).
  //   reg_save_area
  std::vector<SDOperand> MemOps;
  SDOperand FIN = Op.getOperand(1);
  // Store gp_offset (field offset 0).
  SDOperand Store = DAG.getStore(Op.getOperand(0),
                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
                                 FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store fp_offset (field offset 4).
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  Store = DAG.getStore(Op.getOperand(0),
                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
                       FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area (field offset 8).
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  // (field offset 16)
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(8, getPointerTy()));
  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);
  // Tie all four stores together into a single chain result.
  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}

/// LowerINTRINSIC_WO_CHAIN - Custom-lower the SSE/SSE2 (u)comi comparison
/// intrinsics to X86ISD::COMI/UCOMI plus a SETCC on the resulting flags;
/// all other intrinsics are left to the default expansion.
SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    // Map each intrinsic to the ordered (COMI) or unordered (UCOMI) compare
    // node and the condition code it tests.
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }

    // Translate the ISD condition code into an X86 condition code (this may
    // also commute LHS/RHS).
    unsigned X86CC;
    SDOperand LHS = Op.getOperand(1);
    SDOperand RHS = Op.getOperand(2);
    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);

    // Emit the compare, then a SETCC on its flag result, and widen the i8
    // result to the i32 the intrinsic is declared to return.
    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG, DAG.getEntryNode());
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::CALL:               return LowerCALL(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

/// getTargetNodeName - Return a readable name for a target-specific DAG
/// node opcode, or NULL for opcodes this target does not define.
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  }
}

/// isLegalAddressImmediate - Return true if the integer value or
/// GlobalValue can be used as the offset of the target addressing mode.
bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
  // X86 allows a sign-extended 32-bit immediate field.
  // NOTE(review): this accepts the 33-bit range (-2^32, 2^32-1), which is
  // wider than a true sign-extended 32-bit displacement [-2^31, 2^31-1];
  // confirm whether the extra range is intentional for 32-bit targets.
  return (V > -(1LL << 32) && V < (1LL << 32)-1);
}

bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
  // GV is 64-bit but displacement field is 32-bit unless we are in small code
  // model. Mac OS X happens to support only small PIC code model.
  // FIXME: better support for other OS's.
  if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin())
    return false;
  if (Subtarget->isTargetDarwin()) {
    Reloc::Model RModel = getTargetMachine().getRelocationModel();
    if (RModel == Reloc::Static)
      return true;
    else if (RModel == Reloc::DynamicNoPIC)
      // Some Darwin globals need an extra indirection load and so cannot be
      // folded as a plain displacement.
      return !DarwinGVRequiresExtraLoad(GV);
    else
      return false;
  } else
    return true;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  // <= 4 elements is always handled; wider masks must match one of the
  // splat/pshuflw-pshufhw/unpack patterns the lowering code knows.
  return (Mask.Val->getNumOperands() <= 4 ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}

/// isVectorClearMaskLegal - Return true if a build_vector whose operands are
/// BVOps (elements of type EVT) can be lowered as a "vector clear" shuffle.
bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    // 4-element vectors must match a MOVL or SHUFP pattern (possibly with
    // commuted operands).
    return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) ||
            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

/// InsertAtEndOfBasicBlock - Expand pseudo instructions (vector/FP CMOVs and
/// FP-to-int-in-memory) that require custom control flow or machine-level
/// surgery after instruction selection.
MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    // Branch directly to the sink block when the condition holds (the
    // "true" value is already in place in thisMBB).
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB).addImplicitDefsUses();
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
        e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while(!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    // Rebuild the memory address from the pseudo's five address operands.
    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

/// computeMaskedBitsForTargetNode - Compute known-zero/known-one bits for
/// X86-specific DAG nodes (currently only X86ISD::SETCC, which produces
/// 0 or 1 in its value type).
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // All bits above bit 0 are known zero.
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  // Indices >= NumElems select from the second shuffle input.
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    // Look through a nested shuffle recursively.
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
    return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
/// NOTE: Offset is accumulated into (+=) on the ADD paths and left untouched
/// on the Wrapper path, so callers must initialize it before calling.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (N->getOpcode() == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
5255static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 5256 MachineFrameInfo *MFI) { 5257 if (N->getOperand(0).Val != Base->getOperand(0).Val) 5258 return false; 5259 5260 SDOperand Loc = N->getOperand(1); 5261 SDOperand BaseLoc = Base->getOperand(1); 5262 if (Loc.getOpcode() == ISD::FrameIndex) { 5263 if (BaseLoc.getOpcode() != ISD::FrameIndex) 5264 return false; 5265 int FI = dyn_cast<FrameIndexSDNode>(Loc)->getIndex(); 5266 int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 5267 int FS = MFI->getObjectSize(FI); 5268 int BFS = MFI->getObjectSize(BFI); 5269 if (FS != BFS || FS != Size) return false; 5270 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 5271 } else { 5272 GlobalValue *GV1 = NULL; 5273 GlobalValue *GV2 = NULL; 5274 int64_t Offset1 = 0; 5275 int64_t Offset2 = 0; 5276 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 5277 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 5278 if (isGA1 && isGA2 && GV1 == GV2) 5279 return Offset1 == (Offset2 + Dist*Size); 5280 } 5281 5282 return false; 5283} 5284 5285static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 5286 const X86Subtarget *Subtarget) { 5287 GlobalValue *GV; 5288 int64_t Offset; 5289 if (isGAPlusOffset(Base, GV, Offset)) 5290 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 5291 else { 5292 assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); 5293 int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex(); 5294 if (BFI < 0) 5295 // Fixed objects do not specify alignment, however the offsets are known. 
5296 return ((Subtarget->getStackAlignment() % 16) == 0 && 5297 (MFI->getObjectOffset(BFI) % 16) == 0); 5298 else 5299 return MFI->getObjectAlignment(BFI) >= 16; 5300 } 5301 return false; 5302} 5303 5304 5305/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 5306/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 5307/// if the load addresses are consecutive, non-overlapping, and in the right 5308/// order. 5309static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 5310 const X86Subtarget *Subtarget) { 5311 MachineFunction &MF = DAG.getMachineFunction(); 5312 MachineFrameInfo *MFI = MF.getFrameInfo(); 5313 MVT::ValueType VT = N->getValueType(0); 5314 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 5315 SDOperand PermMask = N->getOperand(2); 5316 int NumElems = (int)PermMask.getNumOperands(); 5317 SDNode *Base = NULL; 5318 for (int i = 0; i < NumElems; ++i) { 5319 SDOperand Idx = PermMask.getOperand(i); 5320 if (Idx.getOpcode() == ISD::UNDEF) { 5321 if (!Base) return SDOperand(); 5322 } else { 5323 SDOperand Arg = 5324 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 5325 if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val)) 5326 return SDOperand(); 5327 if (!Base) 5328 Base = Arg.Val; 5329 else if (!isConsecutiveLoad(Arg.Val, Base, 5330 i, MVT::getSizeInBits(EVT)/8,MFI)) 5331 return SDOperand(); 5332 } 5333 } 5334 5335 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 5336 if (isAlign16) { 5337 LoadSDNode *LD = cast<LoadSDNode>(Base); 5338 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5339 LD->getSrcValueOffset()); 5340 } else { 5341 // Just use movups, it's shorter. 
5342 std::vector<MVT::ValueType> Tys; 5343 Tys.push_back(MVT::v4f32); 5344 Tys.push_back(MVT::Other); 5345 SmallVector<SDOperand, 3> Ops; 5346 Ops.push_back(Base->getOperand(0)); 5347 Ops.push_back(Base->getOperand(1)); 5348 Ops.push_back(Base->getOperand(2)); 5349 return DAG.getNode(ISD::BIT_CONVERT, VT, 5350 DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size())); 5351 } 5352} 5353 5354/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 5355static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 5356 const X86Subtarget *Subtarget) { 5357 SDOperand Cond = N->getOperand(0); 5358 5359 // If we have SSE[12] support, try to form min/max nodes. 5360 if (Subtarget->hasSSE2() && 5361 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 5362 if (Cond.getOpcode() == ISD::SETCC) { 5363 // Get the LHS/RHS of the select. 5364 SDOperand LHS = N->getOperand(1); 5365 SDOperand RHS = N->getOperand(2); 5366 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 5367 5368 unsigned Opcode = 0; 5369 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 5370 switch (CC) { 5371 default: break; 5372 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 5373 case ISD::SETULE: 5374 case ISD::SETLE: 5375 if (!UnsafeFPMath) break; 5376 // FALL THROUGH. 5377 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 5378 case ISD::SETLT: 5379 Opcode = X86ISD::FMIN; 5380 break; 5381 5382 case ISD::SETOGT: // (X > Y) ? X : Y -> max 5383 case ISD::SETUGT: 5384 case ISD::SETGT: 5385 if (!UnsafeFPMath) break; 5386 // FALL THROUGH. 5387 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 5388 case ISD::SETGE: 5389 Opcode = X86ISD::FMAX; 5390 break; 5391 } 5392 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 5393 switch (CC) { 5394 default: break; 5395 case ISD::SETOGT: // (X > Y) ? Y : X -> min 5396 case ISD::SETUGT: 5397 case ISD::SETGT: 5398 if (!UnsafeFPMath) break; 5399 // FALL THROUGH. 
5400 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 5401 case ISD::SETGE: 5402 Opcode = X86ISD::FMIN; 5403 break; 5404 5405 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 5406 case ISD::SETULE: 5407 case ISD::SETLE: 5408 if (!UnsafeFPMath) break; 5409 // FALL THROUGH. 5410 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 5411 case ISD::SETLT: 5412 Opcode = X86ISD::FMAX; 5413 break; 5414 } 5415 } 5416 5417 if (Opcode) 5418 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 5419 } 5420 5421 } 5422 5423 return SDOperand(); 5424} 5425 5426 5427SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 5428 DAGCombinerInfo &DCI) const { 5429 SelectionDAG &DAG = DCI.DAG; 5430 switch (N->getOpcode()) { 5431 default: break; 5432 case ISD::VECTOR_SHUFFLE: 5433 return PerformShuffleCombine(N, DAG, Subtarget); 5434 case ISD::SELECT: 5435 return PerformSELECTCombine(N, DAG, Subtarget); 5436 } 5437 5438 return SDOperand(); 5439} 5440 5441//===----------------------------------------------------------------------===// 5442// X86 Inline Assembly Support 5443//===----------------------------------------------------------------------===// 5444 5445/// getConstraintType - Given a constraint letter, return the type of 5446/// constraint it is for this target. 5447X86TargetLowering::ConstraintType 5448X86TargetLowering::getConstraintType(char ConstraintLetter) const { 5449 switch (ConstraintLetter) { 5450 case 'A': 5451 case 'r': 5452 case 'R': 5453 case 'l': 5454 case 'q': 5455 case 'Q': 5456 case 'x': 5457 case 'Y': 5458 return C_RegisterClass; 5459 default: return TargetLowering::getConstraintType(ConstraintLetter); 5460 } 5461} 5462 5463/// isOperandValidForConstraint - Return the specified operand (possibly 5464/// modified) if the specified SDOperand is valid for the specified target 5465/// constraint letter, otherwise return null. 
5466SDOperand X86TargetLowering:: 5467isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) { 5468 switch (Constraint) { 5469 default: break; 5470 case 'i': 5471 // Literal immediates are always ok. 5472 if (isa<ConstantSDNode>(Op)) return Op; 5473 5474 // If we are in non-pic codegen mode, we allow the address of a global to 5475 // be used with 'i'. 5476 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { 5477 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 5478 return SDOperand(0, 0); 5479 5480 if (GA->getOpcode() != ISD::TargetGlobalAddress) 5481 Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), 5482 GA->getOffset()); 5483 return Op; 5484 } 5485 5486 // Otherwise, not valid for this mode. 5487 return SDOperand(0, 0); 5488 } 5489 return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG); 5490} 5491 5492 5493std::vector<unsigned> X86TargetLowering:: 5494getRegClassForInlineAsmConstraint(const std::string &Constraint, 5495 MVT::ValueType VT) const { 5496 if (Constraint.size() == 1) { 5497 // FIXME: not handling fp-stack yet! 5498 // FIXME: not handling MMX registers yet ('y' constraint). 
5499 switch (Constraint[0]) { // GCC X86 Constraint Letters 5500 default: break; // Unknown constraint letter 5501 case 'A': // EAX/EDX 5502 if (VT == MVT::i32 || VT == MVT::i64) 5503 return make_vector<unsigned>(X86::EAX, X86::EDX, 0); 5504 break; 5505 case 'r': // GENERAL_REGS 5506 case 'R': // LEGACY_REGS 5507 if (VT == MVT::i32) 5508 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 5509 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 5510 else if (VT == MVT::i16) 5511 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 5512 X86::SI, X86::DI, X86::BP, X86::SP, 0); 5513 else if (VT == MVT::i8) 5514 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0); 5515 break; 5516 case 'l': // INDEX_REGS 5517 if (VT == MVT::i32) 5518 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 5519 X86::ESI, X86::EDI, X86::EBP, 0); 5520 else if (VT == MVT::i16) 5521 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 5522 X86::SI, X86::DI, X86::BP, 0); 5523 else if (VT == MVT::i8) 5524 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0); 5525 break; 5526 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 5527 case 'Q': // Q_REGS 5528 if (VT == MVT::i32) 5529 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); 5530 else if (VT == MVT::i16) 5531 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); 5532 else if (VT == MVT::i8) 5533 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0); 5534 break; 5535 case 'x': // SSE_REGS if SSE1 allowed 5536 if (Subtarget->hasSSE1()) 5537 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 5538 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 5539 0); 5540 return std::vector<unsigned>(); 5541 case 'Y': // SSE_REGS if SSE2 allowed 5542 if (Subtarget->hasSSE2()) 5543 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 5544 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 5545 0); 5546 
return std::vector<unsigned>(); 5547 } 5548 } 5549 5550 return std::vector<unsigned>(); 5551} 5552 5553std::pair<unsigned, const TargetRegisterClass*> 5554X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5555 MVT::ValueType VT) const { 5556 // Use the default implementation in TargetLowering to convert the register 5557 // constraint into a member of a register class. 5558 std::pair<unsigned, const TargetRegisterClass*> Res; 5559 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5560 5561 // Not found as a standard register? 5562 if (Res.second == 0) { 5563 // GCC calls "st(0)" just plain "st". 5564 if (StringsEqualNoCase("{st}", Constraint)) { 5565 Res.first = X86::ST0; 5566 Res.second = X86::RSTRegisterClass; 5567 } 5568 5569 return Res; 5570 } 5571 5572 // Otherwise, check to see if this is a register class of the wrong value 5573 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to 5574 // turn into {ax},{dx}. 5575 if (Res.second->hasType(VT)) 5576 return Res; // Correct type already, nothing to do. 5577 5578 // All of the single-register GCC register classes map their values onto 5579 // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we 5580 // really want an 8-bit or 32-bit register, map to the appropriate register 5581 // class and return the appropriate register. 
5582 if (Res.second != X86::GR16RegisterClass) 5583 return Res; 5584 5585 if (VT == MVT::i8) { 5586 unsigned DestReg = 0; 5587 switch (Res.first) { 5588 default: break; 5589 case X86::AX: DestReg = X86::AL; break; 5590 case X86::DX: DestReg = X86::DL; break; 5591 case X86::CX: DestReg = X86::CL; break; 5592 case X86::BX: DestReg = X86::BL; break; 5593 } 5594 if (DestReg) { 5595 Res.first = DestReg; 5596 Res.second = Res.second = X86::GR8RegisterClass; 5597 } 5598 } else if (VT == MVT::i32) { 5599 unsigned DestReg = 0; 5600 switch (Res.first) { 5601 default: break; 5602 case X86::AX: DestReg = X86::EAX; break; 5603 case X86::DX: DestReg = X86::EDX; break; 5604 case X86::CX: DestReg = X86::ECX; break; 5605 case X86::BX: DestReg = X86::EBX; break; 5606 case X86::SI: DestReg = X86::ESI; break; 5607 case X86::DI: DestReg = X86::EDI; break; 5608 case X86::BP: DestReg = X86::EBP; break; 5609 case X86::SP: DestReg = X86::ESP; break; 5610 } 5611 if (DestReg) { 5612 Res.first = DestReg; 5613 Res.second = Res.second = X86::GR32RegisterClass; 5614 } 5615 } else if (VT == MVT::i64) { 5616 unsigned DestReg = 0; 5617 switch (Res.first) { 5618 default: break; 5619 case X86::AX: DestReg = X86::RAX; break; 5620 case X86::DX: DestReg = X86::RDX; break; 5621 case X86::CX: DestReg = X86::RCX; break; 5622 case X86::BX: DestReg = X86::RBX; break; 5623 case X86::SI: DestReg = X86::RSI; break; 5624 case X86::DI: DestReg = X86::RDI; break; 5625 case X86::BP: DestReg = X86::RBP; break; 5626 case X86::SP: DestReg = X86::RSP; break; 5627 } 5628 if (DestReg) { 5629 Res.first = DestReg; 5630 Res.second = Res.second = X86::GR64RegisterClass; 5631 } 5632 } 5633 5634 return Res; 5635} 5636 5637