// X86ISelLowering.cpp at revision 3df24e667f04a7003342b534310919abc9c87418
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

// Forward declarations.
static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG);

X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  bool Fast = false;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  }

  // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
  setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  }
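
  // The Promote choices above rely on range containment: every value of the
  // narrower unsigned type is representable in the next wider signed type,
  // so e.g. FP_TO_UINT i16 can be done as FP_TO_SINT i32 plus a truncate
  // (0..65535 is non-negative in i32), and on x86-64 FP_TO_UINT i32 becomes
  // FP_TO_SINT i64 the same way.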
  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
    setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  setOperationAction(ISD::MULHS , MVT::i8 , Expand);
  setOperationAction(ISD::MULHU , MVT::i8 , Expand);
  setOperationAction(ISD::SDIV , MVT::i8 , Expand);
  setOperationAction(ISD::UDIV , MVT::i8 , Expand);
  setOperationAction(ISD::SREM , MVT::i8 , Expand);
  setOperationAction(ISD::UREM , MVT::i8 , Expand);
  setOperationAction(ISD::MULHS , MVT::i16 , Expand);
  setOperationAction(ISD::MULHU , MVT::i16 , Expand);
  setOperationAction(ISD::SDIV , MVT::i16 , Expand);
  setOperationAction(ISD::UDIV , MVT::i16 , Expand);
  setOperationAction(ISD::SREM , MVT::i16 , Expand);
  setOperationAction(ISD::UREM , MVT::i16 , Expand);
  setOperationAction(ISD::MULHS , MVT::i32 , Expand);
  setOperationAction(ISD::MULHU , MVT::i32 , Expand);
  setOperationAction(ISD::SDIV , MVT::i32 , Expand);
  setOperationAction(ISD::UDIV , MVT::i32 , Expand);
  setOperationAction(ISD::SREM , MVT::i32 , Expand);
  setOperationAction(ISD::UREM , MVT::i32 , Expand);
  setOperationAction(ISD::MULHS , MVT::i64 , Expand);
  setOperationAction(ISD::MULHU , MVT::i64 , Expand);
  setOperationAction(ISD::SDIV , MVT::i64 , Expand);
  setOperationAction(ISD::UDIV , MVT::i64 , Expand);
  setOperationAction(ISD::SREM , MVT::i64 , Expand);
  setOperationAction(ISD::UREM , MVT::i64 , Expand);

  setOperationAction(ISD::BR_JT , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);
  setOperationAction(ISD::FREM , MVT::f80 , Expand);
  setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);

  setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
  setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
  setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
  setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
  setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
  setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
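
  // CTTZ/CTLZ are Custom because they are selected to BSF/BSR, which leave
  // the destination undefined when the source is zero, so the lowering has
  // to handle that case separately (and CTLZ must also convert BSR's
  // bit-index result into a leading-zero count). CTPOP has no single x86
  // instruction at this point, so it expands to the generic bit-twiddling
  // sequence.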
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
    setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  setOperationAction(ISD::SELECT , MVT::i8 , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SELECT , MVT::f80 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::f80 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC , MVT::i64 , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
    setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
    setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
  }

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH , MVT::Other, Legal);

  if (!Subtarget->hasSSE2())
    setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);

  // Expand certain atomics
  setOperationAction(ISD::ATOMIC_CMP_SWAP_8 , MVT::i8, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_16, MVT::i16, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_32, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_64, MVT::i64, Custom);

  setOperationAction(ISD::ATOMIC_LOAD_SUB_8, MVT::i8, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Expand);
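
  // ATOMIC_LOAD_SUB is Expand rather than Custom: x86's locked XADD/ADD only
  // add, so the legalizer can rewrite an atomic "x -= y" as an atomic add of
  // -y. The compare-and-swap nodes stay Custom so they can be selected to
  // LOCK CMPXCHG of the matching width.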
  // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing()) {
    setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::VAARG , MVT::Other, Custom);
    setOperationAction(ISD::VACOPY , MVT::Other, Custom);
  } else {
    setOperationAction(ISD::VAARG , MVT::Other, Expand);
    setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0)); // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Floating truncations from f80 and extensions to f80 go through memory.
    // If optimizing, we lie about this though and handle it in
    // InstructionSelectPreprocess so that dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f64, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }
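
    // The FABS/FNEG/FCOPYSIGN custom lowerings above are sign-bit games on
    // the IEEE representation: FABS is an ANDPS/ANDPD with a constant-pool
    // mask clearing the top bit, FNEG an XORPS/XORPD flipping it, and
    // FCOPYSIGN an AND/ANDN/OR combine of the magnitude and sign halves.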
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

    // SSE <-> X87 conversions go through memory. If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f64, Expand);
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      // And x87->x87 truncations also.
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations go through memory. If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }
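
  // The "legal" FP immediates registered above are exactly the values the
  // hardware can materialize without a load: SSE gets +0.0 from XORPS/XORPD,
  // x87 gets +0.0/+1.0 from FLDZ/FLD1 and the negated forms via a following
  // FCHS. Any other FP constant is lowered to a constant-pool load.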
  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  {
    APFloat TmpFlt(+0.0);
    TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt); // FLD0
    TmpFlt.changeSign();
    addLegalFPImmediate(TmpFlt); // FLD0/FCHS
    APFloat TmpFlt2(+1.0);
    TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt2); // FLD1
    TmpFlt2.changeSign();
    addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
  }

  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN , MVT::f80 , Expand);
    setOperationAction(ISD::FCOS , MVT::f80 , Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW , MVT::f32 , Expand);
  setOperationAction(ISD::FPOW , MVT::f64 , Expand);
  setOperationAction(ISD::FPOW , MVT::f80 , Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
  }
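
  // The Promote/AddPromotedToType pairs above funnel all the 64-bit MMX
  // types into v1i64 for AND/OR/XOR and LOAD: bitwise operations and
  // whole-register loads are insensitive to element boundaries, so a single
  // v1i64 pattern per operation covers v8i8, v4i16, v2i32 and v2f32 via
  // bitcasts.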
  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
    setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
    setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
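
    // VSETCC is Custom because SSE comparisons don't produce an i1 result:
    // CMPPS/CMPPD and PCMPEQ*/PCMPGT* yield a per-element mask of all ones
    // or all zeros, and integer compares only exist in equality and
    // signed-greater-than forms, so the lowering derives the remaining
    // predicates by swapping operands, inverting masks, or adjusting
    // unsigned operands.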
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);

  }

  if (Subtarget->hasSSE41()) {
    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    setOperationAction(ISD::MUL, MVT::v2i64, Legal);

    // i8 and i16 vectors are custom, because the source register and source
    // memory operand types are not the same width. f32 vectors are
    // custom since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
    }
  }

  if (Subtarget->hasSSE42()) {
    setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
  }
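
  // With SSE4.1, PINSRD/PEXTRD make i32 element insert/extract directly
  // legal, while i8/i16 stay Custom for the width-mismatch reason noted
  // above; the v2i64 VSETCC is only Custom under SSE4.2 because that is
  // where PCMPGTQ, the 64-bit element compare, first appears.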
  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::STORE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
  setPrefLoopAlignment(16);
}


MVT X86TargetLowering::getSetCCResultType(const SDValue &) const {
  return MVT::i8;
}


/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getBitWidth() == 128)
      MaxAlign = 16;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
  return;
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  if (Subtarget->is64Bit()) {
    // Max of 8 and alignment of type.
    unsigned TyAlign = getTargetData()->getABITypeAlignment(Ty);
    if (TyAlign > 8)
      return TyAlign;
    return 8;
  }

  unsigned Align = 4;
  if (Subtarget->hasSSE1())
    getMaxByValAlign(Ty, Align);
  return Align;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
/// determining it.
MVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
                                       bool isSrcConst, bool isSrcStr) const {
  if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
    return MVT::v4i32;
  if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
    return MVT::v4f32;
  if (Subtarget->is64Bit() && Size >= 8)
    return MVT::i64;
  return MVT::i32;
}
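
// For example, with SSE2 a 16-byte memcpy from a constant string reports
// v4i32, so the memory-op lowering can move it as a single 16-byte vector
// load/store pair; without SSE the same copy would presumably be broken
// into i64 chunks on x86-64 and i32 chunks elsewhere, per the fallthrough
// cases above.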
/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDValue Chain = Op.getOperand(0);

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDValue TailCall = Chain;
    SDValue TargetAddress = TailCall.getOperand(1);
    SDValue StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDValue,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDValue Flag;

  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue ValToCopy = Op.getOperand(i*2+1);

    // Returns in ST0/ST1 are handled specially: these are pushed as operands to
    // the RET instruction and handled by the FP Stackifier.
    if (RVLocs[i].getLocReg() == X86::ST0 ||
        RVLocs[i].getLocReg() == X86::ST1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy);
      RetOps.push_back(ValToCopy);
      // Don't emit a copytoreg.
      continue;
    }

    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
    Flag = Chain.getValue(1);
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax.
  if (Subtarget->is64Bit() &&
      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy());

    Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag);
    Flag = Chain.getValue(1);
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, &RetOps[0], RetOps.size());
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDValue Chain, SDValue InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDValue, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    MVT CopyVT = RVLocs[i].getValVT();

    // If this is a call to a function that returns an fp value on the floating
    // point stack, but where we prefer to use the value in xmm registers, copy
    // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[i].getLocReg() == X86::ST0 ||
         RVLocs[i].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
      CopyVT = MVT::f80;
    }

    Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                               CopyVT, InFlag).getValue(1);
    SDValue Val = Chain.getValue(0);
    InFlag = Chain.getValue(2);

    if (CopyVT != RVLocs[i].getValVT()) {
      // Round the F80 to the right size, which also moves to the appropriate
      // xmm register.
      Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1));
    }

    ResultVals.push_back(Val);
  }
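
  // The constant 1 passed to FP_ROUND above is its "truncation is exact"
  // flag: since the callee's declared result type is f32/f64, the f80 copied
  // out of ST0/ST1 holds a value of that precision, so rounding it back
  // down loses nothing and later passes may fold the FP_ROUND freely.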
  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getMergeValues(TheCall->getVTList(), &ResultVals[0],
                            ResultVals.size()).getNode();
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention just a little: the callee should
//  clean up the stack, not the caller. Symbols should also be decorated in
//  some fancy way :) It doesn't support any vector arguments.
//  For info on the fast calling convention see Fast Calling Convention (tail
//  call) implementation LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  MF.getRegInfo().addLiveIn(PReg, VReg);
  return VReg;
}

/// CallIsStructReturn - Determines whether a CALL node uses struct return
/// semantics.
static bool CallIsStructReturn(SDValue Op) {
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  if (!NumOps)
    return false;

  return cast<ARG_FLAGSSDNode>(Op.getOperand(6))->getArgFlags().isSRet();
}

/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
/// return semantics.
static bool ArgsAreStructReturn(SDValue Op) {
  unsigned NumArgs = Op.getNode()->getNumValues() - 1;
  if (!NumArgs)
    return false;

  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
}

/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
/// the callee to pop its own arguments. Callee pop is necessary to support tail
/// calls.
bool X86TargetLowering::IsCalleePop(SDValue Op) {
  bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (IsVarArg)
    return false;

  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}
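
// Callee pop means returning with `ret imm16`, which pops the argument bytes
// on the way out. A vararg function can never do that, since only the caller
// knows how many bytes it pushed, and fastcc only does it when tail call
// optimization is enabled so that caller and callee agree on who owns the
// argument area.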
/// CCAssignFnForNode - Selects the correct CCAssignFn for a CALL or
/// FORMAL_ARGUMENTS node.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDValue Op) const {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else {
      if (CC == CallingConv::Fast && PerformTailCallOpt)
        return CC_X86_64_TailCall;
      else
        return CC_X86_64_C;
    }
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast && PerformTailCallOpt)
    return CC_X86_32_TailCall;
  else
    return CC_X86_32_C;
}

/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (CC == CallingConv::X86_FastCall)
    return FastCall;
  else if (CC == CallingConv::X86_StdCall)
    return StdCall;
  return None;
}


/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
/// in a register before calling.
bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
  return !IsTailCall && !Is64Bit &&
         getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
         Subtarget->isPICStyleGOT();
}

/// CallRequiresFnAddressInReg - Check whether the call requires the function
/// address to be loaded in a register.
bool
X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
  return !Is64Bit && IsTailCall &&
         getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
         Subtarget->isPICStyleGOT();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
}

SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
                                            const CCValAssign &VA,
                                            MachineFrameInfo *MFI,
                                            unsigned CC,
                                            SDValue Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags =
    cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
  bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can be
  // changed with more analysis.
  // In case of tail call optimization mark all arguments mutable, since they
  // could be overwritten by lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
                                  VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), Root, FIN,
                     PseudoSourceValue::getFixedStack(FI), 0);
}

SDValue
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));

  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();
  bool IsWin64 = Subtarget->isTargetWin64();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(Op));

  SmallVector<SDValue, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
        RC = X86::VR128RegisterClass;
      else if (RegVT.isVector()) {
        assert(RegVT.getSizeInBits() == 64);
        if (!Is64Bit)
          RC = X86::VR64RegisterClass; // MMX values are passed in MMXs.
        else {
          // Darwin calling convention passes MMX values in either GPRs or
          // XMMs in x86-64. Other targets pass them in memory.
          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
            RC = X86::VR128RegisterClass; // MMX values are passed in XMMs.
            RegVT = MVT::v2i64;
          } else {
            RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs.
            RegVT = MVT::i64;
          }
        }
      } else {
        assert(0 && "Unknown argument type!");
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
      if (Is64Bit && RegVT != VA.getLocVT()) {
        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
        else if (RC == X86::VR128RegisterClass) {
          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i64, ArgValue,
                                 DAG.getConstant(0, MVT::i64));
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
        }
      }

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
    }
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. Save the argument into
  // a virtual register so that we can access it from the return points.
  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]);
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root);
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // align stack specially for tail calls
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
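
  // On x86-64 up to six GPRs (RDI, RSI, RDX, RCX, R8, R9) and eight XMM
  // registers can carry variadic arguments (four and four on Win64), so the
  // register save area built below is TotalNumIntRegs*8 + TotalNumXMMRegs*16
  // bytes (176 for SysV); va_list's gp_offset/fp_offset index into it, which
  // is why VarArgsGPOffset/VarArgsFPOffset start past the registers already
  // consumed by the fixed arguments.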
  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    if (Is64Bit || CC != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    }
    if (Is64Bit) {
      unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;

      // FIXME: We should really autogenerate these arrays
      static const unsigned GPR64ArgRegsWin64[] = {
        X86::RCX, X86::RDX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegsWin64[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
      };
      static const unsigned GPR64ArgRegs64Bit[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs64Bit[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };
      const unsigned *GPR64ArgRegs, *XMMArgRegs;

      if (IsWin64) {
        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
        GPR64ArgRegs = GPR64ArgRegsWin64;
        XMMArgRegs = XMMArgRegsWin64;
      } else {
        TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
        GPR64ArgRegs = GPR64ArgRegs64Bit;
        XMMArgRegs = XMMArgRegs64Bit;
      }
      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                       TotalNumIntRegs);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                       TotalNumXMMRegs);

      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so they
      // may be loaded by dereferencing the result of va_next.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
                                                 TotalNumXMMRegs * 16, 16);

      // Store the integer parameter registers.
      SmallVector<SDValue, 8> MemOps;
      SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      SDValue FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getIntPtrConstant(VarArgsGPOffset));
      for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
        unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                  X86::GR64RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        SDValue Store =
          DAG.getStore(Val.getValue(1), Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(8));
      }

      // Now store the XMM (fp + vector) parameter registers.
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                        DAG.getIntPtrConstant(VarArgsFPOffset));
      for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
        unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                  X86::VR128RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
        SDValue Store =
          DAG.getStore(Val.getValue(1), Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(16));
      }
      if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOps[0], MemOps.size());
    }
  }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are
  // aligned.
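  // (Concretely: when StackSize is already a multiple of 8 we add 4, so that
  // StackSize plus the 4-byte return address pushed by the call comes back
  // to a multiple of 8 and the argument area keeps 8-byte alignment.)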
  if (!Is64Bit && CC == CallingConv::X86_FastCall &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
      (StackSize & 7) == 0)
    StackSize += 4;

  ArgValues.push_back(Root);

  // Some CCs need callee pop.
  if (IsCalleePop(Op)) {
    BytesToPopOnReturn = StackSize; // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0; // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!Is64Bit && ArgsAreStructReturn(Op))
      BytesToPopOnReturn = 4;
    BytesCallerReserves = StackSize;
  }

  if (!Is64Bit) {
    RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
    if (CC == CallingConv::X86_FastCall)
      VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
  }

  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
                            ArgValues.size()).getValue(Op.getResNo());
}

SDValue
X86TargetLowering::LowerMemOpCallTo(SDValue Op, SelectionDAG &DAG,
                                    const SDValue &StackPtr,
                                    const CCValAssign &VA,
                                    SDValue Chain,
                                    SDValue Arg) {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  ISD::ArgFlagsTy Flags =
    cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->getArgFlags();
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  }
  return DAG.getStore(Chain, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
}

/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
                                           SDValue &OutRetAddr,
                                           SDValue Chain,
                                           bool IsTailCall,
                                           bool Is64Bit,
                                           int FPDiff) {
  if (!IsTailCall || FPDiff == 0) return Chain;

  // Adjust the return address stack slot.
  MVT VT = getPointerTy();
  OutRetAddr = getReturnAddressFrameIndex(DAG);
  // Load the "old" return address.
  OutRetAddr = DAG.getLoad(VT, Chain, OutRetAddr, NULL, 0);
  return SDValue(OutRetAddr.getNode(), 1);
}

/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff != 0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
                         SDValue Chain, SDValue RetAddrFrIdx,
                         bool Is64Bit, int FPDiff) {
  // Store the return address to the appropriate stack slot.
  if (!FPDiff) return Chain;
  // Calculate the new stack slot for the return address.
  int SlotSize = Is64Bit ? 8 : 4;
  int NewReturnAddrFI =
    MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
  MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
  Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
  return Chain;
}

SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Chain = Op.getOperand(0);
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
                    && CC == CallingConv::Fast && PerformTailCallOpt;
  SDValue Callee = Op.getOperand(4);
  bool Is64Bit = Subtarget->is64Bit();
  bool IsStructRet = CallIsStructReturn(Op);

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.getNode(), CCAssignFnForNode(Op));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  // Make sure the argument area takes 8n+4 bytes so that the start of the
  // arguments, and the arguments after the return address has been pushed,
  // are aligned.
  if (!Is64Bit && CC == CallingConv::X86_FastCall &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
      (NumBytes & 7) == 0)
    NumBytes += 4;

  int FPDiff = 0;
  if (IsTailCall) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed =
      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
    FPDiff = NumBytesCallerPushed - NumBytes;

    // Set the delta of movement of the return address stack slot, but only
    // if the delta is greater than the previous delta.
    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));

  SDValue RetAddrFrIdx;
  // Load the return address for tail calls.
  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
                                  FPDiff);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = Op.getOperand(5+2*VA.getValNo());
    bool isByVal = cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->
                     getArgFlags().isByVal();

    // Promote the value if needed.
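    // (Illustrative, not original commentary: an i8 argument assigned an i32
    // location with SExt loc info comes out of the switch below as a
    // SIGN_EXTEND to i32; Full loc info passes Arg through unchanged.)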
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      if (Is64Bit) {
        MVT RegVT = VA.getLocVT();
        if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
          switch (VA.getLocReg()) {
          default:
            break;
          case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
          case X86::R8: {
            // Special case: passing MMX values in GPR registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
            break;
          }
          case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
          case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
            // Special case: passing MMX values in XMM registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
            Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Arg);
            Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
                              DAG.getNode(ISD::UNDEF, MVT::v2i64), Arg,
                              getMOVLMask(2, DAG));
            break;
          }
          }
      }
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      if (!IsTailCall || (IsTailCall && isByVal)) {
        assert(VA.isMemLoc());
        if (StackPtr.getNode() == 0)
          StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());

        MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                               Arg));
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers, so in the
  // case of tail call optimization the copies to registers are lowered later.
  if (!IsTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                               InFlag);
      InFlag = Chain.getValue(1);
    }

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
  if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  // If we are tail calling and generating PIC/GOT style code, load the
  // address of the callee into ECX. The value in ECX is used as the target of
  // the tail jump. This is done to circumvent the ebx/callee-saved problem
  // for tail calls on PIC/GOT architectures. Normally we would just put the
  // address of the GOT into ebx and then call target@PLT. But for tail calls
  // ebx would be restored (since ebx is callee saved) before jumping to the
  // target@PLT.
  if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
    // Note: The actual moving to ECX is done further down.
    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    if (G && !G->getGlobal()->hasHiddenVisibility() &&
        !G->getGlobal()->hasProtectedVisibility())
      Callee = LowerGlobalAddress(Callee, DAG);
    else if (isa<ExternalSymbolSDNode>(Callee))
      Callee = LowerExternalSymbol(Callee, DAG);
  }

  if (Is64Bit && isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...)
    // in the declaration) %al is used as a hidden argument to specify the
    // number of SSE registers used. The contents of %al do not need to match
    // exactly the number of registers, but must be an upper bound on the
    // number of SSE registers used and is in the range 0 - 8 inclusive.

    // FIXME: Verify this on Win64
    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (IsTailCall) {
    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    int FI = 0;
    // Do not flag preceding copytoreg stuff together with the following
    // stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (!VA.isRegLoc()) {
        assert(VA.isMemLoc());
        SDValue Arg = Op.getOperand(5+2*VA.getValNo());
        SDValue FlagsOp = Op.getOperand(6+2*VA.getValNo());
        ISD::ArgFlagsTy Flags =
          cast<ARG_FLAGSSDNode>(FlagsOp)->getArgFlags();
        // Create frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
        uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
        FIN = DAG.getFrameIndex(FI, getPointerTy());

        if (Flags.isByVal()) {
          // Copy relative to framepointer.
          SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
          if (StackPtr.getNode() == 0)
            StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
          Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);

          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
                                                           Flags, DAG));
        } else {
          // Store relative to framepointer.
          MemOpChains2.push_back(
            DAG.getStore(Chain, Arg, FIN,
                         PseudoSourceValue::getFixedStack(FI), 0));
        }
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Copy arguments to their registers.
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                               InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
                                     FPDiff);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is) turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
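  // (For example - an illustrative sketch, not original commentary: a direct
  // call "call void @foo()" arrives here as GlobalAddress(@foo) and becomes
  // TargetGlobalAddress(@foo), which later phases treat as already lowered.)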
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  } else if (IsTailCall) {
    unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add register as live out.
    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDValue, 8> Ops;

  if (IsTailCall) {
    Ops.push_back(Chain);
    Ops.push_back(DAG.getIntPtrConstant(NumBytes));
    Ops.push_back(DAG.getIntPtrConstant(0));
    if (InFlag.getNode())
      Ops.push_back(InFlag);
    Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
    InFlag = Chain.getValue(1);

    // Returns a chain & a flag for retval copy to use.
    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall)
    Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (!IsTailCall && !Is64Bit &&
      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  // Add an implicit use of AL for x86 vararg functions.
  if (Is64Bit && isVarArg)
    Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  if (IsTailCall) {
    assert(InFlag.getNode() &&
           "Flag must be set. Depend on flag being set in LowerRET");
    Chain = DAG.getNode(X86ISD::TAILCALL,
                        Op.getNode()->getVTList(), &Ops[0], Ops.size());

    return SDValue(Chain.getNode(), Op.getResNo());
  }

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush;
  if (IsCalleePop(Op))
    NumBytesForCalleeToPush = NumBytes; // Callee pops everything
  else if (!Is64Bit && IsStructRet)
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = 4;
  else
    NumBytesForCalleeToPush = 0; // Callee pops nothing.

  // Returns a flag for retval copy to use.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(NumBytes),
                             DAG.getIntPtrConstant(NumBytesForCalleeToPush),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDValue(LowerCallResult(Chain, InFlag, Op.getNode(), CC, DAG),
                 Op.getResNo());
}


//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

// Like stdcall (the callee cleans up the arguments), except that ECX is
// reserved for storing the address of the tail-called function. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On the X86_64 architecture with GOT-style position independent code, only
// local (within module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's
// dyld, for example.)
// If a tail-called function has more arguments than the caller, the caller
// needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after
// the original RETADDR, but before the saved framepointer or the spilled
// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
// Stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round the stack size up so that, together
/// with the pushed return address slot, it satisfies the 16-byte alignment
/// requirement, i.e. round it up to 16n + 12 on 32-bit targets. For example,
/// a StackSize of 20 becomes 28.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // The remainder is no larger than StackAlignment - SlotSize (12 on
      // 32-bit), so just add the difference.
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the lower bits, then add the stack alignment once plus the
      // StackAlignment - SlotSize bytes.
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment-SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
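/// (Illustrative IR shape that can qualify, assuming tailcallopt is enabled
/// and caller and callee are both fastcc - a sketch, not original
/// commentary:
///   %r = tail call fastcc i32 @callee(i32 %x)
///   ret i32 %r
/// Any instruction between the call and the ret disqualifies the pair.)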
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Call,
                                                          SDValue Ret,
                                                          SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  if (CheckTailCallReturnConstraints(Call, Ret)) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDValue Callee = Call.getOperand(4);
      // Tail calls are supported here except on x86-64 PIC/GOT, which is
      // restricted below.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
        return true;

      // Can only do local tail calls (in same module, hidden or protected) on
      // x86_64 PIC/GOT at the moment.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

FastISel *
X86TargetLowering::createFastISel(MachineFunction &mf,
                                  DenseMap<const Value *, unsigned> &vm,
                                  DenseMap<const BasicBlock *,
                                           MachineBasicBlock *> &bm) {
  return X86::createFastISel(mf, vm, bm);
}


//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}


/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDValue &LHS, SDValue &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1 -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0 -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1 -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // First determine if it is required or profitable to flip the operands.
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUGT:
    case ISD::SETUGE:
      Flip = true;
      break;
    }

    // If LHS is a foldable load, but RHS is not, flip the condition.
    if (!Flip &&
        (ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
        !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
      SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
      Flip = true;
    }
    if (Flip)
      std::swap(LHS, RHS);

    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:
      X86CC = X86::COND_E;
      break;
    case ISD::SETOLT: // flipped
    case ISD::SETOGT:
    case ISD::SETGT:
      X86CC = X86::COND_A;
      break;
    case ISD::SETOLE: // flipped
    case ISD::SETOGE:
    case ISD::SETGE:
      X86CC = X86::COND_AE;
      break;
    case ISD::SETUGT: // flipped
    case ISD::SETULT:
    case ISD::SETLT:
      X86CC = X86::COND_B;
      break;
    case ISD::SETUGE: // flipped
    case ISD::SETULE:
    case ISD::SETLE:
      X86CC = X86::COND_BE;
      break;
    case ISD::SETONE:
    case ISD::SETNE:
      X86CC = X86::COND_NE;
      break;
    case ISD::SETUO:
      X86CC = X86::COND_P;
      break;
    case ISD::SETO:
      X86CC = X86::COND_NP;
      break;
    }
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
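/// (Example, added for clarity: a constant 4 fails isUndefOrInRange(Op, 0, 4)
/// because the upper bound is exclusive, while an UNDEF Op always passes.)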
static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDValue Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
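/// (Illustrative example, not original commentary: for a 4-element shuffle,
/// a mask like <1, 3, 5, 7> qualifies - the low half reads V1 (indices 0-3)
/// and the high half reads V2 (indices 4-7).)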
static bool isSHUFPMask(SDOperandPtr Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
static bool isCommutedSHUFP(SDOperandPtr Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect element 0 == 6, element 1 == 7, element 2 == 2, element 3 == 3.
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect element 0 == 2, element 1 == 3, element 2 == 2, element 3 == 3.
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
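/// (Illustrative example, not original commentary: for 4 elements the mask
/// <0, 1, 4, 5> qualifies - the low half is V1's low half in order, and the
/// high half is V2's low half in order.)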
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDValue Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(SDOperandPtr Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDValue BitI = Elts[i];
    SDValue BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(SDOperandPtr Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDValue BitI = Elts[i];
    SDValue BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDValue BitI = N->getOperand(i);
    SDValue BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDValue BitI = N->getOperand(i);
    SDValue BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(SDOperandPtr Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants: x86 movs requires the lowest element to be the
/// lowest element of vector 2, and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(SDOperandPtr Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDValue Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
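  // (Clarifying note, not original commentary: HasHi is false only when both
  // high mask elements were undef; such a mask - e.g. <1, 1, u, u> - can be
  // handled by a single shufps/pshufd, so movshdup is not needed.)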
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDValue ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDValue Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.getNode())
    return false;

  for (; i != NumElems; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element #0.
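/// (For example, <0, 0, 0, 0> qualifies, as does <0, undef, 0, 0>; example
/// added for clarity, not part of the original comment.)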
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions. For example, the 4-element mask <3, 2, 1, 0> yields the
/// immediate 0x1B.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDValue Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// values in their permute mask.
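/// (Illustrative, not original commentary: shuffle(V1, V2, <0, 5, 2, 7>)
/// becomes shuffle(V2, V1, <4, 1, 6, 3>) - each index is moved across the
/// V1/V2 boundary by adding or subtracting NumElems.)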
static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1,
                                    SDValue &V2, SDValue &Mask,
                                    SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT MaskVT = Mask.getValueType();
  MVT EltVT = MaskVT.getVectorElementType();
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDValue, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static
SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG) {
  MVT MaskVT = Mask.getValueType();
  MVT EltVT = MaskVT.getVectorElementType();
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
}


/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).getNode();
    if (ISD::isNON_EXTLoad(N)) {
      if (LD)
        *LD = cast<LoadSDNode>(N);
      return true;
    }
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order).
/// And since V1 will become the source of the MOVLP, it must be either a
/// vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation. We will try to use
  // a load-folding shufps op instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDValue SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  SDValue Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDValue Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
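/// (Added example, not original commentary: a shuffle whose selected inputs
/// are all drawn from zero elements of a BUILD_VECTOR, from all-zero
/// vectors, or from undef resolves to a zero vector.)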
static bool isZeroShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  SDValue Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF)
      continue;

    unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
    if (Idx < NumElems) {
      unsigned Opc = V1.getNode()->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V1.getNode()->getOperand(Idx)))
        return false;
    } else if (Idx >= NumElems) {
      unsigned Opc = V2.getNode()->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V2.getNode()->getOperand(Idx - NumElems)))
        return false;
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG) {
  assert(VT.isVector() && "Expected a vector type");

  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDValue Vec;
  if (VT.getSizeInBits() == 64) { // MMX
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  } else if (HasSSE2) { // SSE2
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  } else { // SSE1
    SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4f32, Cst, Cst, Cst, Cst);
  }
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}

/// getOnesVector - Returns a vector of specified type with all bits set.
///
static SDValue getOnesVector(MVT VT, SelectionDAG &DAG) {
  assert(VT.isVector() && "Expected a vector type");

  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
  SDValue Vec;
  if (VT.getSizeInBits() == 64) // MMX
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  else // SSE
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}


/// NormalizeMask - V2 is a splat; modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
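/// (Illustrative, not original commentary: with NumElems == 4, the mask
/// <0, 5, 2, 7> is rewritten to <0, 4, 2, 4>, so every V2 reference uses
/// V2's element 0.)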
static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  SmallVector<SDValue, 8> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
                       &MaskVec[0], MaskVec.size());
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();

  SmallVector<SDValue, 8> MaskVec;
  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();
  unsigned Half = NumElems/2;
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps
/// element #0 of a vector with the specified index, leaving the rest of the
/// elements in place.
static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt,
                                  SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();
  SmallVector<SDValue, 8> MaskVec;
  // Element #0 of the result gets the elt we are replacing.
  MaskVec.push_back(DAG.getConstant(DestElt, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) {
  MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
MVT::v4i32 : MVT::v4f32; 2939  MVT VT = Op.getValueType(); 2940  if (PVT == VT) 2941    return Op; 2942  SDValue V1 = Op.getOperand(0); 2943  SDValue Mask = Op.getOperand(2); 2944  unsigned NumElems = Mask.getNumOperands(); 2945  // Special handling of v4f32 -> v4i32. 2946  if (VT != MVT::v4f32) { 2947    Mask = getUnpacklMask(NumElems, DAG); 2948    while (NumElems > 4) { 2949      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2950      NumElems >>= 1; 2951    } 2952    Mask = getZeroVector(MVT::v4i32, true, DAG); 2953  } 2954 2955  V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1); 2956  SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1, 2957                                DAG.getNode(ISD::UNDEF, PVT), Mask); 2958  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2959} 2960 2961/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2962/// vector and a zero or undef vector. This produces a shuffle where the low 2963/// element of V2 is swizzled into the zero/undef vector, landing at element 2964/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 2965static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, 2966                                           bool isZero, bool HasSSE2, 2967                                           SelectionDAG &DAG) { 2968  MVT VT = V2.getValueType(); 2969  SDValue V1 = isZero 2970    ? getZeroVector(VT, HasSSE2, DAG) : DAG.getNode(ISD::UNDEF, VT); 2971  unsigned NumElems = V2.getValueType().getVectorNumElements(); 2972  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2973  MVT EVT = MaskVT.getVectorElementType(); 2974  SmallVector<SDValue, 16> MaskVec; 2975  for (unsigned i = 0; i != NumElems; ++i) 2976    if (i == Idx)  // If this is the insertion idx, put the low elt of V2 here. 2977      MaskVec.push_back(DAG.getConstant(NumElems, EVT)); 2978    else 2979      MaskVec.push_back(DAG.getConstant(i, EVT)); 2980  SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2981                             &MaskVec[0], MaskVec.size()); 2982  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2983} 2984 2985/// getNumOfConsecutiveZeros - Return the number of consecutive zero elements, 2986/// counting from the low end (Low) or the high end, in the result of a shuffle. 2987static 2988unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, 2989                                  unsigned NumElems, bool Low, 2990                                  SelectionDAG &DAG) { 2991  unsigned NumZeros = 0; 2992  for (unsigned i = 0; i < NumElems; ++i) { 2993    unsigned Index = Low ? i : NumElems-i-1; 2994    SDValue Idx = Mask.getOperand(Index); 2995    if (Idx.getOpcode() == ISD::UNDEF) { 2996      ++NumZeros; 2997      continue; 2998    } 2999    SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); 3000    if (Elt.getNode() && isZeroNode(Elt)) 3001      ++NumZeros; 3002    else 3003      break; 3004  } 3005  return NumZeros; 3006} 3007 3008/// isVectorShift - Returns true if the shuffle can be implemented as a 3009/// logical left or right shift of a vector. 3010static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, 3011                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { 3012  unsigned NumElems = Mask.getNumOperands(); 3013 3014  isLeft = true; 3015  unsigned NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); 3016  if (!NumZeros) { 3017    isLeft = false; 3018    NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); 3019    if (!NumZeros) 3020      return false; 3021  } 3022 3023  bool SeenV1 = false; 3024  bool SeenV2 = false; 3025  for (unsigned i = NumZeros; i < NumElems; ++i) { 3026    unsigned Val = isLeft ? (i - NumZeros) : i; 3027    SDValue Idx = Mask.getOperand(isLeft ?
i : (i - NumZeros)); 3028    if (Idx.getOpcode() == ISD::UNDEF) 3029      continue; 3030    unsigned Index = cast<ConstantSDNode>(Idx)->getValue(); 3031    if (Index < NumElems) 3032      SeenV1 = true; 3033    else { 3034      Index -= NumElems; 3035      SeenV2 = true; 3036    } 3037    if (Index != Val) 3038      return false; 3039  } 3040  if (SeenV1 && SeenV2) 3041    return false; 3042 3043  ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); 3044  ShAmt = NumZeros; 3045  return true; 3046} 3047 3048 3049/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 3050/// 3051static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, 3052                                     unsigned NumNonZero, unsigned NumZero, 3053                                     SelectionDAG &DAG, TargetLowering &TLI) { 3054  if (NumNonZero > 8) 3055    return SDValue(); 3056 3057  SDValue V(0, 0); 3058  bool First = true; 3059  for (unsigned i = 0; i < 16; ++i) { 3060    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 3061    if (ThisIsNonZero && First) { 3062      if (NumZero) 3063        V = getZeroVector(MVT::v8i16, true, DAG); 3064      else 3065        V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3066      First = false; 3067    } 3068 3069    if ((i & 1) != 0) { 3070      SDValue ThisElt(0, 0), LastElt(0, 0); 3071      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 3072      if (LastIsNonZero) { 3073        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 3074      } 3075      if (ThisIsNonZero) { 3076        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 3077        ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 3078                              ThisElt, DAG.getConstant(8, MVT::i8)); 3079        if (LastIsNonZero) 3080          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 3081      } else 3082        ThisElt = LastElt; 3083 3084      if (ThisElt.getNode()) 3085        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 3086                        DAG.getIntPtrConstant(i/2)); 3087    } 3088  } 3089 3090  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 3091} 3092 3093/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 3094/// 3095static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, 3096                                     unsigned NumNonZero, unsigned NumZero, 3097                                     SelectionDAG &DAG, TargetLowering &TLI) { 3098  if (NumNonZero > 4) 3099    return SDValue(); 3100 3101  SDValue V(0, 0); 3102  bool First = true; 3103  for (unsigned i = 0; i < 8; ++i) { 3104    bool isNonZero = (NonZeros & (1 << i)) != 0; 3105    if (isNonZero) { 3106      if (First) { 3107        if (NumZero) 3108          V = getZeroVector(MVT::v8i16, true, DAG); 3109        else 3110          V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3111        First = false; 3112      } 3113      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 3114                      DAG.getIntPtrConstant(i)); 3115    } 3116  } 3117 3118  return V; 3119} 3120 3121/// getVShift - Return a vector logical shift node. 3122/// 3123static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, 3124                         unsigned NumBits, SelectionDAG &DAG, 3125                         const TargetLowering &TLI) { 3126  bool isMMX = VT.getSizeInBits() == 64; 3127  MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; 3128  unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; 3129  SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp); 3130  return DAG.getNode(ISD::BIT_CONVERT, VT, 3131                     DAG.getNode(Opc, ShVT, SrcOp, 3132                             DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); 3133} 3134 3135SDValue 3136X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 3137  // All zeros are handled with pxor; all ones are handled with pcmpeqd.
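  // Neither case needs a load: pxor of a register with itself produces all
  // zeros, and pcmpeqd of a register with itself produces all ones.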
3138  if (ISD::isBuildVectorAllZeros(Op.getNode()) 3139      || ISD::isBuildVectorAllOnes(Op.getNode())) { 3140    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to 3141    // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are 3142    // eliminated on x86-32 hosts. 3143    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) 3144      return Op; 3145 3146    if (ISD::isBuildVectorAllOnes(Op.getNode())) 3147      return getOnesVector(Op.getValueType(), DAG); 3148    return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG); 3149  } 3150 3151  MVT VT = Op.getValueType(); 3152  MVT EVT = VT.getVectorElementType(); 3153  unsigned EVTBits = EVT.getSizeInBits(); 3154 3155  unsigned NumElems = Op.getNumOperands(); 3156  unsigned NumZero  = 0; 3157  unsigned NumNonZero = 0; 3158  unsigned NonZeros = 0; 3159  bool IsAllConstants = true; 3160  SmallSet<SDValue, 8> Values; 3161  for (unsigned i = 0; i < NumElems; ++i) { 3162    SDValue Elt = Op.getOperand(i); 3163    if (Elt.getOpcode() == ISD::UNDEF) 3164      continue; 3165    Values.insert(Elt); 3166    if (Elt.getOpcode() != ISD::Constant && 3167        Elt.getOpcode() != ISD::ConstantFP) 3168      IsAllConstants = false; 3169    if (isZeroNode(Elt)) 3170      NumZero++; 3171    else { 3172      NonZeros |= (1 << i); 3173      NumNonZero++; 3174    } 3175  } 3176 3177  if (NumNonZero == 0) { 3178    // All-undef vector. Return an UNDEF. All-zero vectors were handled above. 3179    return DAG.getNode(ISD::UNDEF, VT); 3180  } 3181 3182  // Special case for a single non-zero, non-undef element. 3183  if (NumNonZero == 1 && NumElems <= 4) { 3184    unsigned Idx = CountTrailingZeros_32(NonZeros); 3185    SDValue Item = Op.getOperand(Idx); 3186 3187    // If this is an insertion of an i64 value on x86-32, and if the top bits of 3188    // the value are obviously zero, truncate the value to i32 and do the 3189    // insertion that way.  Only do this if the value is non-constant or if the 3190    // value is a constant being inserted into element 0.  It is cheaper to do 3191    // a constant pool load than it is to do a movd + shuffle. 3192    if (EVT == MVT::i64 && !Subtarget->is64Bit() && 3193        (!IsAllConstants || Idx == 0)) { 3194      if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { 3195        // Handle both MMX and SSE. 3196        MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; 3197        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; 3198 3199        // Truncate the value (which may itself be a constant) to i32, and 3200        // convert it to a vector with movd (S2V+shuffle to zero extend). 3201        Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item); 3202        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item); 3203        Item = getShuffleVectorZeroOrUndef(Item, 0, true, 3204                                           Subtarget->hasSSE2(), DAG); 3205 3206        // Now we have our 32-bit value zero extended in the low element of 3207        // a vector.  If Idx != 0, swizzle it into place. 3208        if (Idx != 0) { 3209          SDValue Ops[] = { 3210            Item, DAG.getNode(ISD::UNDEF, Item.getValueType()), 3211            getSwapEltZeroMask(VecElts, Idx, DAG) 3212          }; 3213          Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3); 3214        } 3215        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item); 3216      } 3217    } 3218 3219    // If we have a constant or non-constant insertion into the low element of 3220    // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into 3221    // the rest of the elements.  This will be matched as movd/movq/movss/movsd 3222    // depending on what the source datatype is.  Because we can only get here 3223    // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
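    // e.g. (build_vector f32:%x, 0, 0, 0) becomes a single movss:
    // SCALAR_TO_VECTOR puts %x in the low lane, and the shuffle below
    // selects the remaining lanes from the zero vector (mask <4,1,2,3>).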
3224 if (Idx == 0 && 3225 // Don't do this for i64 values on x86-32. 3226 (EVT != MVT::i64 || Subtarget->is64Bit())) { 3227 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3228 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 3229 return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, 3230 Subtarget->hasSSE2(), DAG); 3231 } 3232 3233 // Is it a vector logical left shift? 3234 if (NumElems == 2 && Idx == 1 && 3235 isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) { 3236 unsigned NumBits = VT.getSizeInBits(); 3237 return getVShift(true, VT, 3238 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)), 3239 NumBits/2, DAG, *this); 3240 } 3241 3242 if (IsAllConstants) // Otherwise, it's better to do a constpool load. 3243 return SDValue(); 3244 3245 // Otherwise, if this is a vector with i32 or f32 elements, and the element 3246 // is a non-constant being inserted into an element other than the low one, 3247 // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka 3248 // movd/movss) to move this into the low element, then shuffle it into 3249 // place. 3250 if (EVTBits == 32) { 3251 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3252 3253 // Turn it into a shuffle of zero and zero-extended scalar to vector. 3254 Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, 3255 Subtarget->hasSSE2(), DAG); 3256 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3257 MVT MaskEVT = MaskVT.getVectorElementType(); 3258 SmallVector<SDValue, 8> MaskVec; 3259 for (unsigned i = 0; i < NumElems; i++) 3260 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3261 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3262 &MaskVec[0], MaskVec.size()); 3263 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3264 DAG.getNode(ISD::UNDEF, VT), Mask); 3265 } 3266 } 3267 3268 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3269 if (Values.size() == 1) 3270 return SDValue(); 3271 3272 // A vector full of immediates; various special cases are already 3273 // handled, so this is best done with a single constant-pool load. 3274 if (IsAllConstants) 3275 return SDValue(); 3276 3277 // Let legalizer expand 2-wide build_vectors. 3278 if (EVTBits == 64) { 3279 if (NumNonZero == 1) { 3280 // One half is zero or undef. 3281 unsigned Idx = CountTrailingZeros_32(NonZeros); 3282 SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, 3283 Op.getOperand(Idx)); 3284 return getShuffleVectorZeroOrUndef(V2, Idx, true, 3285 Subtarget->hasSSE2(), DAG); 3286 } 3287 return SDValue(); 3288 } 3289 3290 // If element VT is < 32 bits, convert it to inserts into a zero vector. 3291 if (EVTBits == 8 && NumElems == 16) { 3292 SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 3293 *this); 3294 if (V.getNode()) return V; 3295 } 3296 3297 if (EVTBits == 16 && NumElems == 8) { 3298 SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 3299 *this); 3300 if (V.getNode()) return V; 3301 } 3302 3303 // If element VT is == 32 bits, turn it into a number of shuffles. 
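  // e.g. for <a, 0, b, 0>: each element becomes a scalar_to_vector or the
  // zero vector, adjacent pairs are combined with MOVL/unpackl masks, and a
  // final shuffle picks the two low elements of each intermediate vector
  // (swapped within a half when needed).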
3304 SmallVector<SDValue, 8> V; 3305 V.resize(NumElems); 3306 if (NumElems == 4 && NumZero > 0) { 3307 for (unsigned i = 0; i < 4; ++i) { 3308 bool isZero = !(NonZeros & (1 << i)); 3309 if (isZero) 3310 V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG); 3311 else 3312 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3313 } 3314 3315 for (unsigned i = 0; i < 2; ++i) { 3316 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3317 default: break; 3318 case 0: 3319 V[i] = V[i*2]; // Must be a zero vector. 3320 break; 3321 case 1: 3322 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3323 getMOVLMask(NumElems, DAG)); 3324 break; 3325 case 2: 3326 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3327 getMOVLMask(NumElems, DAG)); 3328 break; 3329 case 3: 3330 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3331 getUnpacklMask(NumElems, DAG)); 3332 break; 3333 } 3334 } 3335 3336 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3337 MVT EVT = MaskVT.getVectorElementType(); 3338 SmallVector<SDValue, 8> MaskVec; 3339 bool Reverse = (NonZeros & 0x3) == 2; 3340 for (unsigned i = 0; i < 2; ++i) 3341 if (Reverse) 3342 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3343 else 3344 MaskVec.push_back(DAG.getConstant(i, EVT)); 3345 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3346 for (unsigned i = 0; i < 2; ++i) 3347 if (Reverse) 3348 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3349 else 3350 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3351 SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3352 &MaskVec[0], MaskVec.size()); 3353 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3354 } 3355 3356 if (Values.size() > 2) { 3357 // Expand into a number of unpckl*. 3358 // e.g. for v4f32 3359 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3360 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3361 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3362 SDValue UnpckMask = getUnpacklMask(NumElems, DAG); 3363 for (unsigned i = 0; i < NumElems; ++i) 3364 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3365 NumElems >>= 1; 3366 while (NumElems != 0) { 3367 for (unsigned i = 0; i < NumElems; ++i) 3368 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3369 UnpckMask); 3370 NumElems >>= 1; 3371 } 3372 return V[0]; 3373 } 3374 3375 return SDValue(); 3376} 3377 3378static 3379SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, 3380 SDValue PermMask, SelectionDAG &DAG, 3381 TargetLowering &TLI) { 3382 SDValue NewV; 3383 MVT MaskVT = MVT::getIntVectorWithNumElements(8); 3384 MVT MaskEVT = MaskVT.getVectorElementType(); 3385 MVT PtrVT = TLI.getPointerTy(); 3386 SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(), 3387 PermMask.getNode()->op_end()); 3388 3389 // First record which half of which vector the low elements come from. 3390 SmallVector<unsigned, 4> LowQuad(4); 3391 for (unsigned i = 0; i < 4; ++i) { 3392 SDValue Elt = MaskElts[i]; 3393 if (Elt.getOpcode() == ISD::UNDEF) 3394 continue; 3395 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3396 int QuadIdx = EltIdx / 4; 3397 ++LowQuad[QuadIdx]; 3398 } 3399 3400 int BestLowQuad = -1; 3401 unsigned MaxQuad = 1; 3402 for (unsigned i = 0; i < 4; ++i) { 3403 if (LowQuad[i] > MaxQuad) { 3404 BestLowQuad = i; 3405 MaxQuad = LowQuad[i]; 3406 } 3407 } 3408 3409 // Record which half of which vector the high elements come from. 
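  // Quad numbering: 0 and 1 are the low and high halves of V1, 2 and 3 are
  // the low and high halves of V2 (mask indices 8-15).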
3410 SmallVector<unsigned, 4> HighQuad(4); 3411 for (unsigned i = 4; i < 8; ++i) { 3412 SDValue Elt = MaskElts[i]; 3413 if (Elt.getOpcode() == ISD::UNDEF) 3414 continue; 3415 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3416 int QuadIdx = EltIdx / 4; 3417 ++HighQuad[QuadIdx]; 3418 } 3419 3420 int BestHighQuad = -1; 3421 MaxQuad = 1; 3422 for (unsigned i = 0; i < 4; ++i) { 3423 if (HighQuad[i] > MaxQuad) { 3424 BestHighQuad = i; 3425 MaxQuad = HighQuad[i]; 3426 } 3427 } 3428 3429 // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. 3430 if (BestLowQuad != -1 || BestHighQuad != -1) { 3431 // First sort the 4 chunks in order using shufpd. 3432 SmallVector<SDValue, 8> MaskVec; 3433 3434 if (BestLowQuad != -1) 3435 MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); 3436 else 3437 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 3438 3439 if (BestHighQuad != -1) 3440 MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); 3441 else 3442 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 3443 3444 SDValue Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); 3445 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 3446 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), 3447 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); 3448 NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); 3449 3450 // Now sort high and low parts separately. 3451 BitVector InOrder(8); 3452 if (BestLowQuad != -1) { 3453 // Sort lower half in order using PSHUFLW. 3454 MaskVec.clear(); 3455 bool AnyOutOrder = false; 3456 3457 for (unsigned i = 0; i != 4; ++i) { 3458 SDValue Elt = MaskElts[i]; 3459 if (Elt.getOpcode() == ISD::UNDEF) { 3460 MaskVec.push_back(Elt); 3461 InOrder.set(i); 3462 } else { 3463 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3464 if (EltIdx != i) 3465 AnyOutOrder = true; 3466 3467 MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); 3468 3469 // If this element is in the right place after this shuffle, then 3470 // remember it. 3471 if ((int)(EltIdx / 4) == BestLowQuad) 3472 InOrder.set(i); 3473 } 3474 } 3475 if (AnyOutOrder) { 3476 for (unsigned i = 4; i != 8; ++i) 3477 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3478 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3479 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3480 } 3481 } 3482 3483 if (BestHighQuad != -1) { 3484 // Sort high half in order using PSHUFHW if possible. 3485 MaskVec.clear(); 3486 3487 for (unsigned i = 0; i != 4; ++i) 3488 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3489 3490 bool AnyOutOrder = false; 3491 for (unsigned i = 4; i != 8; ++i) { 3492 SDValue Elt = MaskElts[i]; 3493 if (Elt.getOpcode() == ISD::UNDEF) { 3494 MaskVec.push_back(Elt); 3495 InOrder.set(i); 3496 } else { 3497 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3498 if (EltIdx != i) 3499 AnyOutOrder = true; 3500 3501 MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT)); 3502 3503 // If this element is in the right place after this shuffle, then 3504 // remember it. 3505 if ((int)(EltIdx / 4) == BestHighQuad) 3506 InOrder.set(i); 3507 } 3508 } 3509 3510 if (AnyOutOrder) { 3511 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3512 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3513 } 3514 } 3515 3516 // The other elements are put in the right place using pextrw and pinsrw. 
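    // e.g. a remaining mask entry of 10 is materialized as a pextrw of
    // element 2 of V2 (10 - 8) followed by a pinsrw into the result slot
    // that requested it; slots already marked InOrder are skipped.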
3517 for (unsigned i = 0; i != 8; ++i) { 3518 if (InOrder[i]) 3519 continue; 3520 SDValue Elt = MaskElts[i]; 3521 if (Elt.getOpcode() == ISD::UNDEF) 3522 continue; 3523 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3524 SDValue ExtOp = (EltIdx < 8) 3525 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, 3526 DAG.getConstant(EltIdx, PtrVT)) 3527 : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, 3528 DAG.getConstant(EltIdx - 8, PtrVT)); 3529 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3530 DAG.getConstant(i, PtrVT)); 3531 } 3532 3533 return NewV; 3534 } 3535 3536 // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use as 3537 // few as possible. First, let's find out how many elements are already in the 3538 // right order. 3539 unsigned V1InOrder = 0; 3540 unsigned V1FromV1 = 0; 3541 unsigned V2InOrder = 0; 3542 unsigned V2FromV2 = 0; 3543 SmallVector<SDValue, 8> V1Elts; 3544 SmallVector<SDValue, 8> V2Elts; 3545 for (unsigned i = 0; i < 8; ++i) { 3546 SDValue Elt = MaskElts[i]; 3547 if (Elt.getOpcode() == ISD::UNDEF) { 3548 V1Elts.push_back(Elt); 3549 V2Elts.push_back(Elt); 3550 ++V1InOrder; 3551 ++V2InOrder; 3552 continue; 3553 } 3554 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3555 if (EltIdx == i) { 3556 V1Elts.push_back(Elt); 3557 V2Elts.push_back(DAG.getConstant(i+8, MaskEVT)); 3558 ++V1InOrder; 3559 } else if (EltIdx == i+8) { 3560 V1Elts.push_back(Elt); 3561 V2Elts.push_back(DAG.getConstant(i, MaskEVT)); 3562 ++V2InOrder; 3563 } else if (EltIdx < 8) { 3564 V1Elts.push_back(Elt); 3565 ++V1FromV1; 3566 } else { 3567 V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT)); 3568 ++V2FromV2; 3569 } 3570 } 3571 3572 if (V2InOrder > V1InOrder) { 3573 PermMask = CommuteVectorShuffleMask(PermMask, DAG); 3574 std::swap(V1, V2); 3575 std::swap(V1Elts, V2Elts); 3576 std::swap(V1FromV1, V2FromV2); 3577 } 3578 3579 if ((V1FromV1 + V1InOrder) != 8) { 3580 // Some elements are from V2. 3581 if (V1FromV1) { 3582 // If there are elements that are from V1 but out of place, 3583 // then first sort them in place 3584 SmallVector<SDValue, 8> MaskVec; 3585 for (unsigned i = 0; i < 8; ++i) { 3586 SDValue Elt = V1Elts[i]; 3587 if (Elt.getOpcode() == ISD::UNDEF) { 3588 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3589 continue; 3590 } 3591 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3592 if (EltIdx >= 8) 3593 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3594 else 3595 MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT)); 3596 } 3597 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3598 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask); 3599 } 3600 3601 NewV = V1; 3602 for (unsigned i = 0; i < 8; ++i) { 3603 SDValue Elt = V1Elts[i]; 3604 if (Elt.getOpcode() == ISD::UNDEF) 3605 continue; 3606 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3607 if (EltIdx < 8) 3608 continue; 3609 SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, 3610 DAG.getConstant(EltIdx - 8, PtrVT)); 3611 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3612 DAG.getConstant(i, PtrVT)); 3613 } 3614 return NewV; 3615 } else { 3616 // All elements are from V1. 
3617    NewV = V1; 3618    for (unsigned i = 0; i < 8; ++i) { 3619      SDValue Elt = V1Elts[i]; 3620      if (Elt.getOpcode() == ISD::UNDEF) 3621        continue; 3622      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3623      SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, 3624                                  DAG.getConstant(EltIdx, PtrVT)); 3625      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3626                         DAG.getConstant(i, PtrVT)); 3627    } 3628    return NewV; 3629  } 3630} 3631 3632/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide 3633/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be 3634/// done when every pair / quad of shuffle mask elements points to elements in 3635/// the right sequence. e.g. 3636/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> 3637static 3638SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, 3639                                 MVT VT, 3640                                 SDValue PermMask, SelectionDAG &DAG, 3641                                 TargetLowering &TLI) { 3642  unsigned NumElems = PermMask.getNumOperands(); 3643  unsigned NewWidth = (NumElems == 4) ? 2 : 4; 3644  MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); 3645  MVT MaskEltVT = MaskVT.getVectorElementType(); 3646  MVT NewVT = MaskVT; 3647  switch (VT.getSimpleVT()) { 3648  default: assert(false && "Unexpected!"); 3649  case MVT::v4f32: NewVT = MVT::v2f64; break; 3650  case MVT::v4i32: NewVT = MVT::v2i64; break; 3651  case MVT::v8i16: NewVT = MVT::v4i32; break; 3652  case MVT::v16i8: NewVT = MVT::v4i32; break; 3653  } 3654 3655  if (NewWidth == 2) { 3656    if (VT.isInteger()) 3657      NewVT = MVT::v2i64; 3658    else 3659      NewVT = MVT::v2f64; 3660  } 3661  unsigned Scale = NumElems / NewWidth; 3662  SmallVector<SDValue, 8> MaskVec; 3663  for (unsigned i = 0; i < NumElems; i += Scale) { 3664    unsigned StartIdx = ~0U; 3665    for (unsigned j = 0; j < Scale; ++j) { 3666      SDValue Elt = PermMask.getOperand(i+j); 3667      if (Elt.getOpcode() == ISD::UNDEF) 3668        continue; 3669      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3670      if (StartIdx == ~0U) 3671        StartIdx = EltIdx - (EltIdx % Scale); 3672      if (EltIdx != StartIdx + j) 3673        return SDValue(); 3674    } 3675    if (StartIdx == ~0U) 3676      MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEltVT)); 3677    else 3678      MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); 3679  } 3680 3681  V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1); 3682  V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2); 3683  return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2, 3684                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3685                                 &MaskVec[0], MaskVec.size())); 3686} 3687 3688/// getVZextMovL - Return a zero-extending vector move low node. 3689/// 3690static SDValue getVZextMovL(MVT VT, MVT OpVT, 3691                            SDValue SrcOp, SelectionDAG &DAG, 3692                            const X86Subtarget *Subtarget) { 3693  if (VT == MVT::v2f64 || VT == MVT::v4f32) { 3694    LoadSDNode *LD = NULL; 3695    if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) 3696      LD = dyn_cast<LoadSDNode>(SrcOp); 3697    if (!LD) { 3698      // movssrr and movsdrr do not clear top bits. Try to use movd, movq 3699      // instead. 3700      MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; 3701      if ((EVT != MVT::i64 || Subtarget->is64Bit()) && 3702          SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR && 3703          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT && 3704          SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) { 3705        // PR2108 3706        OpVT = (OpVT == MVT::v2f64) ?
MVT::v2i64 : MVT::v4i32; 3707        return DAG.getNode(ISD::BIT_CONVERT, VT, 3708                           DAG.getNode(X86ISD::VZEXT_MOVL, OpVT, 3709                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT, 3710                                                   SrcOp.getOperand(0) 3711                                                          .getOperand(0)))); 3712      } 3713    } 3714  } 3715 3716  return DAG.getNode(ISD::BIT_CONVERT, VT, 3717                     DAG.getNode(X86ISD::VZEXT_MOVL, OpVT, 3718                                 DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp))); 3719} 3720 3721/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of 3722/// shuffles. 3723static SDValue 3724LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, 3725                          SDValue PermMask, MVT VT, SelectionDAG &DAG) { 3726  MVT MaskVT = PermMask.getValueType(); 3727  MVT MaskEVT = MaskVT.getVectorElementType(); 3728  SmallVector<std::pair<int, int>, 8> Locs; 3729  Locs.resize(4); 3730  SmallVector<SDValue, 8> Mask1(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3731  unsigned NumHi = 0; 3732  unsigned NumLo = 0; 3733  for (unsigned i = 0; i != 4; ++i) { 3734    SDValue Elt = PermMask.getOperand(i); 3735    if (Elt.getOpcode() == ISD::UNDEF) { 3736      Locs[i] = std::make_pair(-1, -1); 3737    } else { 3738      unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 3739      assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); 3740      if (Val < 4) { 3741        Locs[i] = std::make_pair(0, NumLo); 3742        Mask1[NumLo] = Elt; 3743        NumLo++; 3744      } else { 3745        Locs[i] = std::make_pair(1, NumHi); 3746        if (2+NumHi < 4) 3747          Mask1[2+NumHi] = Elt; 3748        NumHi++; 3749      } 3750    } 3751  } 3752 3753  if (NumLo <= 2 && NumHi <= 2) { 3754    // No more than two elements come from either vector. This can be 3755    // implemented with two shuffles: the first gathers the elements, 3756    // and the second, which takes the first shuffle as both of its 3757    // vector operands, puts the elements into the right order. 3758    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3759                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3760                                 &Mask1[0], Mask1.size())); 3761 3762    SmallVector<SDValue, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3763    for (unsigned i = 0; i != 4; ++i) { 3764      if (Locs[i].first == -1) 3765        continue; 3766      else { 3767        unsigned Idx = (i < 2) ? 0 : 4; 3768        Idx += Locs[i].first * 2 + Locs[i].second; 3769        Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3770      } 3771    } 3772 3773    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3774                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3775                                   &Mask2[0], Mask2.size())); 3776  } else if (NumLo == 3 || NumHi == 3) { 3777    // Otherwise, we must have three elements from one vector, call it X, and 3778    // one element from the other, call it Y.  First, use a shufps to build an 3779    // intermediate vector with the one element from Y and the element from X 3780    // that will be in the same half in the final destination (the indexes don't 3781    // matter).  Then, use a shufps to build the final vector, taking the half 3782    // containing the element from Y from the intermediate, and the other half 3783    // from X. 3784    if (NumHi == 3) { 3785      // Normalize it so the 3 elements come from V1. 3786      PermMask = CommuteVectorShuffleMask(PermMask, DAG); 3787      std::swap(V1, V2); 3788    } 3789 3790    // Find the element from V2.
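    // After the loop below, HiIndex is the mask position (0-3) of the single
    // entry that selects from V2, i.e. the first entry with a value >= 4.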
3791 unsigned HiIndex; 3792 for (HiIndex = 0; HiIndex < 3; ++HiIndex) { 3793 SDValue Elt = PermMask.getOperand(HiIndex); 3794 if (Elt.getOpcode() == ISD::UNDEF) 3795 continue; 3796 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 3797 if (Val >= 4) 3798 break; 3799 } 3800 3801 Mask1[0] = PermMask.getOperand(HiIndex); 3802 Mask1[1] = DAG.getNode(ISD::UNDEF, MaskEVT); 3803 Mask1[2] = PermMask.getOperand(HiIndex^1); 3804 Mask1[3] = DAG.getNode(ISD::UNDEF, MaskEVT); 3805 V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3806 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4)); 3807 3808 if (HiIndex >= 2) { 3809 Mask1[0] = PermMask.getOperand(0); 3810 Mask1[1] = PermMask.getOperand(1); 3811 Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); 3812 Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT); 3813 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3814 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4)); 3815 } else { 3816 Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); 3817 Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); 3818 Mask1[2] = PermMask.getOperand(2); 3819 Mask1[3] = PermMask.getOperand(3); 3820 if (Mask1[2].getOpcode() != ISD::UNDEF) 3821 Mask1[2] = DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getValue()+4, 3822 MaskEVT); 3823 if (Mask1[3].getOpcode() != ISD::UNDEF) 3824 Mask1[3] = DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getValue()+4, 3825 MaskEVT); 3826 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, 3827 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4)); 3828 } 3829 } 3830 3831 // Break it into (shuffle shuffle_hi, shuffle_lo). 3832 Locs.clear(); 3833 SmallVector<SDValue,8> LoMask(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3834 SmallVector<SDValue,8> HiMask(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3835 SmallVector<SDValue,8> *MaskPtr = &LoMask; 3836 unsigned MaskIdx = 0; 3837 unsigned LoIdx = 0; 3838 unsigned HiIdx = 2; 3839 for (unsigned i = 0; i != 4; ++i) { 3840 if (i == 2) { 3841 MaskPtr = &HiMask; 3842 MaskIdx = 1; 3843 LoIdx = 0; 3844 HiIdx = 2; 3845 } 3846 SDValue Elt = PermMask.getOperand(i); 3847 if (Elt.getOpcode() == ISD::UNDEF) { 3848 Locs[i] = std::make_pair(-1, -1); 3849 } else if (cast<ConstantSDNode>(Elt)->getValue() < 4) { 3850 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3851 (*MaskPtr)[LoIdx] = Elt; 3852 LoIdx++; 3853 } else { 3854 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3855 (*MaskPtr)[HiIdx] = Elt; 3856 HiIdx++; 3857 } 3858 } 3859 3860 SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3861 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3862 &LoMask[0], LoMask.size())); 3863 SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3864 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3865 &HiMask[0], HiMask.size())); 3866 SmallVector<SDValue, 8> MaskOps; 3867 for (unsigned i = 0; i != 4; ++i) { 3868 if (Locs[i].first == -1) { 3869 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3870 } else { 3871 unsigned Idx = Locs[i].first * 4 + Locs[i].second; 3872 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3873 } 3874 } 3875 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3876 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3877 &MaskOps[0], MaskOps.size())); 3878} 3879 3880SDValue 3881X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 3882 SDValue V1 = Op.getOperand(0); 3883 SDValue V2 = Op.getOperand(1); 3884 SDValue PermMask = Op.getOperand(2); 3885 MVT VT = Op.getValueType(); 3886 unsigned NumElems = PermMask.getNumOperands(); 3887 bool isMMX = VT.getSizeInBits() == 
64; 3888 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3889 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3890 bool V1IsSplat = false; 3891 bool V2IsSplat = false; 3892 3893 if (isUndefShuffle(Op.getNode())) 3894 return DAG.getNode(ISD::UNDEF, VT); 3895 3896 if (isZeroShuffle(Op.getNode())) 3897 return getZeroVector(VT, Subtarget->hasSSE2(), DAG); 3898 3899 if (isIdentityMask(PermMask.getNode())) 3900 return V1; 3901 else if (isIdentityMask(PermMask.getNode(), true)) 3902 return V2; 3903 3904 if (isSplatMask(PermMask.getNode())) { 3905 if (isMMX || NumElems < 4) return Op; 3906 // Promote it to a v4{if}32 splat. 3907 return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); 3908 } 3909 3910 // If the shuffle can be profitably rewritten as a narrower shuffle, then 3911 // do it! 3912 if (VT == MVT::v8i16 || VT == MVT::v16i8) { 3913 SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this); 3914 if (NewOp.getNode()) 3915 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); 3916 } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { 3917 // FIXME: Figure out a cleaner way to do this. 3918 // Try to make use of movq to zero out the top part. 3919 if (ISD::isBuildVectorAllZeros(V2.getNode())) { 3920 SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, 3921 DAG, *this); 3922 if (NewOp.getNode()) { 3923 SDValue NewV1 = NewOp.getOperand(0); 3924 SDValue NewV2 = NewOp.getOperand(1); 3925 SDValue NewMask = NewOp.getOperand(2); 3926 if (isCommutedMOVL(NewMask.getNode(), true, false)) { 3927 NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); 3928 return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget); 3929 } 3930 } 3931 } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { 3932 SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, 3933 DAG, *this); 3934 if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) 3935 return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), 3936 DAG, Subtarget); 3937 } 3938 } 3939 3940 // Check if this can be converted into a logical shift. 3941 bool isLeft = false; 3942 unsigned ShAmt = 0; 3943 SDValue ShVal; 3944 bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); 3945 if (isShift && ShVal.hasOneUse()) { 3946 // If the shifted value has multiple uses, it may be cheaper to use 3947 // v_set0 + movlhps or movhlps, etc. 3948 MVT EVT = VT.getVectorElementType(); 3949 ShAmt *= EVT.getSizeInBits(); 3950 return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this); 3951 } 3952 3953 if (X86::isMOVLMask(PermMask.getNode())) { 3954 if (V1IsUndef) 3955 return V2; 3956 if (ISD::isBuildVectorAllZeros(V1.getNode())) 3957 return getVZextMovL(VT, VT, V2, DAG, Subtarget); 3958 if (!isMMX) 3959 return Op; 3960 } 3961 3962 if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || 3963 X86::isMOVSLDUPMask(PermMask.getNode()) || 3964 X86::isMOVHLPSMask(PermMask.getNode()) || 3965 X86::isMOVHPMask(PermMask.getNode()) || 3966 X86::isMOVLPMask(PermMask.getNode()))) 3967 return Op; 3968 3969 if (ShouldXformToMOVHLPS(PermMask.getNode()) || 3970 ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) 3971 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3972 3973 if (isShift) { 3974 // No better options. Use a vshl / vsrl. 
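    // The shift amount was counted in elements; scale by the element width
    // to get the bit count that the VSHL/VSRL nodes expect.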
3975    MVT EVT = VT.getVectorElementType(); 3976    ShAmt *= EVT.getSizeInBits(); 3977    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this); 3978  } 3979 3980  bool Commuted = false; 3981  // FIXME: This should also accept a bitcast of a splat?  Be careful, not 3982  // 1,1,1,1 -> v8i16 though. 3983  V1IsSplat = isSplatVector(V1.getNode()); 3984  V2IsSplat = isSplatVector(V2.getNode()); 3985 3986  // Canonicalize the splat or undef, if present, to be on the RHS. 3987  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 3988    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3989    std::swap(V1IsSplat, V2IsSplat); 3990    std::swap(V1IsUndef, V2IsUndef); 3991    Commuted = true; 3992  } 3993 3994  // FIXME: Figure out a cleaner way to do this. 3995  if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { 3996    if (V2IsUndef) return V1; 3997    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 3998    if (V2IsSplat) { 3999      // V2 is a splat, so the mask may be malformed. That is, it may point 4000      // to any V2 element. The instruction selector won't like this. Get 4001      // a corrected mask and commute to form a proper MOVS{S|D}. 4002      SDValue NewMask = getMOVLMask(NumElems, DAG); 4003      if (NewMask.getNode() != PermMask.getNode()) 4004        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 4005    } 4006    return Op; 4007  } 4008 4009  if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || 4010      X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || 4011      X86::isUNPCKLMask(PermMask.getNode()) || 4012      X86::isUNPCKHMask(PermMask.getNode())) 4013    return Op; 4014 4015  if (V2IsSplat) { 4016    // Normalize the mask so all entries that point to V2 point to its first 4017    // element, then try to match unpck{h|l} again. If there is a match, return 4018    // a new vector_shuffle with the corrected mask. 4019    SDValue NewMask = NormalizeMask(PermMask, DAG); 4020    if (NewMask.getNode() != PermMask.getNode()) { 4021      if (X86::isUNPCKLMask(PermMask.getNode(), true)) { 4022        SDValue NewMask = getUnpacklMask(NumElems, DAG); 4023        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 4024      } else if (X86::isUNPCKHMask(PermMask.getNode(), true)) { 4025        SDValue NewMask = getUnpackhMask(NumElems, DAG); 4026        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 4027      } 4028    } 4029  } 4030 4031  // Normalize the node to match x86 shuffle ops if needed 4032  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) 4033      Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 4034 4035  if (Commuted) { 4036    // Commute it back and try unpck* again. 4037    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 4038    if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || 4039        X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || 4040        X86::isUNPCKLMask(PermMask.getNode()) || 4041        X86::isUNPCKHMask(PermMask.getNode())) 4042      return Op; 4043  } 4044 4045  // Try PSHUF* first, then SHUFP*. 4046  // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically 4047  // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
4048 if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { 4049 if (V2.getOpcode() != ISD::UNDEF) 4050 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 4051 DAG.getNode(ISD::UNDEF, VT), PermMask); 4052 return Op; 4053 } 4054 4055 if (!isMMX) { 4056 if (Subtarget->hasSSE2() && 4057 (X86::isPSHUFDMask(PermMask.getNode()) || 4058 X86::isPSHUFHWMask(PermMask.getNode()) || 4059 X86::isPSHUFLWMask(PermMask.getNode()))) { 4060 MVT RVT = VT; 4061 if (VT == MVT::v4f32) { 4062 RVT = MVT::v4i32; 4063 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, 4064 DAG.getNode(ISD::BIT_CONVERT, RVT, V1), 4065 DAG.getNode(ISD::UNDEF, RVT), PermMask); 4066 } else if (V2.getOpcode() != ISD::UNDEF) 4067 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1, 4068 DAG.getNode(ISD::UNDEF, RVT), PermMask); 4069 if (RVT != VT) 4070 Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op); 4071 return Op; 4072 } 4073 4074 // Binary or unary shufps. 4075 if (X86::isSHUFPMask(PermMask.getNode()) || 4076 (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) 4077 return Op; 4078 } 4079 4080 // Handle v8i16 specifically since SSE can do byte extraction and insertion. 4081 if (VT == MVT::v8i16) { 4082 SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this); 4083 if (NewOp.getNode()) 4084 return NewOp; 4085 } 4086 4087 // Handle all 4 wide cases with a number of shuffles except for MMX. 4088 if (NumElems == 4 && !isMMX) 4089 return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG); 4090 4091 return SDValue(); 4092} 4093 4094SDValue 4095X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, 4096 SelectionDAG &DAG) { 4097 MVT VT = Op.getValueType(); 4098 if (VT.getSizeInBits() == 8) { 4099 SDValue Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32, 4100 Op.getOperand(0), Op.getOperand(1)); 4101 SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract, 4102 DAG.getValueType(VT)); 4103 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 4104 } else if (VT.getSizeInBits() == 16) { 4105 SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32, 4106 Op.getOperand(0), Op.getOperand(1)); 4107 SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract, 4108 DAG.getValueType(VT)); 4109 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 4110 } else if (VT == MVT::f32) { 4111 // EXTRACTPS outputs to a GPR32 register which will require a movd to copy 4112 // the result back to FR32 register. It's only worth matching if the 4113 // result has a single use which is a store or a bitcast to i32. 4114 if (!Op.hasOneUse()) 4115 return SDValue(); 4116 SDNode *User = *Op.getNode()->use_begin(); 4117 if (User->getOpcode() != ISD::STORE && 4118 (User->getOpcode() != ISD::BIT_CONVERT || 4119 User->getValueType(0) != MVT::i32)) 4120 return SDValue(); 4121 SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4122 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)), 4123 Op.getOperand(1)); 4124 return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract); 4125 } 4126 return SDValue(); 4127} 4128 4129 4130SDValue 4131X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4132 if (!isa<ConstantSDNode>(Op.getOperand(1))) 4133 return SDValue(); 4134 4135 if (Subtarget->hasSSE41()) { 4136 SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); 4137 if (Res.getNode()) 4138 return Res; 4139 } 4140 4141 MVT VT = Op.getValueType(); 4142 // TODO: handle v16i8. 
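  // i16 elements: index 0 can be extracted by bitcasting to v4i32 and
  // truncating a plain 32-bit extract; other indices go through PEXTRW, whose
  // 32-bit result carries an AssertZext so the truncate back to i16 is free.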
4143  if (VT.getSizeInBits() == 16) { 4144    SDValue Vec = Op.getOperand(0); 4145    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 4146    if (Idx == 0) 4147      return DAG.getNode(ISD::TRUNCATE, MVT::i16, 4148                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4149                                 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec), 4150                                     Op.getOperand(1))); 4151    // Transform it so it matches pextrw, which produces a 32-bit result. 4152    MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1); 4153    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 4154                                    Op.getOperand(0), Op.getOperand(1)); 4155    SDValue Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract, 4156                                    DAG.getValueType(VT)); 4157    return DAG.getNode(ISD::TRUNCATE, VT, Assert); 4158  } else if (VT.getSizeInBits() == 32) { 4159    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 4160    if (Idx == 0) 4161      return Op; 4162    // SHUFPS the element to the lowest double word, then movss. 4163    MVT MaskVT = MVT::getIntVectorWithNumElements(4); 4164    SmallVector<SDValue, 8> IdxVec; 4165    IdxVec. 4166      push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); 4167    IdxVec. 4168      push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4169    IdxVec. 4170      push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4171    IdxVec. 4172      push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4173    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 4174                                 &IdxVec[0], IdxVec.size()); 4175    SDValue Vec = Op.getOperand(0); 4176    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 4177                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 4178    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 4179                       DAG.getIntPtrConstant(0)); 4180  } else if (VT.getSizeInBits() == 64) { 4181    // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b 4182    // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught 4183    //        to match extract_elt for f64. 4184    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 4185    if (Idx == 0) 4186      return Op; 4187 4188    // UNPCKHPD the element to the lowest double word, then movsd. 4189    // Note if the lower 64 bits of the result of the UNPCKHPD are then stored 4190    // to a f64mem, the whole operation is folded into a single MOVHPDmr. 4191    MVT MaskVT = MVT::getIntVectorWithNumElements(2); 4192    SmallVector<SDValue, 8> IdxVec; 4193    IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); 4194    IdxVec. 4195      push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4196    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 4197                                 &IdxVec[0], IdxVec.size()); 4198    SDValue Vec = Op.getOperand(0); 4199    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 4200                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 4201    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 4202                       DAG.getIntPtrConstant(0)); 4203  } 4204 4205  return SDValue(); 4206} 4207 4208SDValue 4209X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ 4210  MVT VT = Op.getValueType(); 4211  MVT EVT = VT.getVectorElementType(); 4212 4213  SDValue N0 = Op.getOperand(0); 4214  SDValue N1 = Op.getOperand(1); 4215  SDValue N2 = Op.getOperand(2); 4216 4217  if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) && 4218      isa<ConstantSDNode>(N2)) { 4219    unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB 4220                                              : X86ISD::PINSRW; 4221    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second 4222    // argument.
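    // e.g. (insert_vector_elt v8i16:$vec, i16:$val, 3) becomes
    // (X86ISD::PINSRW $vec, (any_extend:i32 $val), 3).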
4223    if (N1.getValueType() != MVT::i32) 4224      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 4225    if (N2.getValueType() != MVT::i32) 4226      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue()); 4227    return DAG.getNode(Opc, VT, N0, N1, N2); 4228  } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) { 4229    // Bits [7:6] of the constant are the source select.  This will always be 4230    //  zero here.  The DAG Combiner may combine an extract_elt index into these 4231    //  bits.  For example (insert (extract, 3), 2) could be matched by putting 4232    //  the '3' into bits [7:6] of X86ISD::INSERTPS. 4233    // Bits [5:4] of the constant are the destination select.  This is the 4234    //  value of the incoming immediate. 4235    // Bits [3:0] of the constant are the zero mask.  The DAG Combiner may 4236    //   combine either bitwise AND or insert of float 0.0 to set these bits. 4237    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue() << 4); 4238    return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2); 4239  } 4240  return SDValue(); 4241} 4242 4243SDValue 4244X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4245  MVT VT = Op.getValueType(); 4246  MVT EVT = VT.getVectorElementType(); 4247 4248  if (Subtarget->hasSSE41()) 4249    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); 4250 4251  if (EVT == MVT::i8) 4252    return SDValue(); 4253 4254  SDValue N0 = Op.getOperand(0); 4255  SDValue N1 = Op.getOperand(1); 4256  SDValue N2 = Op.getOperand(2); 4257 4258  if (EVT.getSizeInBits() == 16) { 4259    // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32 4260    // as its second argument. 4261    if (N1.getValueType() != MVT::i32) 4262      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 4263    if (N2.getValueType() != MVT::i32) 4264      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue()); 4265    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 4266  } 4267  return SDValue(); 4268} 4269 4270SDValue 4271X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { 4272  if (Op.getValueType() == MVT::v2f32) 4273    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f32, 4274                       DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i32, 4275                                   DAG.getNode(ISD::BIT_CONVERT, MVT::i32, 4276                                               Op.getOperand(0)))); 4277 4278  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 4279  MVT VT = MVT::v2i32; 4280  switch (Op.getValueType().getSimpleVT()) { 4281  default: break; 4282  case MVT::v16i8: 4283  case MVT::v8i16: 4284    VT = MVT::v4i32; 4285    break; 4286  } 4287  return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), 4288                     DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt)); 4289} 4290 4291// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 4292// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is 4293// one of the above mentioned nodes. It has to be wrapped because otherwise 4294// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 4295// be used to form an addressing mode. These wrapped nodes will be selected 4296// into MOV32ri. 4297SDValue 4298X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 4299  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 4300  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), 4301                                             getPointerTy(), 4302                                             CP->getAlignment()); 4303  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4304  // With PIC, the address is actually $g + Offset.
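  // ($g is the PIC base, materialized here as X86ISD::GlobalBaseReg;
  // RIP-relative code computes addresses directly and skips the extra add.)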
4305  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4306      !Subtarget->isPICStyleRIPRel()) { 4307    Result = DAG.getNode(ISD::ADD, getPointerTy(), 4308                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4309                         Result); 4310  } 4311 4312  return Result; 4313} 4314 4315SDValue 4316X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { 4317  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 4318  SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy()); 4319  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4320  // With PIC, the address is actually $g + Offset. 4321  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4322      !Subtarget->isPICStyleRIPRel()) { 4323    Result = DAG.getNode(ISD::ADD, getPointerTy(), 4324                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4325                         Result); 4326  } 4327 4328  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to 4329  // load the value at address GV, not the value of GV itself. This means that 4330  // the GlobalAddress must be in the base or index register of the address, not 4331  // the GV offset field. The platform check is inside the GVRequiresExtraLoad() 4332  // call. The same applies to external symbols during PIC codegen. 4333  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 4334    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, 4335                         PseudoSourceValue::getGOT(), 0); 4336 4337  return Result; 4338} 4339 4340// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit 4341static SDValue 4342LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4343                                const MVT PtrVT) { 4344  SDValue InFlag; 4345  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 4346                                     DAG.getNode(X86ISD::GlobalBaseReg, 4347                                                 PtrVT), InFlag); 4348  InFlag = Chain.getValue(1); 4349 4350  // emit leal symbol@TLSGD(,%ebx,1), %eax 4351  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 4352  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4353                                             GA->getValueType(0), 4354                                             GA->getOffset()); 4355  SDValue Ops[] = { Chain,  TGA, InFlag }; 4356  SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 4357  InFlag = Result.getValue(2); 4358  Chain = Result.getValue(1); 4359 4360  // call ___tls_get_addr. This function receives its argument in 4361  // the register EAX.
4362 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 4363 InFlag = Chain.getValue(1); 4364 4365 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 4366 SDValue Ops1[] = { Chain, 4367 DAG.getTargetExternalSymbol("___tls_get_addr", 4368 PtrVT), 4369 DAG.getRegister(X86::EAX, PtrVT), 4370 DAG.getRegister(X86::EBX, PtrVT), 4371 InFlag }; 4372 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 4373 InFlag = Chain.getValue(1); 4374 4375 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 4376} 4377 4378// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit 4379static SDValue 4380LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4381 const MVT PtrVT) { 4382 SDValue InFlag, Chain; 4383 4384 // emit leaq symbol@TLSGD(%rip), %rdi 4385 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 4386 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4387 GA->getValueType(0), 4388 GA->getOffset()); 4389 SDValue Ops[] = { DAG.getEntryNode(), TGA}; 4390 SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2); 4391 Chain = Result.getValue(1); 4392 InFlag = Result.getValue(2); 4393 4394 // call __tls_get_addr. This function receives its argument in 4395 // the register RDI. 4396 Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag); 4397 InFlag = Chain.getValue(1); 4398 4399 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 4400 SDValue Ops1[] = { Chain, 4401 DAG.getTargetExternalSymbol("__tls_get_addr", 4402 PtrVT), 4403 DAG.getRegister(X86::RDI, PtrVT), 4404 InFlag }; 4405 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4); 4406 InFlag = Chain.getValue(1); 4407 4408 return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag); 4409} 4410 4411// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 4412// "local exec" model. 4413static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4414 const MVT PtrVT) { 4415 // Get the Thread Pointer 4416 SDValue ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 4417 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 4418 // exec) 4419 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4420 GA->getValueType(0), 4421 GA->getOffset()); 4422 SDValue Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 4423 4424 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 4425 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, 4426 PseudoSourceValue::getGOT(), 0); 4427 4428 // The address of the thread local variable is the add of the thread 4429 // pointer with the offset of the variable. 
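  // i.e. addr = THREAD_POINTER + x@ntpoff (local exec), or THREAD_POINTER
  // plus the GOT-loaded x@indntpoff value (initial exec).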
4430  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); 4431} 4432 4433SDValue 4434X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { 4435  // TODO: implement the "local dynamic" model 4436  // TODO: implement the "initial exec" model for pic executables 4437  assert(Subtarget->isTargetELF() && 4438         "TLS not implemented for non-ELF targets"); 4439  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 4440  // If the relocation model is PIC, use the "General Dynamic" TLS model, 4441  // otherwise use the "Local Exec" TLS model 4442  if (Subtarget->is64Bit()) { 4443    return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); 4444  } else { 4445    if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 4446      return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); 4447    else 4448      return LowerToTLSExecModel(GA, DAG, getPointerTy()); 4449  } 4450} 4451 4452SDValue 4453X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { 4454  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 4455  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 4456  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4457  // With PIC, the address is actually $g + Offset. 4458  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4459      !Subtarget->isPICStyleRIPRel()) { 4460    Result = DAG.getNode(ISD::ADD, getPointerTy(), 4461                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4462                         Result); 4463  } 4464 4465  return Result; 4466} 4467 4468SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { 4469  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 4470  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); 4471  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4472  // With PIC, the address is actually $g + Offset. 4473  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4474      !Subtarget->isPICStyleRIPRel()) { 4475    Result = DAG.getNode(ISD::ADD, getPointerTy(), 4476                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4477                         Result); 4478  } 4479 4480  return Result; 4481} 4482 4483/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and 4484/// take a 2 x i32 value to shift plus a shift amount. 4485SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { 4486  assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 4487  MVT VT = Op.getValueType(); 4488  unsigned VTBits = VT.getSizeInBits(); 4489  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 4490  SDValue ShOpLo = Op.getOperand(0); 4491  SDValue ShOpHi = Op.getOperand(1); 4492  SDValue ShAmt  = Op.getOperand(2); 4493  SDValue Tmp1 = isSRA ? 4494    DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) : 4495    DAG.getConstant(0, VT); 4496 4497  SDValue Tmp2, Tmp3; 4498  if (Op.getOpcode() == ISD::SHL_PARTS) { 4499    Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt); 4500    Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt); 4501  } else { 4502    Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt); 4503    Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt); 4504 } 4505 4506 SDValue AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 4507 DAG.getConstant(VTBits, MVT::i8)); 4508 SDValue Cond = DAG.getNode(X86ISD::CMP, VT, 4509 AndNode, DAG.getConstant(0, MVT::i8)); 4510 4511 SDValue Hi, Lo; 4512 SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4513 SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond }; 4514 SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; 4515 4516 if (Op.getOpcode() == ISD::SHL_PARTS) { 4517 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4518 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4519 } else { 4520 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4521 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4522 } 4523 4524 SDValue Ops[2] = { Lo, Hi }; 4525 return DAG.getMergeValues(Ops, 2); 4526} 4527 4528SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 4529 MVT SrcVT = Op.getOperand(0).getValueType(); 4530 assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && 4531 "Unknown SINT_TO_FP to lower!"); 4532 4533 // These are really Legal; caller falls through into that case. 4534 if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) 4535 return SDValue(); 4536 if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 && 4537 Subtarget->is64Bit()) 4538 return SDValue(); 4539 4540 unsigned Size = SrcVT.getSizeInBits()/8; 4541 MachineFunction &MF = DAG.getMachineFunction(); 4542 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4543 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4544 SDValue Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 4545 StackSlot, 4546 PseudoSourceValue::getFixedStack(SSFI), 0); 4547 4548 // Build the FILD 4549 SDVTList Tys; 4550 bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); 4551 if (useSSE) 4552 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 4553 else 4554 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 4555 SmallVector<SDValue, 8> Ops; 4556 Ops.push_back(Chain); 4557 Ops.push_back(StackSlot); 4558 Ops.push_back(DAG.getValueType(SrcVT)); 4559 SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, 4560 Tys, &Ops[0], Ops.size()); 4561 4562 if (useSSE) { 4563 Chain = Result.getValue(1); 4564 SDValue InFlag = Result.getValue(2); 4565 4566 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 4567 // shouldn't be necessary except that RFP cannot be live across 4568 // multiple blocks. When stackifier is fixed, they can be uncoupled. 4569 MachineFunction &MF = DAG.getMachineFunction(); 4570 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 4571 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4572 Tys = DAG.getVTList(MVT::Other); 4573 SmallVector<SDValue, 8> Ops; 4574 Ops.push_back(Chain); 4575 Ops.push_back(Result); 4576 Ops.push_back(StackSlot); 4577 Ops.push_back(DAG.getValueType(Op.getValueType())); 4578 Ops.push_back(InFlag); 4579 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 4580 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 4581 PseudoSourceValue::getFixedStack(SSFI), 0); 4582 } 4583 4584 return Result; 4585} 4586 4587std::pair<SDValue,SDValue> X86TargetLowering:: 4588FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { 4589 assert(Op.getValueType().getSimpleVT() <= MVT::i64 && 4590 Op.getValueType().getSimpleVT() >= MVT::i16 && 4591 "Unknown FP_TO_SINT to lower!"); 4592 4593 // These are really Legal. 
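  // (With SSE, f32/f64 -> i32 is a single cvttss2si/cvttsd2si, and on x86-64
  // the 64-bit forms of those instructions cover i64 as well, so no stack
  // temporary is needed for those combinations.)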
4594 if (Op.getValueType() == MVT::i32 && 4595 isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) 4596 return std::make_pair(SDValue(), SDValue()); 4597 if (Subtarget->is64Bit() && 4598 Op.getValueType() == MVT::i64 && 4599 Op.getOperand(0).getValueType() != MVT::f80) 4600 return std::make_pair(SDValue(), SDValue()); 4601 4602 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 4603 // stack slot. 4604 MachineFunction &MF = DAG.getMachineFunction(); 4605 unsigned MemSize = Op.getValueType().getSizeInBits()/8; 4606 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4607 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4608 unsigned Opc; 4609 switch (Op.getValueType().getSimpleVT()) { 4610 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4611 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4612 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4613 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4614 } 4615 4616 SDValue Chain = DAG.getEntryNode(); 4617 SDValue Value = Op.getOperand(0); 4618 if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { 4619 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4620 Chain = DAG.getStore(Chain, Value, StackSlot, 4621 PseudoSourceValue::getFixedStack(SSFI), 0); 4622 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4623 SDValue Ops[] = { 4624 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4625 }; 4626 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4627 Chain = Value.getValue(1); 4628 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4629 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4630 } 4631 4632 // Build the FP_TO_INT*_IN_MEM 4633 SDValue Ops[] = { Chain, Value, StackSlot }; 4634 SDValue FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4635 4636 return std::make_pair(FIST, StackSlot); 4637} 4638 4639SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { 4640 std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(Op, DAG); 4641 SDValue FIST = Vals.first, StackSlot = Vals.second; 4642 if (FIST.getNode() == 0) return SDValue(); 4643 4644 // Load the result. 4645 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4646} 4647 4648SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 4649 std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG); 4650 SDValue FIST = Vals.first, StackSlot = Vals.second; 4651 if (FIST.getNode() == 0) return 0; 4652 4653 MVT VT = N->getValueType(0); 4654 4655 // Return a load from the stack slot. 4656 SDValue Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0); 4657 4658 // Use MERGE_VALUES to drop the chain result value and get a node with one 4659 // result. This requires turning off getMergeValues simplification, since 4660 // otherwise it will give us Res back. 
4661 return DAG.getMergeValues(&Res, 1, false).getNode(); 4662} 4663 4664SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { 4665 MVT VT = Op.getValueType(); 4666 MVT EltVT = VT; 4667 if (VT.isVector()) 4668 EltVT = VT.getVectorElementType(); 4669 std::vector<Constant*> CV; 4670 if (EltVT == MVT::f64) { 4671 Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))); 4672 CV.push_back(C); 4673 CV.push_back(C); 4674 } else { 4675 Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))); 4676 CV.push_back(C); 4677 CV.push_back(C); 4678 CV.push_back(C); 4679 CV.push_back(C); 4680 } 4681 Constant *C = ConstantVector::get(CV); 4682 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4683 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4684 PseudoSourceValue::getConstantPool(), 0, 4685 false, 16); 4686 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4687} 4688 4689SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { 4690 MVT VT = Op.getValueType(); 4691 MVT EltVT = VT; 4692 unsigned EltNum = 1; 4693 if (VT.isVector()) { 4694 EltVT = VT.getVectorElementType(); 4695 EltNum = VT.getVectorNumElements(); 4696 } 4697 std::vector<Constant*> CV; 4698 if (EltVT == MVT::f64) { 4699 Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63))); 4700 CV.push_back(C); 4701 CV.push_back(C); 4702 } else { 4703 Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31))); 4704 CV.push_back(C); 4705 CV.push_back(C); 4706 CV.push_back(C); 4707 CV.push_back(C); 4708 } 4709 Constant *C = ConstantVector::get(CV); 4710 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4711 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4712 PseudoSourceValue::getConstantPool(), 0, 4713 false, 16); 4714 if (VT.isVector()) { 4715 return DAG.getNode(ISD::BIT_CONVERT, VT, 4716 DAG.getNode(ISD::XOR, MVT::v2i64, 4717 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4718 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4719 } else { 4720 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4721 } 4722} 4723 4724SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { 4725 SDValue Op0 = Op.getOperand(0); 4726 SDValue Op1 = Op.getOperand(1); 4727 MVT VT = Op.getValueType(); 4728 MVT SrcVT = Op1.getValueType(); 4729 4730 // If second operand is smaller, extend it first. 4731 if (SrcVT.bitsLT(VT)) { 4732 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4733 SrcVT = VT; 4734 } 4735 // And if it is bigger, shrink it first. 4736 if (SrcVT.bitsGT(VT)) { 4737 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1)); 4738 SrcVT = VT; 4739 } 4740 4741 // At this point the operands and the result should have the same 4742 // type, and that won't be f80 since that is not custom lowered. 4743 4744 // First get the sign bit of second operand. 
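  // The mask built below has only the sign bit set in its first element
  // (bit 63 for f64, bit 31 for f32) and zeros elsewhere, so the FAND
  // isolates the sign of Op1 in the low lane.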
4745 std::vector<Constant*> CV; 4746 if (SrcVT == MVT::f64) { 4747 CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63)))); 4748 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 4749 } else { 4750 CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31)))); 4751 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4752 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4753 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4754 } 4755 Constant *C = ConstantVector::get(CV); 4756 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4757 SDValue Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, 4758 PseudoSourceValue::getConstantPool(), 0, 4759 false, 16); 4760 SDValue SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4761 4762 // Shift sign bit right or left if the two operands have different types. 4763 if (SrcVT.bitsGT(VT)) { 4764 // Op0 is MVT::f32, Op1 is MVT::f64. 4765 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4766 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4767 DAG.getConstant(32, MVT::i32)); 4768 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4769 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4770 DAG.getIntPtrConstant(0)); 4771 } 4772 4773 // Clear first operand sign bit. 4774 CV.clear(); 4775 if (VT == MVT::f64) { 4776 CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))))); 4777 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 4778 } else { 4779 CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31))))); 4780 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4781 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4782 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4783 } 4784 C = ConstantVector::get(CV); 4785 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4786 SDValue Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4787 PseudoSourceValue::getConstantPool(), 0, 4788 false, 16); 4789 SDValue Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4790 4791 // Or the value with the sign bit. 
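  // Net effect: copysign(x, y) = (x & ~signmask) | (y & signmask), expressed
  // as FAND/FOR so the whole computation stays in SSE registers.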
4792 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 4793} 4794 4795SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { 4796 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 4797 SDValue Cond; 4798 SDValue Op0 = Op.getOperand(0); 4799 SDValue Op1 = Op.getOperand(1); 4800 SDValue CC = Op.getOperand(2); 4801 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4802 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 4803 unsigned X86CC; 4804 4805 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 4806 Op0, Op1, DAG)) { 4807 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4808 return DAG.getNode(X86ISD::SETCC, MVT::i8, 4809 DAG.getConstant(X86CC, MVT::i8), Cond); 4810 } 4811 4812 assert(isFP && "Illegal integer SetCC!"); 4813 4814 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4815 switch (SetCCOpcode) { 4816 default: assert(false && "Illegal floating point SetCC!"); 4817 case ISD::SETOEQ: { // !PF & ZF 4818 SDValue Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4819 DAG.getConstant(X86::COND_NP, MVT::i8), Cond); 4820 SDValue Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4821 DAG.getConstant(X86::COND_E, MVT::i8), Cond); 4822 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 4823 } 4824 case ISD::SETUNE: { // PF | !ZF 4825 SDValue Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4826 DAG.getConstant(X86::COND_P, MVT::i8), Cond); 4827 SDValue Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4828 DAG.getConstant(X86::COND_NE, MVT::i8), Cond); 4829 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 4830 } 4831 } 4832} 4833 4834SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 4835 SDValue Cond; 4836 SDValue Op0 = Op.getOperand(0); 4837 SDValue Op1 = Op.getOperand(1); 4838 SDValue CC = Op.getOperand(2); 4839 MVT VT = Op.getValueType(); 4840 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4841 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 4842 4843 if (isFP) { 4844 unsigned SSECC = 8; 4845 MVT VT0 = Op0.getValueType(); 4846 assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64); 4847 unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD; 4848 bool Swap = false; 4849 4850 switch (SetCCOpcode) { 4851 default: break; 4852 case ISD::SETOEQ: 4853 case ISD::SETEQ: SSECC = 0; break; 4854 case ISD::SETOGT: 4855 case ISD::SETGT: Swap = true; // Fallthrough 4856 case ISD::SETLT: 4857 case ISD::SETOLT: SSECC = 1; break; 4858 case ISD::SETOGE: 4859 case ISD::SETGE: Swap = true; // Fallthrough 4860 case ISD::SETLE: 4861 case ISD::SETOLE: SSECC = 2; break; 4862 case ISD::SETUO: SSECC = 3; break; 4863 case ISD::SETUNE: 4864 case ISD::SETNE: SSECC = 4; break; 4865 case ISD::SETULE: Swap = true; // Fallthrough 4866 case ISD::SETUGE: SSECC = 5; break; 4867 case ISD::SETULT: Swap = true; // Fallthrough 4868 case ISD::SETUGT: SSECC = 6; break; 4869 case ISD::SETO: SSECC = 7; break; 4870 } 4871 if (Swap) 4872 std::swap(Op0, Op1); 4873 4874 // In the two special cases we can't handle, emit two comparisons.
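    // SETUEQ has no single cmpps/cmppd immediate; it is decomposed below as
    // unord(a,b) | eq(a,b), and SETONE likewise as ord(a,b) & neq(a,b).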
4875 if (SSECC == 8) { 4876 if (SetCCOpcode == ISD::SETUEQ) { 4877 SDValue UNORD, EQ; 4878 UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); 4879 EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); 4880 return DAG.getNode(ISD::OR, VT, UNORD, EQ); 4881 } 4882 else if (SetCCOpcode == ISD::SETONE) { 4883 SDValue ORD, NEQ; 4884 ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); 4885 NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); 4886 return DAG.getNode(ISD::AND, VT, ORD, NEQ); 4887 } 4888 assert(0 && "Illegal FP comparison"); 4889 } 4890 // Handle all other FP comparisons here. 4891 return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); 4892 } 4893 4894 // We are handling one of the integer comparisons here. Since SSE only has 4895 // GT and EQ comparisons for integer, swapping operands and multiple 4896 // operations may be required for some comparisons. 4897 unsigned Opc = 0, EQOpc = 0, GTOpc = 0; 4898 bool Swap = false, Invert = false, FlipSigns = false; 4899 4900 switch (VT.getSimpleVT()) { 4901 default: break; 4902 case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; 4903 case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; 4904 case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; 4905 case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; 4906 } 4907 4908 switch (SetCCOpcode) { 4909 default: break; 4910 case ISD::SETNE: Invert = true; // Fallthrough 4911 case ISD::SETEQ: Opc = EQOpc; break; 4912 case ISD::SETLT: Swap = true; // Fallthrough 4913 case ISD::SETGT: Opc = GTOpc; break; 4914 case ISD::SETGE: Swap = true; // Fallthrough 4915 case ISD::SETLE: Opc = GTOpc; Invert = true; break; 4916 case ISD::SETULT: Swap = true; // Fallthrough 4917 case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break; 4918 case ISD::SETUGE: Swap = true; // Fallthrough 4919 case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break; 4920 } 4921 if (Swap) 4922 std::swap(Op0, Op1); 4923 4924 // Since SSE has no unsigned integer comparisons, we need to flip the sign 4925 // bits of the inputs before performing those operations. 4926 if (FlipSigns) { 4927 MVT EltVT = VT.getVectorElementType(); 4928 SDValue SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT); 4929 std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); 4930 SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0], 4931 SignBits.size()); 4932 Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec); 4933 Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec); 4934 } 4935 4936 SDValue Result = DAG.getNode(Opc, VT, Op0, Op1); 4937 4938 // If the logical-not of the result is required, perform that now. 4939 if (Invert) { 4940 MVT EltVT = VT.getVectorElementType(); 4941 SDValue NegOne = DAG.getConstant(EltVT.getIntegerVTBitMask(), EltVT); 4942 std::vector<SDValue> NegOnes(VT.getVectorNumElements(), NegOne); 4943 SDValue NegOneV = DAG.getNode(ISD::BUILD_VECTOR, VT, &NegOnes[0], 4944 NegOnes.size()); 4945 Result = DAG.getNode(ISD::XOR, VT, Result, NegOneV); 4946 } 4947 return Result; 4948} 4949 4950SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { 4951 bool addTest = true; 4952 SDValue Cond = Op.getOperand(0); 4953 SDValue CC; 4954 4955 if (Cond.getOpcode() == ISD::SETCC) 4956 Cond = LowerSETCC(Cond, DAG); 4957 4958 // If the condition flag is set by an X86ISD::CMP, then use it as the condition 4959 // setting operand in place of the X86ISD::SETCC.
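  // In effect, select (setcc ...), t, f becomes a CMOV keyed directly off
  // the flag-producing compare, so the intermediate i8 SETCC value never has
  // to be materialized.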
4960 if (Cond.getOpcode() == X86ISD::SETCC) { 4961 CC = Cond.getOperand(0); 4962 4963 SDValue Cmp = Cond.getOperand(1); 4964 unsigned Opc = Cmp.getOpcode(); 4965 MVT VT = Op.getValueType(); 4966 4967 bool IllegalFPCMov = false; 4968 if (VT.isFloatingPoint() && !VT.isVector() && 4969 !isScalarFPTypeInSSEReg(VT)) // FPStack? 4970 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4971 4972 if ((Opc == X86ISD::CMP || 4973 Opc == X86ISD::COMI || 4974 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 4975 Cond = Cmp; 4976 addTest = false; 4977 } 4978 } 4979 4980 if (addTest) { 4981 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4982 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 4983 } 4984 4985 const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), 4986 MVT::Flag); 4987 SmallVector<SDValue, 4> Ops; 4988 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 4989 // condition is true. 4990 Ops.push_back(Op.getOperand(2)); 4991 Ops.push_back(Op.getOperand(1)); 4992 Ops.push_back(CC); 4993 Ops.push_back(Cond); 4994 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 4995} 4996 4997SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { 4998 bool addTest = true; 4999 SDValue Chain = Op.getOperand(0); 5000 SDValue Cond = Op.getOperand(1); 5001 SDValue Dest = Op.getOperand(2); 5002 SDValue CC; 5003 5004 if (Cond.getOpcode() == ISD::SETCC) 5005 Cond = LowerSETCC(Cond, DAG); 5006 5007 // If the condition flag is set by an X86ISD::CMP, then use it as the condition 5008 // setting operand in place of the X86ISD::SETCC. 5009 if (Cond.getOpcode() == X86ISD::SETCC) { 5010 CC = Cond.getOperand(0); 5011 5012 SDValue Cmp = Cond.getOperand(1); 5013 unsigned Opc = Cmp.getOpcode(); 5014 if (Opc == X86ISD::CMP || 5015 Opc == X86ISD::COMI || 5016 Opc == X86ISD::UCOMI) { 5017 Cond = Cmp; 5018 addTest = false; 5019 } 5020 } 5021 5022 if (addTest) { 5023 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 5024 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 5025 } 5026 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 5027 Chain, Op.getOperand(2), CC, Cond); 5028} 5029 5030 5031// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 5032// A call to _alloca is needed to probe the stack when allocating more than 4K 5033// bytes in one go. Touching the stack at 4K increments is necessary to ensure 5034// that the guard pages used by the OS virtual memory manager are allocated in 5035// the correct sequence. 5036SDValue 5037X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 5038 SelectionDAG &DAG) { 5039 assert(Subtarget->isTargetCygMing() && 5040 "This should be used only on Cygwin/Mingw targets"); 5041 5042 // Get the inputs. 5043 SDValue Chain = Op.getOperand(0); 5044 SDValue Size = Op.getOperand(1); 5045 // FIXME: Ensure alignment here 5046 5047 SDValue Flag; 5048 5049 MVT IntPtr = getPointerTy(); 5050 MVT SPTy = Subtarget->is64Bit() ?
MVT::i64 : MVT::i32; 5051 5052 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0)); 5053 5054 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 5055 Flag = Chain.getValue(1); 5056 5057 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 5058 SDValue Ops[] = { Chain, 5059 DAG.getTargetExternalSymbol("_alloca", IntPtr), 5060 DAG.getRegister(X86::EAX, IntPtr), 5061 DAG.getRegister(X86StackPtr, SPTy), 5062 Flag }; 5063 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 5); 5064 Flag = Chain.getValue(1); 5065 5066 Chain = DAG.getCALLSEQ_END(Chain, 5067 DAG.getIntPtrConstant(0), 5068 DAG.getIntPtrConstant(0), 5069 Flag); 5070 5071 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 5072 5073 SDValue Ops1[2] = { Chain.getValue(0), Chain }; 5074 return DAG.getMergeValues(Ops1, 2); 5075} 5076 5077SDValue 5078X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, 5079 SDValue Chain, 5080 SDValue Dst, SDValue Src, 5081 SDValue Size, unsigned Align, 5082 const Value *DstSV, uint64_t DstSVOff) { 5083 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 5084 5085 /// If not DWORD aligned or size is more than the threshold, call the library. 5086 /// The libc version is likely to be faster for these cases. It can use the 5087 /// address value and run time information about the CPU. 5088 if ((Align & 3) != 0 || 5089 !ConstantSize || 5090 ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) { 5091 SDValue InFlag(0, 0); 5092 5093 // Check to see if there is a specialized entry-point for memory zeroing. 5094 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); 5095 if (const char *bzeroEntry = 5096 V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { 5097 MVT IntPtr = getPointerTy(); 5098 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 5099 TargetLowering::ArgListTy Args; 5100 TargetLowering::ArgListEntry Entry; 5101 Entry.Node = Dst; 5102 Entry.Ty = IntPtrTy; 5103 Args.push_back(Entry); 5104 Entry.Node = Size; 5105 Args.push_back(Entry); 5106 std::pair<SDValue,SDValue> CallResult = 5107 LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, 5108 false, DAG.getExternalSymbol(bzeroEntry, IntPtr), 5109 Args, DAG); 5110 return CallResult.second; 5111 } 5112 5113 // Otherwise have the target-independent code call memset. 5114 return SDValue(); 5115 } 5116 5117 uint64_t SizeVal = ConstantSize->getValue(); 5118 SDValue InFlag(0, 0); 5119 MVT AVT; 5120 SDValue Count; 5121 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); 5122 unsigned BytesLeft = 0; 5123 bool TwoRepStos = false; 5124 if (ValC) { 5125 unsigned ValReg; 5126 uint64_t Val = ValC->getValue() & 255; 5127 5128 // If the value is a constant, then we can potentially use larger sets. 
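    // For example, a DWORD-aligned memset(p, 0xAB, n) replicates the byte to
    // 0xABABABAB and stores four bytes per rep;stos iteration instead of one.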
5129 switch (Align & 3) { 5130 case 2: // WORD aligned 5131 AVT = MVT::i16; 5132 ValReg = X86::AX; 5133 Val = (Val << 8) | Val; 5134 break; 5135 case 0: // DWORD aligned 5136 AVT = MVT::i32; 5137 ValReg = X86::EAX; 5138 Val = (Val << 8) | Val; 5139 Val = (Val << 16) | Val; 5140 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned 5141 AVT = MVT::i64; 5142 ValReg = X86::RAX; 5143 Val = (Val << 32) | Val; 5144 } 5145 break; 5146 default: // Byte aligned 5147 AVT = MVT::i8; 5148 ValReg = X86::AL; 5149 Count = DAG.getIntPtrConstant(SizeVal); 5150 break; 5151 } 5152 5153 if (AVT.bitsGT(MVT::i8)) { 5154 unsigned UBytes = AVT.getSizeInBits() / 8; 5155 Count = DAG.getIntPtrConstant(SizeVal / UBytes); 5156 BytesLeft = SizeVal % UBytes; 5157 } 5158 5159 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 5160 InFlag); 5161 InFlag = Chain.getValue(1); 5162 } else { 5163 AVT = MVT::i8; 5164 Count = DAG.getIntPtrConstant(SizeVal); 5165 Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag); 5166 InFlag = Chain.getValue(1); 5167 } 5168 5169 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 5170 Count, InFlag); 5171 InFlag = Chain.getValue(1); 5172 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 5173 Dst, InFlag); 5174 InFlag = Chain.getValue(1); 5175 5176 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5177 SmallVector<SDValue, 8> Ops; 5178 Ops.push_back(Chain); 5179 Ops.push_back(DAG.getValueType(AVT)); 5180 Ops.push_back(InFlag); 5181 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 5182 5183 if (TwoRepStos) { 5184 InFlag = Chain.getValue(1); 5185 Count = Size; 5186 MVT CVT = Count.getValueType(); 5187 SDValue Left = DAG.getNode(ISD::AND, CVT, Count, 5188 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 5189 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 5190 Left, InFlag); 5191 InFlag = Chain.getValue(1); 5192 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5193 Ops.clear(); 5194 Ops.push_back(Chain); 5195 Ops.push_back(DAG.getValueType(MVT::i8)); 5196 Ops.push_back(InFlag); 5197 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 5198 } else if (BytesLeft) { 5199 // Handle the last 1 - 7 bytes. 5200 unsigned Offset = SizeVal - BytesLeft; 5201 MVT AddrVT = Dst.getValueType(); 5202 MVT SizeVT = Size.getValueType(); 5203 5204 Chain = DAG.getMemset(Chain, 5205 DAG.getNode(ISD::ADD, AddrVT, Dst, 5206 DAG.getConstant(Offset, AddrVT)), 5207 Src, 5208 DAG.getConstant(BytesLeft, SizeVT), 5209 Align, DstSV, DstSVOff + Offset); 5210 } 5211 5212 // TODO: Use a TokenFactor, as in memcpy, instead of a single chain. 5213 return Chain; 5214} 5215 5216SDValue 5217X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, 5218 SDValue Chain, SDValue Dst, SDValue Src, 5219 SDValue Size, unsigned Align, 5220 bool AlwaysInline, 5221 const Value *DstSV, uint64_t DstSVOff, 5222 const Value *SrcSV, uint64_t SrcSVOff) { 5223 // This requires the copy size to be a constant, preferably 5224 // within a subtarget-specific limit. 5225 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 5226 if (!ConstantSize) 5227 return SDValue(); 5228 uint64_t SizeVal = ConstantSize->getValue(); 5229 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) 5230 return SDValue(); 5231 5232 /// If not DWORD aligned, call the library.
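  /// (Returning an empty SDValue here defers to the target-independent
  /// lowering, which emits the library call or a generic load/store
  /// sequence.)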
5233 if ((Align & 3) != 0) 5234 return SDValue(); 5235 5236 // DWORD aligned 5237 MVT AVT = MVT::i32; 5238 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned 5239 AVT = MVT::i64; 5240 5241 unsigned UBytes = AVT.getSizeInBits() / 8; 5242 unsigned CountVal = SizeVal / UBytes; 5243 SDValue Count = DAG.getIntPtrConstant(CountVal); 5244 unsigned BytesLeft = SizeVal % UBytes; 5245 5246 SDValue InFlag(0, 0); 5247 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 5248 Count, InFlag); 5249 InFlag = Chain.getValue(1); 5250 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 5251 Dst, InFlag); 5252 InFlag = Chain.getValue(1); 5253 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 5254 Src, InFlag); 5255 InFlag = Chain.getValue(1); 5256 5257 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5258 SmallVector<SDValue, 8> Ops; 5259 Ops.push_back(Chain); 5260 Ops.push_back(DAG.getValueType(AVT)); 5261 Ops.push_back(InFlag); 5262 SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 5263 5264 SmallVector<SDValue, 4> Results; 5265 Results.push_back(RepMovs); 5266 if (BytesLeft) { 5267 // Handle the last 1 - 7 bytes. 5268 unsigned Offset = SizeVal - BytesLeft; 5269 MVT DstVT = Dst.getValueType(); 5270 MVT SrcVT = Src.getValueType(); 5271 MVT SizeVT = Size.getValueType(); 5272 Results.push_back(DAG.getMemcpy(Chain, 5273 DAG.getNode(ISD::ADD, DstVT, Dst, 5274 DAG.getConstant(Offset, DstVT)), 5275 DAG.getNode(ISD::ADD, SrcVT, Src, 5276 DAG.getConstant(Offset, SrcVT)), 5277 DAG.getConstant(BytesLeft, SizeVT), 5278 Align, AlwaysInline, 5279 DstSV, DstSVOff + Offset, 5280 SrcSV, SrcSVOff + Offset)); 5281 } 5282 5283 return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); 5284} 5285 5286/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain 5287SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){ 5288 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5289 SDValue TheChain = N->getOperand(0); 5290 SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); 5291 if (Subtarget->is64Bit()) { 5292 SDValue rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 5293 SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, 5294 MVT::i64, rax.getValue(2)); 5295 SDValue Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, 5296 DAG.getConstant(32, MVT::i8)); 5297 SDValue Ops[] = { 5298 DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) 5299 }; 5300 5301 return DAG.getMergeValues(Ops, 2).getNode(); 5302 } 5303 5304 SDValue eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 5305 SDValue edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, 5306 MVT::i32, eax.getValue(2)); 5307 // Use a buildpair to merge the two 32-bit values into a 64-bit one. 5308 SDValue Ops[] = { eax, edx }; 5309 Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2); 5310 5311 // Use a MERGE_VALUES to return the value and chain. 5312 Ops[1] = edx.getValue(1); 5313 return DAG.getMergeValues(Ops, 2).getNode(); 5314} 5315 5316SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { 5317 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 5318 5319 if (!Subtarget->is64Bit()) { 5320 // vastart just stores the address of the VarArgsFrameIndex slot into the 5321 // memory location argument. 
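    // (On x86-32 a va_list is just a pointer into the argument area, so the
    // single store below suffices.)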
5322 SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5323 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV, 0); 5324 } 5325 5326 // __va_list_tag: 5327 // gp_offset (0 - 6 * 8) 5328 // fp_offset (48 - 48 + 8 * 16) 5329 // overflow_arg_area (point to parameters coming in memory). 5330 // reg_save_area 5331 SmallVector<SDValue, 8> MemOps; 5332 SDValue FIN = Op.getOperand(1); 5333 // Store gp_offset 5334 SDValue Store = DAG.getStore(Op.getOperand(0), 5335 DAG.getConstant(VarArgsGPOffset, MVT::i32), 5336 FIN, SV, 0); 5337 MemOps.push_back(Store); 5338 5339 // Store fp_offset 5340 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5341 Store = DAG.getStore(Op.getOperand(0), 5342 DAG.getConstant(VarArgsFPOffset, MVT::i32), 5343 FIN, SV, 0); 5344 MemOps.push_back(Store); 5345 5346 // Store ptr to overflow_arg_area 5347 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5348 SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5349 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV, 0); 5350 MemOps.push_back(Store); 5351 5352 // Store ptr to reg_save_area. 5353 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); 5354 SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 5355 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV, 0); 5356 MemOps.push_back(Store); 5357 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 5358} 5359 5360SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { 5361 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5362 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); 5363 SDValue Chain = Op.getOperand(0); 5364 SDValue SrcPtr = Op.getOperand(1); 5365 SDValue SrcSV = Op.getOperand(2); 5366 5367 assert(0 && "VAArgInst is not yet implemented for x86-64!"); 5368 abort(); 5369 return SDValue(); 5370} 5371 5372SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { 5373 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5374 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); 5375 SDValue Chain = Op.getOperand(0); 5376 SDValue DstPtr = Op.getOperand(1); 5377 SDValue SrcPtr = Op.getOperand(2); 5378 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 5379 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5380 5381 return DAG.getMemcpy(Chain, DstPtr, SrcPtr, 5382 DAG.getIntPtrConstant(24), 8, false, 5383 DstSV, 0, SrcSV, 0); 5384} 5385 5386SDValue 5387X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { 5388 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 5389 switch (IntNo) { 5390 default: return SDValue(); // Don't custom lower most intrinsics. 5391 // Comparison intrinsics. 
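  // Each of these lowers to one comiss/comisd (or ucomiss/ucomisd) compare
  // plus a SETCC on the resulting EFLAGS; the switch below selects the
  // X86ISD opcode and the ISD condition code for each intrinsic.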
5392 case Intrinsic::x86_sse_comieq_ss: 5393 case Intrinsic::x86_sse_comilt_ss: 5394 case Intrinsic::x86_sse_comile_ss: 5395 case Intrinsic::x86_sse_comigt_ss: 5396 case Intrinsic::x86_sse_comige_ss: 5397 case Intrinsic::x86_sse_comineq_ss: 5398 case Intrinsic::x86_sse_ucomieq_ss: 5399 case Intrinsic::x86_sse_ucomilt_ss: 5400 case Intrinsic::x86_sse_ucomile_ss: 5401 case Intrinsic::x86_sse_ucomigt_ss: 5402 case Intrinsic::x86_sse_ucomige_ss: 5403 case Intrinsic::x86_sse_ucomineq_ss: 5404 case Intrinsic::x86_sse2_comieq_sd: 5405 case Intrinsic::x86_sse2_comilt_sd: 5406 case Intrinsic::x86_sse2_comile_sd: 5407 case Intrinsic::x86_sse2_comigt_sd: 5408 case Intrinsic::x86_sse2_comige_sd: 5409 case Intrinsic::x86_sse2_comineq_sd: 5410 case Intrinsic::x86_sse2_ucomieq_sd: 5411 case Intrinsic::x86_sse2_ucomilt_sd: 5412 case Intrinsic::x86_sse2_ucomile_sd: 5413 case Intrinsic::x86_sse2_ucomigt_sd: 5414 case Intrinsic::x86_sse2_ucomige_sd: 5415 case Intrinsic::x86_sse2_ucomineq_sd: { 5416 unsigned Opc = 0; 5417 ISD::CondCode CC = ISD::SETCC_INVALID; 5418 switch (IntNo) { 5419 default: break; 5420 case Intrinsic::x86_sse_comieq_ss: 5421 case Intrinsic::x86_sse2_comieq_sd: 5422 Opc = X86ISD::COMI; 5423 CC = ISD::SETEQ; 5424 break; 5425 case Intrinsic::x86_sse_comilt_ss: 5426 case Intrinsic::x86_sse2_comilt_sd: 5427 Opc = X86ISD::COMI; 5428 CC = ISD::SETLT; 5429 break; 5430 case Intrinsic::x86_sse_comile_ss: 5431 case Intrinsic::x86_sse2_comile_sd: 5432 Opc = X86ISD::COMI; 5433 CC = ISD::SETLE; 5434 break; 5435 case Intrinsic::x86_sse_comigt_ss: 5436 case Intrinsic::x86_sse2_comigt_sd: 5437 Opc = X86ISD::COMI; 5438 CC = ISD::SETGT; 5439 break; 5440 case Intrinsic::x86_sse_comige_ss: 5441 case Intrinsic::x86_sse2_comige_sd: 5442 Opc = X86ISD::COMI; 5443 CC = ISD::SETGE; 5444 break; 5445 case Intrinsic::x86_sse_comineq_ss: 5446 case Intrinsic::x86_sse2_comineq_sd: 5447 Opc = X86ISD::COMI; 5448 CC = ISD::SETNE; 5449 break; 5450 case Intrinsic::x86_sse_ucomieq_ss: 5451 case Intrinsic::x86_sse2_ucomieq_sd: 5452 Opc = X86ISD::UCOMI; 5453 CC = ISD::SETEQ; 5454 break; 5455 case Intrinsic::x86_sse_ucomilt_ss: 5456 case Intrinsic::x86_sse2_ucomilt_sd: 5457 Opc = X86ISD::UCOMI; 5458 CC = ISD::SETLT; 5459 break; 5460 case Intrinsic::x86_sse_ucomile_ss: 5461 case Intrinsic::x86_sse2_ucomile_sd: 5462 Opc = X86ISD::UCOMI; 5463 CC = ISD::SETLE; 5464 break; 5465 case Intrinsic::x86_sse_ucomigt_ss: 5466 case Intrinsic::x86_sse2_ucomigt_sd: 5467 Opc = X86ISD::UCOMI; 5468 CC = ISD::SETGT; 5469 break; 5470 case Intrinsic::x86_sse_ucomige_ss: 5471 case Intrinsic::x86_sse2_ucomige_sd: 5472 Opc = X86ISD::UCOMI; 5473 CC = ISD::SETGE; 5474 break; 5475 case Intrinsic::x86_sse_ucomineq_ss: 5476 case Intrinsic::x86_sse2_ucomineq_sd: 5477 Opc = X86ISD::UCOMI; 5478 CC = ISD::SETNE; 5479 break; 5480 } 5481 5482 unsigned X86CC; 5483 SDValue LHS = Op.getOperand(1); 5484 SDValue RHS = Op.getOperand(2); 5485 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 5486 5487 SDValue Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 5488 SDValue SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 5489 DAG.getConstant(X86CC, MVT::i8), Cond); 5490 return DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, SetCC); 5491 } 5492 5493 // Fix vector shift instructions where the last operand is a non-immediate 5494 // i32 value. 
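  // For example, the builtin behind _mm_slli_epi32 with a non-constant count
  // cannot use the immediate encoding; it is rewritten below into the
  // corresponding psll/psrl/psra intrinsic with the count passed through a
  // vector register.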
5495 case Intrinsic::x86_sse2_pslli_w: 5496 case Intrinsic::x86_sse2_pslli_d: 5497 case Intrinsic::x86_sse2_pslli_q: 5498 case Intrinsic::x86_sse2_psrli_w: 5499 case Intrinsic::x86_sse2_psrli_d: 5500 case Intrinsic::x86_sse2_psrli_q: 5501 case Intrinsic::x86_sse2_psrai_w: 5502 case Intrinsic::x86_sse2_psrai_d: 5503 case Intrinsic::x86_mmx_pslli_w: 5504 case Intrinsic::x86_mmx_pslli_d: 5505 case Intrinsic::x86_mmx_pslli_q: 5506 case Intrinsic::x86_mmx_psrli_w: 5507 case Intrinsic::x86_mmx_psrli_d: 5508 case Intrinsic::x86_mmx_psrli_q: 5509 case Intrinsic::x86_mmx_psrai_w: 5510 case Intrinsic::x86_mmx_psrai_d: { 5511 SDValue ShAmt = Op.getOperand(2); 5512 if (isa<ConstantSDNode>(ShAmt)) 5513 return SDValue(); 5514 5515 unsigned NewIntNo = 0; 5516 MVT ShAmtVT = MVT::v4i32; 5517 switch (IntNo) { 5518 case Intrinsic::x86_sse2_pslli_w: 5519 NewIntNo = Intrinsic::x86_sse2_psll_w; 5520 break; 5521 case Intrinsic::x86_sse2_pslli_d: 5522 NewIntNo = Intrinsic::x86_sse2_psll_d; 5523 break; 5524 case Intrinsic::x86_sse2_pslli_q: 5525 NewIntNo = Intrinsic::x86_sse2_psll_q; 5526 break; 5527 case Intrinsic::x86_sse2_psrli_w: 5528 NewIntNo = Intrinsic::x86_sse2_psrl_w; 5529 break; 5530 case Intrinsic::x86_sse2_psrli_d: 5531 NewIntNo = Intrinsic::x86_sse2_psrl_d; 5532 break; 5533 case Intrinsic::x86_sse2_psrli_q: 5534 NewIntNo = Intrinsic::x86_sse2_psrl_q; 5535 break; 5536 case Intrinsic::x86_sse2_psrai_w: 5537 NewIntNo = Intrinsic::x86_sse2_psra_w; 5538 break; 5539 case Intrinsic::x86_sse2_psrai_d: 5540 NewIntNo = Intrinsic::x86_sse2_psra_d; 5541 break; 5542 default: { 5543 ShAmtVT = MVT::v2i32; 5544 switch (IntNo) { 5545 case Intrinsic::x86_mmx_pslli_w: 5546 NewIntNo = Intrinsic::x86_mmx_psll_w; 5547 break; 5548 case Intrinsic::x86_mmx_pslli_d: 5549 NewIntNo = Intrinsic::x86_mmx_psll_d; 5550 break; 5551 case Intrinsic::x86_mmx_pslli_q: 5552 NewIntNo = Intrinsic::x86_mmx_psll_q; 5553 break; 5554 case Intrinsic::x86_mmx_psrli_w: 5555 NewIntNo = Intrinsic::x86_mmx_psrl_w; 5556 break; 5557 case Intrinsic::x86_mmx_psrli_d: 5558 NewIntNo = Intrinsic::x86_mmx_psrl_d; 5559 break; 5560 case Intrinsic::x86_mmx_psrli_q: 5561 NewIntNo = Intrinsic::x86_mmx_psrl_q; 5562 break; 5563 case Intrinsic::x86_mmx_psrai_w: 5564 NewIntNo = Intrinsic::x86_mmx_psra_w; 5565 break; 5566 case Intrinsic::x86_mmx_psrai_d: 5567 NewIntNo = Intrinsic::x86_mmx_psra_d; 5568 break; 5569 default: abort(); // Can't reach here. 5570 } 5571 break; 5572 } 5573 } 5574 MVT VT = Op.getValueType(); 5575 ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT, 5576 DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt)); 5577 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, 5578 DAG.getConstant(NewIntNo, MVT::i32), 5579 Op.getOperand(1), ShAmt); 5580 } 5581 } 5582} 5583 5584SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { 5585 // Depths > 0 not supported yet! 5586 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 5587 return SDValue(); 5588 5589 // Just load the return address 5590 SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); 5591 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 5592} 5593 5594SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 5595 // Depths > 0 not supported yet! 5596 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 5597 return SDValue(); 5598 5599 SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); 5600 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 5601 DAG.getIntPtrConstant(!Subtarget->is64Bit() ? 
4 : 8)); 5602} 5603 5604SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, 5605 SelectionDAG &DAG) { 5606 // Is not yet supported on x86-64 5607 if (Subtarget->is64Bit()) 5608 return SDValue(); 5609 5610 return DAG.getIntPtrConstant(8); 5611} 5612 5613SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) 5614{ 5615 assert(!Subtarget->is64Bit() && 5616 "Lowering of eh_return builtin is not supported yet on x86-64"); 5617 5618 MachineFunction &MF = DAG.getMachineFunction(); 5619 SDValue Chain = Op.getOperand(0); 5620 SDValue Offset = Op.getOperand(1); 5621 SDValue Handler = Op.getOperand(2); 5622 5623 SDValue Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 5624 getPointerTy()); 5625 5626 SDValue StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 5627 DAG.getIntPtrConstant(-4UL)); 5628 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 5629 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 5630 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 5631 MF.getRegInfo().addLiveOut(X86::ECX); 5632 5633 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 5634 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 5635} 5636 5637SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, 5638 SelectionDAG &DAG) { 5639 SDValue Root = Op.getOperand(0); 5640 SDValue Trmp = Op.getOperand(1); // trampoline 5641 SDValue FPtr = Op.getOperand(2); // nested function 5642 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 5643 5644 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5645 5646 const X86InstrInfo *TII = 5647 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 5648 5649 if (Subtarget->is64Bit()) { 5650 SDValue OutChains[6]; 5651 5652 // Large code-model. 5653 5654 const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); 5655 const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); 5656 5657 const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); 5658 const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); 5659 5660 const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix 5661 5662 // Load the pointer to the nested function into R11. 5663 unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 5664 SDValue Addr = Trmp; 5665 OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5666 TrmpAddr, 0); 5667 5668 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); 5669 OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpAddr, 2, false, 2); 5670 5671 // Load the 'nest' parameter value into R10. 5672 // R10 is specified in X86CallingConv.td 5673 OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 5674 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); 5675 OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5676 TrmpAddr, 10); 5677 5678 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); 5679 OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 12, false, 2); 5680 5681 // Jump to the nested function. 5682 OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
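    // The 64-bit trampoline is thus laid out as (byte offsets):
    //   0: movabsq opcode for %r11    2: FPtr (imm64)
    //  10: movabsq opcode for %r10   12: Nest (imm64)
    //  20: jmpq opcode               22: ModRM byte encoding *%r11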
5683 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); 5684 OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5685 TrmpAddr, 20); 5686 5687 unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 5688 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); 5689 OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr, 5690 TrmpAddr, 22); 5691 5692 SDValue Ops[] = 5693 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) }; 5694 return DAG.getMergeValues(Ops, 2); 5695 } else { 5696 const Function *Func = 5697 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 5698 unsigned CC = Func->getCallingConv(); 5699 unsigned NestReg; 5700 5701 switch (CC) { 5702 default: 5703 assert(0 && "Unsupported calling convention"); 5704 case CallingConv::C: 5705 case CallingConv::X86_StdCall: { 5706 // Pass 'nest' parameter in ECX. 5707 // Must be kept in sync with X86CallingConv.td 5708 NestReg = X86::ECX; 5709 5710 // Check that ECX wasn't needed by an 'inreg' parameter. 5711 const FunctionType *FTy = Func->getFunctionType(); 5712 const PAListPtr &Attrs = Func->getParamAttrs(); 5713 5714 if (!Attrs.isEmpty() && !Func->isVarArg()) { 5715 unsigned InRegCount = 0; 5716 unsigned Idx = 1; 5717 5718 for (FunctionType::param_iterator I = FTy->param_begin(), 5719 E = FTy->param_end(); I != E; ++I, ++Idx) 5720 if (Attrs.paramHasAttr(Idx, ParamAttr::InReg)) 5721 // FIXME: should only count parameters that are lowered to integers. 5722 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 5723 5724 if (InRegCount > 2) { 5725 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 5726 abort(); 5727 } 5728 } 5729 break; 5730 } 5731 case CallingConv::X86_FastCall: 5732 // Pass 'nest' parameter in EAX. 
5733 // Must be kept in sync with X86CallingConv.td 5734 NestReg = X86::EAX; 5735 break; 5736 } 5737 5738 SDValue OutChains[4]; 5739 SDValue Addr, Disp; 5740 5741 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); 5742 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); 5743 5744 const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); 5745 const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); 5746 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), 5747 Trmp, TrmpAddr, 0); 5748 5749 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); 5750 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 1, false, 1); 5751 5752 const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); 5753 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); 5754 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, 5755 TrmpAddr, 5, false, 1); 5756 5757 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); 5758 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpAddr, 6, false, 1); 5759 5760 SDValue Ops[] = 5761 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) }; 5762 return DAG.getMergeValues(Ops, 2); 5763 } 5764} 5765 5766SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { 5767 /* 5768 The rounding mode is in bits 11:10 of the FP control word (FPCW), and has 5769 the following settings: 5770 00 Round to nearest 5771 01 Round to -inf 5772 10 Round to +inf 5773 11 Round to 0 5774 5775 FLT_ROUNDS, on the other hand, expects the following: 5776 -1 Undefined 5777 0 Round to 0 5778 1 Round to nearest 5779 2 Round to +inf 5780 3 Round to -inf 5781 5782 To perform the conversion, we do: 5783 (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3) 5784 For example, the x87 default (bits 11:10 = 00, round to nearest) gives ((0 | 0) + 1) & 3 = 1, and round-to-zero (11) gives (3 + 1) & 3 = 0. 5785 */ 5786 MachineFunction &MF = DAG.getMachineFunction(); 5787 const TargetMachine &TM = MF.getTarget(); 5788 const TargetFrameInfo &TFI = *TM.getFrameInfo(); 5789 unsigned StackAlignment = TFI.getStackAlignment(); 5790 MVT VT = Op.getValueType(); 5791 5792 // Save FP Control Word to stack slot 5793 int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment); 5794 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 5795 5796 SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other, 5797 DAG.getEntryNode(), StackSlot); 5798 5799 // Load FP Control Word from stack slot 5800 SDValue CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0); 5801 5802 // Transform as necessary 5803 SDValue CWD1 = 5804 DAG.getNode(ISD::SRL, MVT::i16, 5805 DAG.getNode(ISD::AND, MVT::i16, 5806 CWD, DAG.getConstant(0x800, MVT::i16)), 5807 DAG.getConstant(11, MVT::i8)); 5808 SDValue CWD2 = 5809 DAG.getNode(ISD::SRL, MVT::i16, 5810 DAG.getNode(ISD::AND, MVT::i16, 5811 CWD, DAG.getConstant(0x400, MVT::i16)), 5812 DAG.getConstant(9, MVT::i8)); 5813 5814 SDValue RetVal = 5815 DAG.getNode(ISD::AND, MVT::i16, 5816 DAG.getNode(ISD::ADD, MVT::i16, 5817 DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2), 5818 DAG.getConstant(1, MVT::i16)), 5819 DAG.getConstant(3, MVT::i16)); 5820 5821 5822 return DAG.getNode((VT.getSizeInBits() < 16 ? 5823 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); 5824} 5825 5826SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { 5827 MVT VT = Op.getValueType(); 5828 MVT OpVT = VT; 5829 unsigned NumBits = VT.getSizeInBits(); 5830 5831 Op = Op.getOperand(0); 5832 if (VT == MVT::i8) { 5833 // Zero extend to i32 since there is not an i8 bsr.
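  // (For the ctlz math below: bsr yields the index of the highest set bit,
  //  so for a non-zero input ctlz = (NumBits-1) - index, computed as an xor
  //  with NumBits-1; on a zero input the CMOV first substitutes 2*NumBits-1,
  //  which the xor then turns into NumBits.)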
5834 OpVT = MVT::i32; 5835 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5836 } 5837 5838 // Issue a bsr (scan bits in reverse) which also sets EFLAGS. 5839 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5840 Op = DAG.getNode(X86ISD::BSR, VTs, Op); 5841 5842 // If src is zero (i.e. bsr sets ZF), returns NumBits. 5843 SmallVector<SDValue, 4> Ops; 5844 Ops.push_back(Op); 5845 Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT)); 5846 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5847 Ops.push_back(Op.getValue(1)); 5848 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5849 5850 // Finally xor with NumBits-1. 5851 Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); 5852 5853 if (VT == MVT::i8) 5854 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5855 return Op; 5856} 5857 5858SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { 5859 MVT VT = Op.getValueType(); 5860 MVT OpVT = VT; 5861 unsigned NumBits = VT.getSizeInBits(); 5862 5863 Op = Op.getOperand(0); 5864 if (VT == MVT::i8) { 5865 OpVT = MVT::i32; 5866 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5867 } 5868 5869 // Issue a bsf (scan bits forward) which also sets EFLAGS. 5870 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5871 Op = DAG.getNode(X86ISD::BSF, VTs, Op); 5872 5873 // If src is zero (i.e. bsf sets ZF), returns NumBits. 5874 SmallVector<SDValue, 4> Ops; 5875 Ops.push_back(Op); 5876 Ops.push_back(DAG.getConstant(NumBits, OpVT)); 5877 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5878 Ops.push_back(Op.getValue(1)); 5879 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5880 5881 if (VT == MVT::i8) 5882 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5883 return Op; 5884} 5885 5886SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { 5887 MVT T = Op.getValueType(); 5888 unsigned Reg = 0; 5889 unsigned size = 0; 5890 switch(T.getSimpleVT()) { 5891 default: 5892 assert(false && "Invalid value type!"); 5893 case MVT::i8: Reg = X86::AL; size = 1; break; 5894 case MVT::i16: Reg = X86::AX; size = 2; break; 5895 case MVT::i32: Reg = X86::EAX; size = 4; break; 5896 case MVT::i64: 5897 if (Subtarget->is64Bit()) { 5898 Reg = X86::RAX; size = 8; 5899 } else //Should go away when LowerType stuff lands 5900 return SDValue(ExpandATOMIC_CMP_SWAP(Op.getNode(), DAG), 0); 5901 break; 5902 }; 5903 SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg, 5904 Op.getOperand(3), SDValue()); 5905 SDValue Ops[] = { cpIn.getValue(0), 5906 Op.getOperand(1), 5907 Op.getOperand(2), 5908 DAG.getTargetConstant(size, MVT::i8), 5909 cpIn.getValue(1) }; 5910 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5911 SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, Tys, Ops, 5); 5912 SDValue cpOut = 5913 DAG.getCopyFromReg(Result.getValue(0), Reg, T, Result.getValue(1)); 5914 return cpOut; 5915} 5916 5917SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op, 5918 SelectionDAG &DAG) { 5919 MVT T = Op->getValueType(0); 5920 assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); 5921 SDValue cpInL, cpInH; 5922 cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3), 5923 DAG.getConstant(0, MVT::i32)); 5924 cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3), 5925 DAG.getConstant(1, MVT::i32)); 5926 cpInL = DAG.getCopyToReg(Op->getOperand(0), X86::EAX, 5927 cpInL, SDValue()); 5928 cpInH = DAG.getCopyToReg(cpInL.getValue(0), X86::EDX, 5929 cpInH, cpInL.getValue(1)); 5930 SDValue swapInL, swapInH; 5931 swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, 
MVT::i32, Op->getOperand(2), 5932 DAG.getConstant(0, MVT::i32)); 5933 swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2), 5934 DAG.getConstant(1, MVT::i32)); 5935 swapInL = DAG.getCopyToReg(cpInH.getValue(0), X86::EBX, 5936 swapInL, cpInH.getValue(1)); 5937 swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX, 5938 swapInH, swapInL.getValue(1)); 5939 SDValue Ops[] = { swapInH.getValue(0), 5940 Op->getOperand(1), 5941 swapInH.getValue(1)}; 5942 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5943 SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3); 5944 SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32, 5945 Result.getValue(1)); 5946 SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), X86::EDX, MVT::i32, 5947 cpOutL.getValue(2)); 5948 SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; 5949 SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2); 5950 SDValue Vals[2] = { ResultVal, cpOutH.getValue(1) }; 5951 return DAG.getMergeValues(Vals, 2).getNode(); 5952} 5953 5954SDNode* X86TargetLowering::ExpandATOMIC_LOAD_SUB(SDNode* Op, 5955 SelectionDAG &DAG) { 5956 MVT T = Op->getValueType(0); 5957 SDValue negOp = DAG.getNode(ISD::SUB, T, 5958 DAG.getConstant(0, T), Op->getOperand(2)); 5959 return DAG.getAtomic((T==MVT::i8 ? ISD::ATOMIC_LOAD_ADD_8: 5960 T==MVT::i16 ? ISD::ATOMIC_LOAD_ADD_16: 5961 T==MVT::i32 ? ISD::ATOMIC_LOAD_ADD_32: 5962 T==MVT::i64 ? ISD::ATOMIC_LOAD_ADD_64: 0), 5963 Op->getOperand(0), Op->getOperand(1), negOp, 5964 cast<AtomicSDNode>(Op)->getSrcValue(), 5965 cast<AtomicSDNode>(Op)->getAlignment()).getNode(); 5966} 5967 5968/// LowerOperation - Provide custom lowering hooks for some operations. 5969/// 5970SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { 5971 switch (Op.getOpcode()) { 5972 default: assert(0 && "Should not custom lower this!"); 5973 case ISD::ATOMIC_CMP_SWAP_8: return LowerCMP_SWAP(Op,DAG); 5974 case ISD::ATOMIC_CMP_SWAP_16: return LowerCMP_SWAP(Op,DAG); 5975 case ISD::ATOMIC_CMP_SWAP_32: return LowerCMP_SWAP(Op,DAG); 5976 case ISD::ATOMIC_CMP_SWAP_64: return LowerCMP_SWAP(Op,DAG); 5977 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5978 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5979 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5980 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5981 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5982 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5983 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5984 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5985 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 5986 case ISD::SHL_PARTS: 5987 case ISD::SRA_PARTS: 5988 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 5989 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 5990 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 5991 case ISD::FABS: return LowerFABS(Op, DAG); 5992 case ISD::FNEG: return LowerFNEG(Op, DAG); 5993 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5994 case ISD::SETCC: return LowerSETCC(Op, DAG); 5995 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 5996 case ISD::SELECT: return LowerSELECT(Op, DAG); 5997 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 5998 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5999 case ISD::CALL: return LowerCALL(Op, DAG); 6000 case ISD::RET: return LowerRET(Op, DAG); 6001 case 
ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 6002 case ISD::VASTART: return LowerVASTART(Op, DAG); 6003 case ISD::VAARG: return LowerVAARG(Op, DAG); 6004 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 6005 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 6006 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 6007 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 6008 case ISD::FRAME_TO_ARGS_OFFSET: 6009 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 6010 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 6011 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 6012 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 6013 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 6014 case ISD::CTLZ: return LowerCTLZ(Op, DAG); 6015 case ISD::CTTZ: return LowerCTTZ(Op, DAG); 6016 6017 // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands. 6018 case ISD::READCYCLECOUNTER: 6019 return SDValue(ExpandREADCYCLECOUNTER(Op.getNode(), DAG), 0); 6020 } 6021} 6022 6023/// ReplaceNodeResults - Replace a node with an illegal result type 6024/// with a new node built out of custom code. 6025SDNode *X86TargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) { 6026 switch (N->getOpcode()) { 6027 default: assert(0 && "Should not custom lower this!"); 6028 case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); 6029 case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); 6030 case ISD::ATOMIC_CMP_SWAP_64: return ExpandATOMIC_CMP_SWAP(N, DAG); 6031 case ISD::ATOMIC_LOAD_SUB_8: return ExpandATOMIC_LOAD_SUB(N,DAG); 6032 case ISD::ATOMIC_LOAD_SUB_16: return ExpandATOMIC_LOAD_SUB(N,DAG); 6033 case ISD::ATOMIC_LOAD_SUB_32: return ExpandATOMIC_LOAD_SUB(N,DAG); 6034 case ISD::ATOMIC_LOAD_SUB_64: return ExpandATOMIC_LOAD_SUB(N,DAG); 6035 } 6036} 6037 6038const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 6039 switch (Opcode) { 6040 default: return NULL; 6041 case X86ISD::BSF: return "X86ISD::BSF"; 6042 case X86ISD::BSR: return "X86ISD::BSR"; 6043 case X86ISD::SHLD: return "X86ISD::SHLD"; 6044 case X86ISD::SHRD: return "X86ISD::SHRD"; 6045 case X86ISD::FAND: return "X86ISD::FAND"; 6046 case X86ISD::FOR: return "X86ISD::FOR"; 6047 case X86ISD::FXOR: return "X86ISD::FXOR"; 6048 case X86ISD::FSRL: return "X86ISD::FSRL"; 6049 case X86ISD::FILD: return "X86ISD::FILD"; 6050 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 6051 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 6052 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 6053 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 6054 case X86ISD::FLD: return "X86ISD::FLD"; 6055 case X86ISD::FST: return "X86ISD::FST"; 6056 case X86ISD::CALL: return "X86ISD::CALL"; 6057 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 6058 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 6059 case X86ISD::CMP: return "X86ISD::CMP"; 6060 case X86ISD::COMI: return "X86ISD::COMI"; 6061 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 6062 case X86ISD::SETCC: return "X86ISD::SETCC"; 6063 case X86ISD::CMOV: return "X86ISD::CMOV"; 6064 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 6065 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 6066 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 6067 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 6068 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 6069 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 6070 case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; 6071 case 
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
  case X86ISD::PINSRB:             return "X86ISD::PINSRB";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
  case X86ISD::FRCP:               return "X86ISD::FRCP";
  case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
  case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
  case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
  case X86ISD::TC_RETURN:          return "X86ISD::TC_RETURN";
  case X86ISD::FNSTCW16m:          return "X86ISD::FNSTCW16m";
  case X86ISD::LCMPXCHG_DAG:       return "X86ISD::LCMPXCHG_DAG";
  case X86ISD::LCMPXCHG8_DAG:      return "X86ISD::LCMPXCHG8_DAG";
  case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
  case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
  case X86ISD::VSHL:               return "X86ISD::VSHL";
  case X86ISD::VSRL:               return "X86ISD::VSRL";
  case X86ISD::CMPPD:              return "X86ISD::CMPPD";
  case X86ISD::CMPPS:              return "X86ISD::CMPPS";
  case X86ISD::PCMPEQB:            return "X86ISD::PCMPEQB";
  case X86ISD::PCMPEQW:            return "X86ISD::PCMPEQW";
  case X86ISD::PCMPEQD:            return "X86ISD::PCMPEQD";
  case X86ISD::PCMPEQQ:            return "X86ISD::PCMPEQQ";
  case X86ISD::PCMPGTB:            return "X86ISD::PCMPGTB";
  case X86ISD::PCMPGTW:            return "X86ISD::PCMPGTW";
  case X86ISD::PCMPGTD:            return "X86ISD::PCMPGTD";
  case X86ISD::PCMPGTQ:            return "X86ISD::PCMPGTQ";
  }
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement.
  if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
    return false;

  if (AM.BaseGV) {
    // We can only fold this if we don't need an extra load.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;

    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit()) {
      if (getTargetMachine().getCodeModel() != CodeModel::Small)
        return false;
      // If lower 4G is not available, then we must use rip-relative addressing.
      if (AM.BaseOffs || AM.Scale > 1)
        return false;
    }
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
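    // (Nothing else has an encoding: the SIB byte only expresses scales of
    // 1, 2, 4, and 8, and the basereg+scalereg trick above only reaches
    // 3, 5, and 9.)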
    return false;
  }

  return true;
}


bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
  if (!Ty1->isInteger() || !Ty2->isInteger())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (VT.getSizeInBits() == 64) return false;
  return (Mask.getNode()->getNumOperands() <= 4 ||
          isIdentityMask(Mask.getNode()) ||
          isIdentityMask(Mask.getNode(), true) ||
          isSplatMask(Mask.getNode()) ||
          isPSHUFHW_PSHUFLWMask(Mask.getNode()) ||
          X86::isUNPCKLMask(Mask.getNode()) ||
          X86::isUNPCKHMask(Mask.getNode()) ||
          X86::isUNPCKL_v_undef_Mask(Mask.getNode()) ||
          X86::isUNPCKH_v_undef_Mask(Mask.getNode()));
}

bool
X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
                                          MVT EVT, SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
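  // (A 64-bit total width here would be an MMX vector, which takes the
  // default path instead.)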
  if (EVT.getSizeInBits() * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4) ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpc,
                                                       unsigned immOpc,
                                                       unsigned LoadOpc,
                                                       unsigned CXchgOpc,
                                                       unsigned copyOpc,
                                                       unsigned notOpc,
                                                       unsigned EAXreg,
                                                       TargetRegisterClass *RC,
                                                       bool invSrc) {
  // For the atomic bitwise operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld  t1 = [bitinstr.addr]
  //     op  t2 = t1, [bitinstr.val]
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz  newMBB
  //     fallthrough --> nextMBB
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(bInstr->getNumOperands() < 8 && "unexpected number of operands");
  MachineOperand& destOper = bInstr->getOperand(0);
  MachineOperand* argOpers[6];
  int numArgs = bInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &bInstr->getOperand(i+1);

  // x86 address has 4 operands: base, index, scale, and displacement
  int lastAddrIndx = 3; // [0,3]
  int valArgIndx = 4;

  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  // If the source must be inverted first (for NAND), emit the NOT into a
  // fresh register; otherwise operate on t1 directly.
  unsigned tt;
  if (invSrc) {
    tt = F->getRegInfo().createVirtualRegister(RC);
    MIB = BuildMI(newMBB, TII->get(notOpc), tt).addReg(t1);
  } else
    tt = t1;

  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  assert((argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
         && "invalid operand");
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, TII->get(regOpc), t2);
  else
    MIB = BuildMI(newMBB, TII->get(immOpc), t2);
  MIB.addReg(tt);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, TII->get(copyOpc), EAXreg);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, TII->get(CXchgOpc));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t2);
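  // Propagate the pseudo's memory operand onto the cmpxchg so later passes
  // still see the memory reference.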
  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
  (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(copyOpc), destOper.getReg());
  MIB.addReg(EAXreg);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(bInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
                                                      MachineBasicBlock *MBB,
                                                      unsigned cmovOpc) {
  // For the atomic min/max operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld  t1 = [min/max.addr]
  //     mov t2 = [min/max.val]
  //     cmp t1, t2
  //     cmov[cond] t2 = t1
  //     mov EAX = t1
  //     lcs dest = [min/max.addr], t2  [EAX is implicit]
  //     bz  newMBB
  //     fallthrough --> nextMBB
  //
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
  MachineOperand& destOper = mInstr->getOperand(0);
  MachineOperand* argOpers[6];
  int numArgs = mInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &mInstr->getOperand(i+1);

  // x86 address has 4 operands: base, index, scale, and displacement
  int lastAddrIndx = 3; // [0,3]
  int valArgIndx = 4;

  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  // We only support register and immediate values
  assert((argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
         && "invalid operand");

  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
  else
    MIB = BuildMI(newMBB, TII->get(X86::MOV32ri), t2);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
  MIB.addReg(t1);
  MIB.addReg(t2);

  // Generate movc
  unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MIB = BuildMI(newMBB, TII->get(cmovOpc), t3);
  MIB.addReg(t2);
  MIB.addReg(t1);

  // Compare and exchange if no one has modified the memory location
  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t3);
  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperands");
  (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
  MIB.addReg(X86::EAX);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(mInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}


MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineFunction *F = BB->getParent();
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);
    // Update machine-CFG edges by transferring all successors of the current
    // block to the new block which will contain the Phi node for the select.
    sinkMBB->transferSuccessors(BB);

    // Add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the control word...
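    // (FNSTCW16m stored all 16 bits of the control word into the stack
    // slot; we reload the saved word so the caller's rounding mode can be
    // restored after the truncating store below.)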
    unsigned OldCW =
      F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }
  case X86::ATOMAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
                                               X86::OR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMXOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
                                               X86::XOR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMNAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass, true);
  case X86::ATOMMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
  case X86::ATOMMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
  case X86::ATOMUMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
  case X86::ATOMUMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);

  case X86::ATOMAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
                                               X86::OR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMXOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
                                               X86::XOR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMNAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass, true);
  case X86::ATOMMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
  case X86::ATOMMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
  case X86::ATOMUMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
  case X86::ATOMUMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);

  case X86::ATOMAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
                                               X86::OR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMXOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
                                               X86::XOR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMNAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass, true);
  // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
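  // The 64-bit forms below expand through the same cmpxchg loop; e.g.
  // ATOMAND64 comes out roughly as (illustrative; the operands are vregs
  // until register allocation):
  //   .LnewMBB:
  //     movq  (addr), %t1
  //     movq  %t1, %t2
  //     andq  val, %t2
  //     movq  %t1, %rax
  //     lock cmpxchgq %t2, (addr)
  //     jne   .LnewMBB              # retry if [addr] changed underneath us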
  case X86::ATOMAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
                                               X86::OR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMXOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
                                               X86::XOR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMNAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass, true);
  case X86::ATOMMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
  case X86::ATOMMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
  case X86::ATOMUMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
  case X86::ATOMUMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);  // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
                                       Mask.getBitWidth() - 1);
    break;
  }
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
                                       GlobalValue* &GA, int64_t &Offset) const{
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
      return true;
    }
  }
  return TargetLowering::isGAPlusOffset(N, GA, Offset);
}

static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
                               const TargetLowering &TLI) {
  GlobalValue *GV;
  int64_t Offset = 0;
  if (TLI.isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= N && (Offset % N) == 0);
  // DAG combine handles the stack object case.
  return false;
}

static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask,
                                     unsigned NumElems, MVT EVT,
                                     SDNode *&Base,
                                     SelectionDAG &DAG, MachineFrameInfo *MFI,
                                     const TargetLowering &TLI) {
  Base = NULL;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base)
        return false;
      continue;
    }

    SDValue Elt = DAG.getShuffleScalarElt(N, i);
    if (!Elt.getNode() ||
        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
      return false;
    if (!Base) {
      Base = Elt.getNode();
      if (Base->getOpcode() == ISD::UNDEF)
        return false;
      continue;
    }
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;

    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
                               EVT.getSizeInBits()/8, i, MFI))
      return false;
  }
  return true;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  SDValue PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDNode *Base = NULL;
  if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
                                DAG, MFI, TLI))
    return SDValue();

  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                     LD->getSrcValueOffset(), LD->isVolatile(),
                     LD->getAlignment());
}

/// PerformBuildVectorCombine - build_vector (load i64 / f64), 0 -> movq / movsd.
static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget *Subtarget,
                                         const TargetLowering &TLI) {
  unsigned NumOps = N->getNumOperands();

  // Ignore single operand BUILD_VECTOR.
  if (NumOps == 1)
    return SDValue();

  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
    // We are looking for load i64 and zero extend.  We want to transform
    // it before legalizer has a chance to expand it.  Also look for i64
    // BUILD_PAIR bit casted to f64.
    return SDValue();
  // This must be an insertion into a zero vector.
  SDValue HighElt = N->getOperand(1);
  if (!isZeroNode(HighElt))
    return SDValue();

  // Value must be a load.
  SDNode *Base = N->getOperand(0).getNode();
  if (!isa<LoadSDNode>(Base)) {
    if (Base->getOpcode() != ISD::BIT_CONVERT)
      return SDValue();
    Base = Base->getOperand(0).getNode();
    if (!isa<LoadSDNode>(Base))
      return SDValue();
  }

  // Transform it into VZEXT_LOAD addr.
  LoadSDNode *LD = cast<LoadSDNode>(Base);

  // Load must not be an extload.
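  // (An extending load loads a narrower value and widens it in-register,
  // which would not match the plain zero-extended 64-bit load that
  // VZEXT_LOAD performs.)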
  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
  SDValue Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDValue LHS = N->getOperand(1);
      SDValue RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }

  }

  return SDValue();
}

/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget *Subtarget) {
  // Turn load->store of MMX types into GPR load/stores.  This avoids clobbering
  // the FP state in cases where an emms may be missing.
  // A preferable solution to the general problem is to figure out the right
  // places to insert EMMS.  This qualifies as a quick hack.
  StoreSDNode *St = cast<StoreSDNode>(N);
  if (St->getValue().getValueType().isVector() &&
      St->getValue().getValueType().getSizeInBits() == 64 &&
      isa<LoadSDNode>(St->getValue()) &&
      !cast<LoadSDNode>(St->getValue())->isVolatile() &&
      St->getChain().hasOneUse() && !St->isVolatile()) {
    SDNode* LdVal = St->getValue().getNode();
    LoadSDNode *Ld = 0;
    int TokenFactorIndex = -1;
    SmallVector<SDValue, 8> Ops;
    SDNode* ChainVal = St->getChain().getNode();
    // Must be a store of a load.  We currently handle two cases:  the load
    // is a direct child, and it's under an intervening TokenFactor.  It is
    // possible to dig deeper under nested TokenFactors.
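    // Schematically, the two accepted shapes are
    //   store (t = load addr), ptr   ; store's chain == t
    //   store (t = load addr), ptr   ; store's chain == TokenFactor(..., t)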
    if (ChainVal == LdVal)
      Ld = cast<LoadSDNode>(St->getChain());
    else if (St->getValue().hasOneUse() &&
             ChainVal->getOpcode() == ISD::TokenFactor) {
      for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
        if (ChainVal->getOperand(i).getNode() == LdVal) {
          TokenFactorIndex = i;
          Ld = cast<LoadSDNode>(St->getValue());
        } else
          Ops.push_back(ChainVal->getOperand(i));
      }
    }
    if (Ld) {
      // If we are a 64-bit capable x86, lower to a single movq load/store pair.
      if (Subtarget->is64Bit()) {
        SDValue NewLd = DAG.getLoad(MVT::i64, Ld->getChain(),
                                    Ld->getBasePtr(), Ld->getSrcValue(),
                                    Ld->getSrcValueOffset(), Ld->isVolatile(),
                                    Ld->getAlignment());
        SDValue NewChain = NewLd.getValue(1);
        if (TokenFactorIndex != -1) {
          Ops.push_back(NewChain);
          NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
                                 Ops.size());
        }
        return DAG.getStore(NewChain, NewLd, St->getBasePtr(),
                            St->getSrcValue(), St->getSrcValueOffset(),
                            St->isVolatile(), St->getAlignment());
      }

      // Otherwise, lower to two 32-bit copies.
      SDValue LoAddr = Ld->getBasePtr();
      SDValue HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
                                   DAG.getConstant(4, MVT::i32));

      SDValue LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset(),
                                 Ld->isVolatile(), Ld->getAlignment());
      SDValue HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
                                 Ld->isVolatile(),
                                 MinAlign(Ld->getAlignment(), 4));

      SDValue NewChain = LoLd.getValue(1);
      if (TokenFactorIndex != -1) {
        Ops.push_back(LoLd);
        Ops.push_back(HiLd);
        NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
                               Ops.size());
      }

      LoAddr = St->getBasePtr();
      HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
                           DAG.getConstant(4, MVT::i32));

      SDValue LoSt = DAG.getStore(NewChain, LoLd, LoAddr,
                                  St->getSrcValue(), St->getSrcValueOffset(),
                                  St->isVolatile(), St->getAlignment());
      SDValue HiSt = DAG.getStore(NewChain, HiLd, HiAddr,
                                  St->getSrcValue(),
                                  St->getSrcValueOffset() + 4,
                                  St->isVolatile(),
                                  MinAlign(St->getAlignment(), 4));
      return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
    }
  }
  return SDValue();
}

/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
  // F[X]OR(0.0, x) -> x
  // F[X]OR(x, 0.0) -> x
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  return SDValue();
}

/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
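/// Both this fold and the F[X]OR ones above are sound because these nodes
/// operate on the raw FP bit patterns and +0.0 is the all-zero pattern;
/// -0.0 (sign bit set) is deliberately excluded by the isPosZero checks.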
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
  // FAND(0.0, x) -> 0.0
  // FAND(x, 0.0) -> 0.0
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  return SDValue();
}


SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
  case ISD::BUILD_VECTOR:
    return PerformBuildVectorCombine(N, DAG, Subtarget, *this);
  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
  case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
  case X86ISD::FXOR:
  case X86ISD::FOR:         return PerformFORCombine(N, DAG);
  case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'f':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'y':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
LowerXConstraint(MVT ConstraintVT) const {
  // FP X constraints get lowered to SSE1/2 registers if available, otherwise
  // 'f' like normal targets.
  if (ConstraintVT.isFloatingPoint()) {
    if (Subtarget->hasSSE2())
      return "Y";
    if (Subtarget->hasSSE1())
      return "x";
  }

  return TargetLowering::LowerXConstraint(ConstraintVT);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     char Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
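    // (e.g. "i"(42); a global's address, with an optional constant offset,
    // is also accepted below when referencing it requires no extra load.)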
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C)
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the operands the other way around to match (C+GA) as well.
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we can't
      // match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'f':  // FP Stack registers.
      // If SSE is enabled for this VT, use f80 to ensure the isel moves the
      // value to the correct fpstack register class.
      if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP32RegisterClass);
      if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP64RegisterClass);
      return std::make_pair(0U, X86::RFP80RegisterClass);
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT.getSimpleVT()) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
  // turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
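  // For example, "{ax}" with VT == MVT::i32 is remapped to EAX/GR32 below,
  // and with VT == MVT::i8 to AL/GR8.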
  if (Res.second == X86::GR16RegisterClass) {
    if (VT == MVT::i8) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::AL; break;
      case X86::DX: DestReg = X86::DL; break;
      case X86::CX: DestReg = X86::CL; break;
      case X86::BX: DestReg = X86::BL; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR8RegisterClass;
      }
    } else if (VT == MVT::i32) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::EAX; break;
      case X86::DX: DestReg = X86::EDX; break;
      case X86::CX: DestReg = X86::ECX; break;
      case X86::BX: DestReg = X86::EBX; break;
      case X86::SI: DestReg = X86::ESI; break;
      case X86::DI: DestReg = X86::EDI; break;
      case X86::BP: DestReg = X86::EBP; break;
      case X86::SP: DestReg = X86::ESP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR32RegisterClass;
      }
    } else if (VT == MVT::i64) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::RAX; break;
      case X86::DX: DestReg = X86::RDX; break;
      case X86::CX: DestReg = X86::RCX; break;
      case X86::BX: DestReg = X86::RBX; break;
      case X86::SI: DestReg = X86::RSI; break;
      case X86::DI: DestReg = X86::RDI; break;
      case X86::BP: DestReg = X86::RBP; break;
      case X86::SP: DestReg = X86::RSP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR64RegisterClass;
      }
    }
  } else if (Res.second == X86::FR32RegisterClass ||
             Res.second == X86::FR64RegisterClass ||
             Res.second == X86::VR128RegisterClass) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class.  This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it can
    // find, ignoring the required type.
    if (VT == MVT::f32)
      Res.second = X86::FR32RegisterClass;
    else if (VT == MVT::f64)
      Res.second = X86::FR64RegisterClass;
    else if (X86::VR128RegisterClass->hasType(VT))
      Res.second = X86::VR128RegisterClass;
  }

  return Res;
}