X86ISelLowering.cpp revision 4002a1b6f1cb7c7db7965bd281fb29664fd6c816
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that X86 uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86.h" 16#include "X86InstrBuilder.h" 17#include "X86ISelLowering.h" 18#include "X86MachineFunctionInfo.h" 19#include "X86TargetMachine.h" 20#include "llvm/CallingConv.h" 21#include "llvm/Constants.h" 22#include "llvm/DerivedTypes.h" 23#include "llvm/GlobalVariable.h" 24#include "llvm/Function.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/ADT/BitVector.h" 27#include "llvm/ADT/VectorExtras.h" 28#include "llvm/CodeGen/CallingConvLower.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineFunction.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/MachineModuleInfo.h" 33#include "llvm/CodeGen/MachineRegisterInfo.h" 34#include "llvm/CodeGen/PseudoSourceValue.h" 35#include "llvm/CodeGen/SelectionDAG.h" 36#include "llvm/Support/MathExtras.h" 37#include "llvm/Support/Debug.h" 38#include "llvm/Target/TargetOptions.h" 39#include "llvm/ADT/SmallSet.h" 40#include "llvm/ADT/StringExtras.h" 41using namespace llvm; 42 43// Forward declarations. 44static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG); 45 46X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) 47 : TargetLowering(TM) { 48 Subtarget = &TM.getSubtarget<X86Subtarget>(); 49 X86ScalarSSEf64 = Subtarget->hasSSE2(); 50 X86ScalarSSEf32 = Subtarget->hasSSE1(); 51 X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; 52 53 bool Fast = false; 54 55 RegInfo = TM.getRegisterInfo(); 56 TD = getTargetData(); 57 58 // Set up the TargetLowering object. 59 60 // X86 is weird, it always uses i8 for shift amounts and setcc results. 61 setShiftAmountType(MVT::i8); 62 setSetCCResultContents(ZeroOrOneSetCCResult); 63 setSchedulingPreference(SchedulingForRegPressure); 64 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 65 setStackPointerRegisterToSaveRestore(X86StackPtr); 66 67 if (Subtarget->isTargetDarwin()) { 68 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 69 setUseUnderscoreSetJmp(false); 70 setUseUnderscoreLongJmp(false); 71 } else if (Subtarget->isTargetMingw()) { 72 // MS runtime is weird: it exports _setjmp, but longjmp! 73 setUseUnderscoreSetJmp(true); 74 setUseUnderscoreLongJmp(false); 75 } else { 76 setUseUnderscoreSetJmp(true); 77 setUseUnderscoreLongJmp(true); 78 } 79 80 // Set up the register classes. 81 addRegisterClass(MVT::i8, X86::GR8RegisterClass); 82 addRegisterClass(MVT::i16, X86::GR16RegisterClass); 83 addRegisterClass(MVT::i32, X86::GR32RegisterClass); 84 if (Subtarget->is64Bit()) 85 addRegisterClass(MVT::i64, X86::GR64RegisterClass); 86 87 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 88 89 // We don't accept any truncstore of integer registers. 90 setTruncStoreAction(MVT::i64, MVT::i32, Expand); 91 setTruncStoreAction(MVT::i64, MVT::i16, Expand); 92 setTruncStoreAction(MVT::i64, MVT::i8 , Expand); 93 setTruncStoreAction(MVT::i32, MVT::i16, Expand); 94 setTruncStoreAction(MVT::i32, MVT::i8 , Expand); 95 setTruncStoreAction(MVT::i16, MVT::i8, Expand); 96 97 // SETOEQ and SETUNE require checking two conditions. 
98 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); 99 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); 100 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand); 101 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand); 102 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); 103 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); 104 105 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 106 // operation. 107 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 108 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 109 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 110 111 if (Subtarget->is64Bit()) { 112 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); 113 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 114 } else { 115 if (X86ScalarSSEf64) { 116 // We have an impenetrably clever algorithm for ui64->double only. 117 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); 118 // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP. 119 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 120 } else 121 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 122 } 123 124 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 125 // this operation. 126 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 127 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 128 // SSE has no i16 to fp conversion, only i32 129 if (X86ScalarSSEf32) { 130 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 131 // f32 and f64 cases are Legal, f80 case is not 132 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 133 } else { 134 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 135 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 136 } 137 138 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 139 // are Legal, f80 is custom lowered. 140 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 141 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 142 143 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 144 // this operation. 145 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 146 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 147 148 if (X86ScalarSSEf32) { 149 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 150 // f32 and f64 cases are Legal, f80 case is not 151 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 152 } else { 153 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 154 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 155 } 156 157 // Handle FP_TO_UINT by promoting the destination to a larger signed 158 // conversion. 159 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 160 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 161 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 162 163 if (Subtarget->is64Bit()) { 164 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); 165 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 166 } else { 167 if (X86ScalarSSEf32 && !Subtarget->hasSSE3()) 168 // Expand FP_TO_UINT into a select. 169 // FIXME: We would like to use a Custom expander here eventually to do 170 // the optimal thing for SSE vs. the default expansion in the legalizer. 171 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 172 else 173 // With SSE3 we can use fisttpll to convert to a signed i64. 
174 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 175 } 176 177 // TODO: when we have SSE, these could be more efficient, by using movd/movq. 178 if (!X86ScalarSSEf64) { 179 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 180 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 181 } 182 183 // Scalar integer divide and remainder are lowered to use operations that 184 // produce two results, to match the available instructions. This exposes 185 // the two-result form to trivial CSE, which is able to combine x/y and x%y 186 // into a single instruction. 187 // 188 // Scalar integer multiply-high is also lowered to use two-result 189 // operations, to match the available instructions. However, plain multiply 190 // (low) operations are left as Legal, as there are single-result 191 // instructions for this in x86. Using the two-result multiply instructions 192 // when both high and low results are needed must be arranged by dagcombine. 193 setOperationAction(ISD::MULHS , MVT::i8 , Expand); 194 setOperationAction(ISD::MULHU , MVT::i8 , Expand); 195 setOperationAction(ISD::SDIV , MVT::i8 , Expand); 196 setOperationAction(ISD::UDIV , MVT::i8 , Expand); 197 setOperationAction(ISD::SREM , MVT::i8 , Expand); 198 setOperationAction(ISD::UREM , MVT::i8 , Expand); 199 setOperationAction(ISD::MULHS , MVT::i16 , Expand); 200 setOperationAction(ISD::MULHU , MVT::i16 , Expand); 201 setOperationAction(ISD::SDIV , MVT::i16 , Expand); 202 setOperationAction(ISD::UDIV , MVT::i16 , Expand); 203 setOperationAction(ISD::SREM , MVT::i16 , Expand); 204 setOperationAction(ISD::UREM , MVT::i16 , Expand); 205 setOperationAction(ISD::MULHS , MVT::i32 , Expand); 206 setOperationAction(ISD::MULHU , MVT::i32 , Expand); 207 setOperationAction(ISD::SDIV , MVT::i32 , Expand); 208 setOperationAction(ISD::UDIV , MVT::i32 , Expand); 209 setOperationAction(ISD::SREM , MVT::i32 , Expand); 210 setOperationAction(ISD::UREM , MVT::i32 , Expand); 211 setOperationAction(ISD::MULHS , MVT::i64 , Expand); 212 setOperationAction(ISD::MULHU , MVT::i64 , Expand); 213 setOperationAction(ISD::SDIV , MVT::i64 , Expand); 214 setOperationAction(ISD::UDIV , MVT::i64 , Expand); 215 setOperationAction(ISD::SREM , MVT::i64 , Expand); 216 setOperationAction(ISD::UREM , MVT::i64 , Expand); 217 218 setOperationAction(ISD::BR_JT , MVT::Other, Expand); 219 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 220 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 221 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 222 if (Subtarget->is64Bit()) 223 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 224 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); 225 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 226 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 227 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 228 setOperationAction(ISD::FREM , MVT::f32 , Expand); 229 setOperationAction(ISD::FREM , MVT::f64 , Expand); 230 setOperationAction(ISD::FREM , MVT::f80 , Expand); 231 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); 232 233 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 234 setOperationAction(ISD::CTTZ , MVT::i8 , Custom); 235 setOperationAction(ISD::CTLZ , MVT::i8 , Custom); 236 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 237 setOperationAction(ISD::CTTZ , MVT::i16 , Custom); 238 setOperationAction(ISD::CTLZ , MVT::i16 , Custom); 239 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 240 setOperationAction(ISD::CTTZ , MVT::i32 , 
Custom); 241 setOperationAction(ISD::CTLZ , MVT::i32 , Custom); 242 if (Subtarget->is64Bit()) { 243 setOperationAction(ISD::CTPOP , MVT::i64 , Expand); 244 setOperationAction(ISD::CTTZ , MVT::i64 , Custom); 245 setOperationAction(ISD::CTLZ , MVT::i64 , Custom); 246 } 247 248 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 249 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 250 251 // These should be promoted to a larger select which is supported. 252 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 253 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 254 // X86 wants to expand cmov itself. 255 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 256 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 257 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 258 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 259 setOperationAction(ISD::SELECT , MVT::f80 , Custom); 260 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 261 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 262 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 263 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 264 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 265 setOperationAction(ISD::SETCC , MVT::f80 , Custom); 266 if (Subtarget->is64Bit()) { 267 setOperationAction(ISD::SELECT , MVT::i64 , Custom); 268 setOperationAction(ISD::SETCC , MVT::i64 , Custom); 269 } 270 // X86 ret instruction may pop stack. 271 setOperationAction(ISD::RET , MVT::Other, Custom); 272 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); 273 274 // Darwin ABI issue. 275 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 276 setOperationAction(ISD::JumpTable , MVT::i32 , Custom); 277 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 278 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); 279 if (Subtarget->is64Bit()) 280 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 281 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 282 if (Subtarget->is64Bit()) { 283 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); 284 setOperationAction(ISD::JumpTable , MVT::i64 , Custom); 285 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); 286 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom); 287 } 288 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 289 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 290 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 291 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 292 if (Subtarget->is64Bit()) { 293 setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom); 294 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); 295 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); 296 } 297 298 if (Subtarget->hasSSE1()) 299 setOperationAction(ISD::PREFETCH , MVT::Other, Legal); 300 301 if (!Subtarget->hasSSE2()) 302 setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); 303 304 // Expand certain atomics 305 setOperationAction(ISD::ATOMIC_CMP_SWAP_8 , MVT::i8, Custom); 306 setOperationAction(ISD::ATOMIC_CMP_SWAP_16, MVT::i16, Custom); 307 setOperationAction(ISD::ATOMIC_CMP_SWAP_32, MVT::i32, Custom); 308 setOperationAction(ISD::ATOMIC_CMP_SWAP_64, MVT::i64, Custom); 309 310 setOperationAction(ISD::ATOMIC_LOAD_SUB_8 , MVT::i8, Custom); 311 setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Custom); 312 setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Custom); 313 setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom); 314 315 if (!Subtarget->is64Bit()) { 316 
setOperationAction(ISD::ATOMIC_LOAD_ADD_64, MVT::i64, Custom); 317 setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom); 318 setOperationAction(ISD::ATOMIC_LOAD_AND_64, MVT::i64, Custom); 319 setOperationAction(ISD::ATOMIC_LOAD_OR_64, MVT::i64, Custom); 320 setOperationAction(ISD::ATOMIC_LOAD_XOR_64, MVT::i64, Custom); 321 setOperationAction(ISD::ATOMIC_LOAD_NAND_64, MVT::i64, Custom); 322 setOperationAction(ISD::ATOMIC_SWAP_64, MVT::i64, Custom); 323 } 324 325 // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion. 326 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); 327 // FIXME - use subtarget debug flags 328 if (!Subtarget->isTargetDarwin() && 329 !Subtarget->isTargetELF() && 330 !Subtarget->isTargetCygMing()) { 331 setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); 332 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); 333 } 334 335 setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); 336 setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); 337 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 338 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 339 if (Subtarget->is64Bit()) { 340 setExceptionPointerRegister(X86::RAX); 341 setExceptionSelectorRegister(X86::RDX); 342 } else { 343 setExceptionPointerRegister(X86::EAX); 344 setExceptionSelectorRegister(X86::EDX); 345 } 346 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); 347 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); 348 349 setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); 350 351 setOperationAction(ISD::TRAP, MVT::Other, Legal); 352 353 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 354 setOperationAction(ISD::VASTART , MVT::Other, Custom); 355 setOperationAction(ISD::VAEND , MVT::Other, Expand); 356 if (Subtarget->is64Bit()) { 357 setOperationAction(ISD::VAARG , MVT::Other, Custom); 358 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 359 } else { 360 setOperationAction(ISD::VAARG , MVT::Other, Expand); 361 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 362 } 363 364 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 365 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 366 if (Subtarget->is64Bit()) 367 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); 368 if (Subtarget->isTargetCygMing()) 369 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); 370 else 371 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 372 373 if (X86ScalarSSEf64) { 374 // f32 and f64 use SSE. 375 // Set up the FP register classes. 376 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 377 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 378 379 // Use ANDPD to simulate FABS. 380 setOperationAction(ISD::FABS , MVT::f64, Custom); 381 setOperationAction(ISD::FABS , MVT::f32, Custom); 382 383 // Use XORP to simulate FNEG. 384 setOperationAction(ISD::FNEG , MVT::f64, Custom); 385 setOperationAction(ISD::FNEG , MVT::f32, Custom); 386 387 // Use ANDPD and ORPD to simulate FCOPYSIGN. 388 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 389 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 390 391 // We don't support sin/cos/fmod 392 setOperationAction(ISD::FSIN , MVT::f64, Expand); 393 setOperationAction(ISD::FCOS , MVT::f64, Expand); 394 setOperationAction(ISD::FSIN , MVT::f32, Expand); 395 setOperationAction(ISD::FCOS , MVT::f32, Expand); 396 397 // Expand FP immediates into loads from the stack, except for the special 398 // cases we handle. 
399 addLegalFPImmediate(APFloat(+0.0)); // xorpd 400 addLegalFPImmediate(APFloat(+0.0f)); // xorps 401 402 // Floating truncations from f80 and extensions to f80 go through memory. 403 // If optimizing, we lie about this though and handle it in 404 // InstructionSelectPreprocess so that dagcombine2 can hack on these. 405 if (Fast) { 406 setConvertAction(MVT::f32, MVT::f80, Expand); 407 setConvertAction(MVT::f64, MVT::f80, Expand); 408 setConvertAction(MVT::f80, MVT::f32, Expand); 409 setConvertAction(MVT::f80, MVT::f64, Expand); 410 } 411 } else if (X86ScalarSSEf32) { 412 // Use SSE for f32, x87 for f64. 413 // Set up the FP register classes. 414 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 415 addRegisterClass(MVT::f64, X86::RFP64RegisterClass); 416 417 // Use ANDPS to simulate FABS. 418 setOperationAction(ISD::FABS , MVT::f32, Custom); 419 420 // Use XORP to simulate FNEG. 421 setOperationAction(ISD::FNEG , MVT::f32, Custom); 422 423 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 424 425 // Use ANDPS and ORPS to simulate FCOPYSIGN. 426 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 427 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 428 429 // We don't support sin/cos/fmod 430 setOperationAction(ISD::FSIN , MVT::f32, Expand); 431 setOperationAction(ISD::FCOS , MVT::f32, Expand); 432 433 // Special cases we handle for FP constants. 434 addLegalFPImmediate(APFloat(+0.0f)); // xorps 435 addLegalFPImmediate(APFloat(+0.0)); // FLD0 436 addLegalFPImmediate(APFloat(+1.0)); // FLD1 437 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS 438 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS 439 440 // SSE <-> X87 conversions go through memory. If optimizing, we lie about 441 // this though and handle it in InstructionSelectPreprocess so that 442 // dagcombine2 can hack on these. 443 if (Fast) { 444 setConvertAction(MVT::f32, MVT::f64, Expand); 445 setConvertAction(MVT::f32, MVT::f80, Expand); 446 setConvertAction(MVT::f80, MVT::f32, Expand); 447 setConvertAction(MVT::f64, MVT::f32, Expand); 448 // And x87->x87 truncations also. 449 setConvertAction(MVT::f80, MVT::f64, Expand); 450 } 451 452 if (!UnsafeFPMath) { 453 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 454 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 455 } 456 } else { 457 // f32 and f64 in x87. 458 // Set up the FP register classes. 459 addRegisterClass(MVT::f64, X86::RFP64RegisterClass); 460 addRegisterClass(MVT::f32, X86::RFP32RegisterClass); 461 462 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 463 setOperationAction(ISD::UNDEF, MVT::f32, Expand); 464 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 465 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 466 467 // Floating truncations go through memory. If optimizing, we lie about 468 // this though and handle it in InstructionSelectPreprocess so that 469 // dagcombine2 can hack on these. 
470 if (Fast) { 471 setConvertAction(MVT::f80, MVT::f32, Expand); 472 setConvertAction(MVT::f64, MVT::f32, Expand); 473 setConvertAction(MVT::f80, MVT::f64, Expand); 474 } 475 476 if (!UnsafeFPMath) { 477 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 478 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 479 } 480 addLegalFPImmediate(APFloat(+0.0)); // FLD0 481 addLegalFPImmediate(APFloat(+1.0)); // FLD1 482 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS 483 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS 484 addLegalFPImmediate(APFloat(+0.0f)); // FLD0 485 addLegalFPImmediate(APFloat(+1.0f)); // FLD1 486 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS 487 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS 488 } 489 490 // Long double always uses X87. 491 addRegisterClass(MVT::f80, X86::RFP80RegisterClass); 492 setOperationAction(ISD::UNDEF, MVT::f80, Expand); 493 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); 494 { 495 bool ignored; 496 APFloat TmpFlt(+0.0); 497 TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, 498 &ignored); 499 addLegalFPImmediate(TmpFlt); // FLD0 500 TmpFlt.changeSign(); 501 addLegalFPImmediate(TmpFlt); // FLD0/FCHS 502 APFloat TmpFlt2(+1.0); 503 TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, 504 &ignored); 505 addLegalFPImmediate(TmpFlt2); // FLD1 506 TmpFlt2.changeSign(); 507 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS 508 } 509 510 if (!UnsafeFPMath) { 511 setOperationAction(ISD::FSIN , MVT::f80 , Expand); 512 setOperationAction(ISD::FCOS , MVT::f80 , Expand); 513 } 514 515 // Always use a library call for pow. 516 setOperationAction(ISD::FPOW , MVT::f32 , Expand); 517 setOperationAction(ISD::FPOW , MVT::f64 , Expand); 518 setOperationAction(ISD::FPOW , MVT::f80 , Expand); 519 520 setOperationAction(ISD::FLOG, MVT::f80, Expand); 521 setOperationAction(ISD::FLOG2, MVT::f80, Expand); 522 setOperationAction(ISD::FLOG10, MVT::f80, Expand); 523 setOperationAction(ISD::FEXP, MVT::f80, Expand); 524 setOperationAction(ISD::FEXP2, MVT::f80, Expand); 525 526 // First set operation action for all vector types to expand. Then we 527 // will selectively turn on ones that can be effectively codegen'd. 
528 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 529 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 530 setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand); 531 setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand); 532 setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand); 533 setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand); 534 setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand); 535 setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand); 536 setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand); 537 setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand); 538 setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand); 539 setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand); 540 setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand); 541 setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand); 542 setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand); 543 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand); 544 setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand); 545 setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand); 546 setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand); 547 setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); 548 setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); 549 setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand); 550 setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); 551 setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); 552 setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); 553 setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); 554 setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); 555 setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand); 556 setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand); 557 setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand); 558 setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand); 559 setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand); 560 setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand); 561 setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand); 562 setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand); 563 setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand); 564 setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand); 565 setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand); 566 setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); 567 setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand); 568 setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand); 569 setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand); 570 setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand); 571 setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand); 572 setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand); 573 } 574 575 if (Subtarget->hasMMX()) { 576 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 577 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 578 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 579 addRegisterClass(MVT::v2f32, X86::VR64RegisterClass); 580 addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); 581 582 // FIXME: add MMX packed arithmetics 583 584 
setOperationAction(ISD::ADD, MVT::v8i8, Legal); 585 setOperationAction(ISD::ADD, MVT::v4i16, Legal); 586 setOperationAction(ISD::ADD, MVT::v2i32, Legal); 587 setOperationAction(ISD::ADD, MVT::v1i64, Legal); 588 589 setOperationAction(ISD::SUB, MVT::v8i8, Legal); 590 setOperationAction(ISD::SUB, MVT::v4i16, Legal); 591 setOperationAction(ISD::SUB, MVT::v2i32, Legal); 592 setOperationAction(ISD::SUB, MVT::v1i64, Legal); 593 594 setOperationAction(ISD::MULHS, MVT::v4i16, Legal); 595 setOperationAction(ISD::MUL, MVT::v4i16, Legal); 596 597 setOperationAction(ISD::AND, MVT::v8i8, Promote); 598 AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); 599 setOperationAction(ISD::AND, MVT::v4i16, Promote); 600 AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); 601 setOperationAction(ISD::AND, MVT::v2i32, Promote); 602 AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); 603 setOperationAction(ISD::AND, MVT::v1i64, Legal); 604 605 setOperationAction(ISD::OR, MVT::v8i8, Promote); 606 AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); 607 setOperationAction(ISD::OR, MVT::v4i16, Promote); 608 AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); 609 setOperationAction(ISD::OR, MVT::v2i32, Promote); 610 AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); 611 setOperationAction(ISD::OR, MVT::v1i64, Legal); 612 613 setOperationAction(ISD::XOR, MVT::v8i8, Promote); 614 AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); 615 setOperationAction(ISD::XOR, MVT::v4i16, Promote); 616 AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); 617 setOperationAction(ISD::XOR, MVT::v2i32, Promote); 618 AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); 619 setOperationAction(ISD::XOR, MVT::v1i64, Legal); 620 621 setOperationAction(ISD::LOAD, MVT::v8i8, Promote); 622 AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); 623 setOperationAction(ISD::LOAD, MVT::v4i16, Promote); 624 AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); 625 setOperationAction(ISD::LOAD, MVT::v2i32, Promote); 626 AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); 627 setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 628 AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64); 629 setOperationAction(ISD::LOAD, MVT::v1i64, Legal); 630 631 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); 632 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); 633 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); 634 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); 635 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); 636 637 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); 638 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); 639 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); 640 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); 641 642 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom); 643 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); 644 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); 645 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); 646 647 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); 648 } 649 650 if (Subtarget->hasSSE1()) { 651 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 652 653 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 654 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 655 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 656 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 657 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 658 
setOperationAction(ISD::FNEG, MVT::v4f32, Custom); 659 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 660 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 661 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 662 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 663 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 664 setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); 665 } 666 667 if (Subtarget->hasSSE2()) { 668 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 669 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 670 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 671 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 672 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 673 674 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 675 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 676 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 677 setOperationAction(ISD::ADD, MVT::v2i64, Legal); 678 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 679 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 680 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 681 setOperationAction(ISD::SUB, MVT::v2i64, Legal); 682 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 683 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 684 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 685 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 686 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 687 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 688 setOperationAction(ISD::FNEG, MVT::v2f64, Custom); 689 690 setOperationAction(ISD::VSETCC, MVT::v2f64, Custom); 691 setOperationAction(ISD::VSETCC, MVT::v16i8, Custom); 692 setOperationAction(ISD::VSETCC, MVT::v8i16, Custom); 693 setOperationAction(ISD::VSETCC, MVT::v4i32, Custom); 694 695 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 696 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 697 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 698 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 699 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 700 701 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 702 for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) { 703 MVT VT = (MVT::SimpleValueType)i; 704 // Do not attempt to custom lower non-power-of-2 vectors 705 if (!isPowerOf2_32(VT.getVectorNumElements())) 706 continue; 707 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 708 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 709 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 710 } 711 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 712 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 713 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 714 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 715 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); 716 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 717 if (Subtarget->is64Bit()) { 718 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); 719 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 720 } 721 722 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
723 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 724 setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); 725 AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64); 726 setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); 727 AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64); 728 setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); 729 AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64); 730 setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); 731 AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64); 732 setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); 733 AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64); 734 } 735 736 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 737 738 // Custom lower v2i64 and v2f64 selects. 739 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 740 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 741 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 742 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 743 744 } 745 746 if (Subtarget->hasSSE41()) { 747 // FIXME: Do we need to handle scalar-to-vector here? 748 setOperationAction(ISD::MUL, MVT::v4i32, Legal); 749 setOperationAction(ISD::MUL, MVT::v2i64, Legal); 750 751 // i8 and i16 vectors are custom , because the source register and source 752 // source memory operand types are not the same width. f32 vectors are 753 // custom since the immediate controlling the insert encodes additional 754 // information. 755 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); 756 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 757 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); 758 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 759 760 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom); 761 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom); 762 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); 763 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 764 765 if (Subtarget->is64Bit()) { 766 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); 767 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); 768 } 769 } 770 771 if (Subtarget->hasSSE42()) { 772 setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); 773 } 774 775 // We want to custom lower some of our intrinsics. 776 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 777 778 // We have target-specific dag combine patterns for the following nodes: 779 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 780 setTargetDAGCombine(ISD::BUILD_VECTOR); 781 setTargetDAGCombine(ISD::SELECT); 782 setTargetDAGCombine(ISD::STORE); 783 784 computeRegisterProperties(); 785 786 // FIXME: These should be based on subtarget info. Plus, the values should 787 // be smaller when we are in optimizing for size mode. 788 maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores 789 maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores 790 maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores 791 allowUnalignedMemoryAccesses = true; // x86 supports it! 792 setPrefLoopAlignment(16); 793} 794 795 796MVT X86TargetLowering::getSetCCResultType(const SDValue &) const { 797 return MVT::i8; 798} 799 800 801/// getMaxByValAlign - Helper for getByValTypeAlignment to determine 802/// the desired ByVal argument alignment. 
803static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) { 804 if (MaxAlign == 16) 805 return; 806 if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { 807 if (VTy->getBitWidth() == 128) 808 MaxAlign = 16; 809 } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 810 unsigned EltAlign = 0; 811 getMaxByValAlign(ATy->getElementType(), EltAlign); 812 if (EltAlign > MaxAlign) 813 MaxAlign = EltAlign; 814 } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { 815 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 816 unsigned EltAlign = 0; 817 getMaxByValAlign(STy->getElementType(i), EltAlign); 818 if (EltAlign > MaxAlign) 819 MaxAlign = EltAlign; 820 if (MaxAlign == 16) 821 break; 822 } 823 } 824 return; 825} 826 827/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 828/// function arguments in the caller parameter area. For X86, aggregates 829/// that contain SSE vectors are placed at 16-byte boundaries while the rest 830/// are at 4-byte boundaries. 831unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { 832 if (Subtarget->is64Bit()) { 833 // Max of 8 and alignment of type. 834 unsigned TyAlign = TD->getABITypeAlignment(Ty); 835 if (TyAlign > 8) 836 return TyAlign; 837 return 8; 838 } 839 840 unsigned Align = 4; 841 if (Subtarget->hasSSE1()) 842 getMaxByValAlign(Ty, Align); 843 return Align; 844} 845 846/// getOptimalMemOpType - Returns the target specific optimal type for load 847/// and store operations as a result of memset, memcpy, and memmove 848/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for 849/// determining it. 850MVT 851X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, 852 bool isSrcConst, bool isSrcStr) const { 853 // FIXME: This turns off use of xmm stores for memset/memcpy on targets like 854 // linux. This is because the stack realignment code can't handle certain 855 // cases like PR2962. This should be removed when PR2962 is fixed. 856 if (Subtarget->getStackAlignment() >= 16) { 857 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16) 858 return MVT::v4i32; 859 if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16) 860 return MVT::v4f32; 861 } 862 if (Subtarget->is64Bit() && Size >= 8) 863 return MVT::i64; 864 return MVT::i32; 865} 866 867 868/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC 869/// jumptable. 870SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, 871 SelectionDAG &DAG) const { 872 if (usesGlobalOffsetTable()) 873 return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy()); 874 if (!Subtarget->isPICStyleRIPRel()) 875 return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()); 876 return Table; 877} 878 879//===----------------------------------------------------------------------===// 880// Return Value Calling Convention Implementation 881//===----------------------------------------------------------------------===// 882 883#include "X86GenCallingConv.inc" 884 885/// LowerRET - Lower an ISD::RET node. 
886SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { 887 assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args"); 888 889 SmallVector<CCValAssign, 16> RVLocs; 890 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); 891 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 892 CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); 893 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86); 894 895 // If this is the first return lowered for this function, add the regs to the 896 // liveout set for the function. 897 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 898 for (unsigned i = 0; i != RVLocs.size(); ++i) 899 if (RVLocs[i].isRegLoc()) 900 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 901 } 902 SDValue Chain = Op.getOperand(0); 903 904 // Handle tail call return. 905 Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL); 906 if (Chain.getOpcode() == X86ISD::TAILCALL) { 907 SDValue TailCall = Chain; 908 SDValue TargetAddress = TailCall.getOperand(1); 909 SDValue StackAdjustment = TailCall.getOperand(2); 910 assert(((TargetAddress.getOpcode() == ISD::Register && 911 (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX || 912 cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) || 913 TargetAddress.getOpcode() == ISD::TargetExternalSymbol || 914 TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && 915 "Expecting an global address, external symbol, or register"); 916 assert(StackAdjustment.getOpcode() == ISD::Constant && 917 "Expecting a const value"); 918 919 SmallVector<SDValue,8> Operands; 920 Operands.push_back(Chain.getOperand(0)); 921 Operands.push_back(TargetAddress); 922 Operands.push_back(StackAdjustment); 923 // Copy registers used by the call. Last operand is a flag so it is not 924 // copied. 925 for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) { 926 Operands.push_back(Chain.getOperand(i)); 927 } 928 return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0], 929 Operands.size()); 930 } 931 932 // Regular return. 933 SDValue Flag; 934 935 SmallVector<SDValue, 6> RetOps; 936 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 937 // Operand #1 = Bytes To Pop 938 RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 939 940 // Copy the result values into the output registers. 941 for (unsigned i = 0; i != RVLocs.size(); ++i) { 942 CCValAssign &VA = RVLocs[i]; 943 assert(VA.isRegLoc() && "Can only return in registers!"); 944 SDValue ValToCopy = Op.getOperand(i*2+1); 945 946 // Returns in ST0/ST1 are handled specially: these are pushed as operands to 947 // the RET instruction and handled by the FP Stackifier. 948 if (RVLocs[i].getLocReg() == X86::ST0 || 949 RVLocs[i].getLocReg() == X86::ST1) { 950 // If this is a copy from an xmm register to ST(0), use an FPExtend to 951 // change the value to the FP stack register class. 952 if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) 953 ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy); 954 RetOps.push_back(ValToCopy); 955 // Don't emit a copytoreg. 956 continue; 957 } 958 959 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag); 960 Flag = Chain.getValue(1); 961 } 962 963 // The x86-64 ABI for returning structs by value requires that we copy 964 // the sret argument into %rax for the return. We saved the argument into 965 // a virtual register in the entry block, so now we copy the value out 966 // and into %rax. 
967 if (Subtarget->is64Bit() && 968 DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { 969 MachineFunction &MF = DAG.getMachineFunction(); 970 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 971 unsigned Reg = FuncInfo->getSRetReturnReg(); 972 if (!Reg) { 973 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); 974 FuncInfo->setSRetReturnReg(Reg); 975 } 976 SDValue Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy()); 977 978 Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag); 979 Flag = Chain.getValue(1); 980 } 981 982 RetOps[0] = Chain; // Update chain. 983 984 // Add the flag if we have it. 985 if (Flag.getNode()) 986 RetOps.push_back(Flag); 987 988 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, &RetOps[0], RetOps.size()); 989} 990 991 992/// LowerCallResult - Lower the result values of an ISD::CALL into the 993/// appropriate copies out of appropriate physical registers. This assumes that 994/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 995/// being lowered. The returns a SDNode with the same number of values as the 996/// ISD::CALL. 997SDNode *X86TargetLowering:: 998LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, 999 unsigned CallingConv, SelectionDAG &DAG) { 1000 1001 // Assign locations to each value returned by this call. 1002 SmallVector<CCValAssign, 16> RVLocs; 1003 bool isVarArg = TheCall->isVarArg(); 1004 CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); 1005 CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); 1006 1007 SmallVector<SDValue, 8> ResultVals; 1008 1009 // Copy all of the result registers out of their specified physreg. 1010 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1011 MVT CopyVT = RVLocs[i].getValVT(); 1012 1013 // If this is a call to a function that returns an fp value on the floating 1014 // point stack, but where we prefer to use the value in xmm registers, copy 1015 // it out as F80 and use a truncate to move it from fp stack reg to xmm reg. 1016 if ((RVLocs[i].getLocReg() == X86::ST0 || 1017 RVLocs[i].getLocReg() == X86::ST1) && 1018 isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) { 1019 CopyVT = MVT::f80; 1020 } 1021 1022 Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(), 1023 CopyVT, InFlag).getValue(1); 1024 SDValue Val = Chain.getValue(0); 1025 InFlag = Chain.getValue(2); 1026 1027 if (CopyVT != RVLocs[i].getValVT()) { 1028 // Round the F80 the right size, which also moves to the appropriate xmm 1029 // register. 1030 Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val, 1031 // This truncation won't change the value. 1032 DAG.getIntPtrConstant(1)); 1033 } 1034 1035 ResultVals.push_back(Val); 1036 } 1037 1038 // Merge everything together with a MERGE_VALUES node. 1039 ResultVals.push_back(Chain); 1040 return DAG.getMergeValues(TheCall->getVTList(), &ResultVals[0], 1041 ResultVals.size()).getNode(); 1042} 1043 1044 1045//===----------------------------------------------------------------------===// 1046// C & StdCall & Fast Calling Convention implementation 1047//===----------------------------------------------------------------------===// 1048// StdCall calling convention seems to be standard for many Windows' API 1049// routines and around. It differs from C calling convention just a little: 1050// callee should clean up the stack, not caller. Symbols should be also 1051// decorated in some fancy way :) It doesn't support any vector arguments. 
1052// For info on fast calling convention see Fast Calling Convention (tail call) 1053// implementation LowerX86_32FastCCCallTo. 1054 1055/// AddLiveIn - This helper function adds the specified physical register to the 1056/// MachineFunction as a live in value. It also creates a corresponding virtual 1057/// register for it. 1058static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 1059 const TargetRegisterClass *RC) { 1060 assert(RC->contains(PReg) && "Not the correct regclass!"); 1061 unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); 1062 MF.getRegInfo().addLiveIn(PReg, VReg); 1063 return VReg; 1064} 1065 1066/// CallIsStructReturn - Determines whether a CALL node uses struct return 1067/// semantics. 1068static bool CallIsStructReturn(CallSDNode *TheCall) { 1069 unsigned NumOps = TheCall->getNumArgs(); 1070 if (!NumOps) 1071 return false; 1072 1073 return TheCall->getArgFlags(0).isSRet(); 1074} 1075 1076/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct 1077/// return semantics. 1078static bool ArgsAreStructReturn(SDValue Op) { 1079 unsigned NumArgs = Op.getNode()->getNumValues() - 1; 1080 if (!NumArgs) 1081 return false; 1082 1083 return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet(); 1084} 1085 1086/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires 1087/// the callee to pop its own arguments. Callee pop is necessary to support tail 1088/// calls. 1089bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) { 1090 if (IsVarArg) 1091 return false; 1092 1093 switch (CallingConv) { 1094 default: 1095 return false; 1096 case CallingConv::X86_StdCall: 1097 return !Subtarget->is64Bit(); 1098 case CallingConv::X86_FastCall: 1099 return !Subtarget->is64Bit(); 1100 case CallingConv::Fast: 1101 return PerformTailCallOpt; 1102 } 1103} 1104 1105/// CCAssignFnForNode - Selects the correct CCAssignFn for a the 1106/// given CallingConvention value. 1107CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { 1108 if (Subtarget->is64Bit()) { 1109 if (Subtarget->isTargetWin64()) 1110 return CC_X86_Win64_C; 1111 else if (CC == CallingConv::Fast && PerformTailCallOpt) 1112 return CC_X86_64_TailCall; 1113 else 1114 return CC_X86_64_C; 1115 } 1116 1117 if (CC == CallingConv::X86_FastCall) 1118 return CC_X86_32_FastCall; 1119 else if (CC == CallingConv::Fast) 1120 return CC_X86_32_FastCC; 1121 else 1122 return CC_X86_32_C; 1123} 1124 1125/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to 1126/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node. 1127NameDecorationStyle 1128X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) { 1129 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1130 if (CC == CallingConv::X86_FastCall) 1131 return FastCall; 1132 else if (CC == CallingConv::X86_StdCall) 1133 return StdCall; 1134 return None; 1135} 1136 1137 1138/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer 1139/// in a register before calling. 1140bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) { 1141 return !IsTailCall && !Is64Bit && 1142 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1143 Subtarget->isPICStyleGOT(); 1144} 1145 1146/// CallRequiresFnAddressInReg - Check whether the call requires the function 1147/// address to be loaded in a register. 
1148bool 1149X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) { 1150 return !Is64Bit && IsTailCall && 1151 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1152 Subtarget->isPICStyleGOT(); 1153} 1154 1155/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 1156/// by "Src" to address "Dst" with size and alignment information specified by 1157/// the specific parameter attribute. The copy will be passed as a byval 1158/// function parameter. 1159static SDValue 1160CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 1161 ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { 1162 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 1163 return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), 1164 /*AlwaysInline=*/true, NULL, 0, NULL, 0); 1165} 1166 1167SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG, 1168 const CCValAssign &VA, 1169 MachineFrameInfo *MFI, 1170 unsigned CC, 1171 SDValue Root, unsigned i) { 1172 // Create the nodes corresponding to a load from this parameter slot. 1173 ISD::ArgFlagsTy Flags = 1174 cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags(); 1175 bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt; 1176 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 1177 1178 // FIXME: For now, all byval parameter objects are marked mutable. This can be 1179 // changed with more analysis. 1180 // In case of tail call optimization mark all arguments mutable. Since they 1181 // could be overwritten by lowering of arguments in case of a tail call. 1182 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, 1183 VA.getLocMemOffset(), isImmutable); 1184 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1185 if (Flags.isByVal()) 1186 return FIN; 1187 return DAG.getLoad(VA.getValVT(), Root, FIN, 1188 PseudoSourceValue::getFixedStack(FI), 0); 1189} 1190 1191SDValue 1192X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { 1193 MachineFunction &MF = DAG.getMachineFunction(); 1194 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1195 1196 const Function* Fn = MF.getFunction(); 1197 if (Fn->hasExternalLinkage() && 1198 Subtarget->isTargetCygMing() && 1199 Fn->getName() == "main") 1200 FuncInfo->setForceFramePointer(true); 1201 1202 // Decorate the function name. 1203 FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op)); 1204 1205 MachineFrameInfo *MFI = MF.getFrameInfo(); 1206 SDValue Root = Op.getOperand(0); 1207 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; 1208 unsigned CC = MF.getFunction()->getCallingConv(); 1209 bool Is64Bit = Subtarget->is64Bit(); 1210 bool IsWin64 = Subtarget->isTargetWin64(); 1211 1212 assert(!(isVarArg && CC == CallingConv::Fast) && 1213 "Var args not supported with calling convention fastcc"); 1214 1215 // Assign locations to all of the incoming arguments. 1216 SmallVector<CCValAssign, 16> ArgLocs; 1217 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1218 CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC)); 1219 1220 SmallVector<SDValue, 8> ArgValues; 1221 unsigned LastVal = ~0U; 1222 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1223 CCValAssign &VA = ArgLocs[i]; 1224 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 1225 // places. 
1226 assert(VA.getValNo() != LastVal && 1227 "Don't support value assigned to multiple locs yet"); 1228 LastVal = VA.getValNo(); 1229 1230 if (VA.isRegLoc()) { 1231 MVT RegVT = VA.getLocVT(); 1232 TargetRegisterClass *RC; 1233 if (RegVT == MVT::i32) 1234 RC = X86::GR32RegisterClass; 1235 else if (Is64Bit && RegVT == MVT::i64) 1236 RC = X86::GR64RegisterClass; 1237 else if (RegVT == MVT::f32) 1238 RC = X86::FR32RegisterClass; 1239 else if (RegVT == MVT::f64) 1240 RC = X86::FR64RegisterClass; 1241 else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) 1242 RC = X86::VR128RegisterClass; 1243 else if (RegVT.isVector()) { 1244 assert(RegVT.getSizeInBits() == 64); 1245 if (!Is64Bit) 1246 RC = X86::VR64RegisterClass; // MMX values are passed in MMXs. 1247 else { 1248 // Darwin calling convention passes MMX values in either GPRs or 1249 // XMMs in x86-64. Other targets pass them in memory. 1250 if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) { 1251 RC = X86::VR128RegisterClass; // MMX values are passed in XMMs. 1252 RegVT = MVT::v2i64; 1253 } else { 1254 RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs. 1255 RegVT = MVT::i64; 1256 } 1257 } 1258 } else { 1259 assert(0 && "Unknown argument type!"); 1260 } 1261 1262 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 1263 SDValue ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 1264 1265 // If this is an 8 or 16-bit value, it is really passed promoted to 32 1266 // bits. Insert an assert[sz]ext to capture this, then truncate to the 1267 // right size. 1268 if (VA.getLocInfo() == CCValAssign::SExt) 1269 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 1270 DAG.getValueType(VA.getValVT())); 1271 else if (VA.getLocInfo() == CCValAssign::ZExt) 1272 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 1273 DAG.getValueType(VA.getValVT())); 1274 1275 if (VA.getLocInfo() != CCValAssign::Full) 1276 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 1277 1278 // Handle MMX values passed in GPRs. 1279 if (Is64Bit && RegVT != VA.getLocVT()) { 1280 if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass) 1281 ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue); 1282 else if (RC == X86::VR128RegisterClass) { 1283 ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i64, ArgValue, 1284 DAG.getConstant(0, MVT::i64)); 1285 ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue); 1286 } 1287 } 1288 1289 ArgValues.push_back(ArgValue); 1290 } else { 1291 assert(VA.isMemLoc()); 1292 ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i)); 1293 } 1294 } 1295 1296 // The x86-64 ABI for returning structs by value requires that we copy 1297 // the sret argument into %rax for the return. Save the argument into 1298 // a virtual register so that we can access it from the return points. 
1299 if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { 1300 MachineFunction &MF = DAG.getMachineFunction(); 1301 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1302 unsigned Reg = FuncInfo->getSRetReturnReg(); 1303 if (!Reg) { 1304 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); 1305 FuncInfo->setSRetReturnReg(Reg); 1306 } 1307 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]); 1308 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root); 1309 } 1310 1311 unsigned StackSize = CCInfo.getNextStackOffset(); 1312 // align stack specially for tail calls 1313 if (PerformTailCallOpt && CC == CallingConv::Fast) 1314 StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 1315 1316 // If the function takes variable number of arguments, make a frame index for 1317 // the start of the first vararg value... for expansion of llvm.va_start. 1318 if (isVarArg) { 1319 if (Is64Bit || CC != CallingConv::X86_FastCall) { 1320 VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); 1321 } 1322 if (Is64Bit) { 1323 unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; 1324 1325 // FIXME: We should really autogenerate these arrays 1326 static const unsigned GPR64ArgRegsWin64[] = { 1327 X86::RCX, X86::RDX, X86::R8, X86::R9 1328 }; 1329 static const unsigned XMMArgRegsWin64[] = { 1330 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 1331 }; 1332 static const unsigned GPR64ArgRegs64Bit[] = { 1333 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 1334 }; 1335 static const unsigned XMMArgRegs64Bit[] = { 1336 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1337 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1338 }; 1339 const unsigned *GPR64ArgRegs, *XMMArgRegs; 1340 1341 if (IsWin64) { 1342 TotalNumIntRegs = 4; TotalNumXMMRegs = 4; 1343 GPR64ArgRegs = GPR64ArgRegsWin64; 1344 XMMArgRegs = XMMArgRegsWin64; 1345 } else { 1346 TotalNumIntRegs = 6; TotalNumXMMRegs = 8; 1347 GPR64ArgRegs = GPR64ArgRegs64Bit; 1348 XMMArgRegs = XMMArgRegs64Bit; 1349 } 1350 unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 1351 TotalNumIntRegs); 1352 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 1353 TotalNumXMMRegs); 1354 1355 // For X86-64, if there are vararg parameters that are passed via 1356 // registers, then we must store them to their spots on the stack so they 1357 // may be loaded by deferencing the result of va_next. 1358 VarArgsGPOffset = NumIntRegs * 8; 1359 VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; 1360 RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + 1361 TotalNumXMMRegs * 16, 16); 1362 1363 // Store the integer parameter registers. 1364 SmallVector<SDValue, 8> MemOps; 1365 SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 1366 SDValue FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1367 DAG.getIntPtrConstant(VarArgsGPOffset)); 1368 for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { 1369 unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], 1370 X86::GR64RegisterClass); 1371 SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); 1372 SDValue Store = 1373 DAG.getStore(Val.getValue(1), Val, FIN, 1374 PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0); 1375 MemOps.push_back(Store); 1376 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1377 DAG.getIntPtrConstant(8)); 1378 } 1379 1380 // Now store the XMM (fp + vector) parameter registers. 
1381 FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1382 DAG.getIntPtrConstant(VarArgsFPOffset)); 1383 for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { 1384 unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 1385 X86::VR128RegisterClass); 1386 SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); 1387 SDValue Store = 1388 DAG.getStore(Val.getValue(1), Val, FIN, 1389 PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0); 1390 MemOps.push_back(Store); 1391 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1392 DAG.getIntPtrConstant(16)); 1393 } 1394 if (!MemOps.empty()) 1395 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, 1396 &MemOps[0], MemOps.size()); 1397 } 1398 } 1399 1400 ArgValues.push_back(Root); 1401 1402 // Some CCs need callee pop. 1403 if (IsCalleePop(isVarArg, CC)) { 1404 BytesToPopOnReturn = StackSize; // Callee pops everything. 1405 BytesCallerReserves = 0; 1406 } else { 1407 BytesToPopOnReturn = 0; // Callee pops nothing. 1408 // If this is an sret function, the return should pop the hidden pointer. 1409 if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op)) 1410 BytesToPopOnReturn = 4; 1411 BytesCallerReserves = StackSize; 1412 } 1413 1414 if (!Is64Bit) { 1415 RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only. 1416 if (CC == CallingConv::X86_FastCall) 1417 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 1418 } 1419 1420 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1421 1422 // Return the new list of results. 1423 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0], 1424 ArgValues.size()).getValue(Op.getResNo()); 1425} 1426 1427SDValue 1428X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, 1429 const SDValue &StackPtr, 1430 const CCValAssign &VA, 1431 SDValue Chain, 1432 SDValue Arg, ISD::ArgFlagsTy Flags) { 1433 unsigned LocMemOffset = VA.getLocMemOffset(); 1434 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1435 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1436 if (Flags.isByVal()) { 1437 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG); 1438 } 1439 return DAG.getStore(Chain, Arg, PtrOff, 1440 PseudoSourceValue::getStack(), LocMemOffset); 1441} 1442 1443/// EmitTailCallLoadRetAddr - Emit a load of return adress if tail call 1444/// optimization is performed and it is required. 1445SDValue 1446X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, 1447 SDValue &OutRetAddr, 1448 SDValue Chain, 1449 bool IsTailCall, 1450 bool Is64Bit, 1451 int FPDiff) { 1452 if (!IsTailCall || FPDiff==0) return Chain; 1453 1454 // Adjust the Return address stack slot. 1455 MVT VT = getPointerTy(); 1456 OutRetAddr = getReturnAddressFrameIndex(DAG); 1457 // Load the "old" Return address. 1458 OutRetAddr = DAG.getLoad(VT, Chain,OutRetAddr, NULL, 0); 1459 return SDValue(OutRetAddr.getNode(), 1); 1460} 1461 1462/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call 1463/// optimization is performed and it is required (FPDiff!=0). 1464static SDValue 1465EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, 1466 SDValue Chain, SDValue RetAddrFrIdx, 1467 bool Is64Bit, int FPDiff) { 1468 // Store the return address to the appropriate stack slot. 1469 if (!FPDiff) return Chain; 1470 // Calculate the new stack slot for the return address. 1471 int SlotSize = Is64Bit ? 8 : 4; 1472 int NewReturnAddrFI = 1473 MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); 1474 MVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; 1475 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); 1476 Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, 1477 PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); 1478 return Chain; 1479} 1480 1481SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { 1482 MachineFunction &MF = DAG.getMachineFunction(); 1483 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); 1484 SDValue Chain = TheCall->getChain(); 1485 unsigned CC = TheCall->getCallingConv(); 1486 bool isVarArg = TheCall->isVarArg(); 1487 bool IsTailCall = TheCall->isTailCall() && 1488 CC == CallingConv::Fast && PerformTailCallOpt; 1489 SDValue Callee = TheCall->getCallee(); 1490 bool Is64Bit = Subtarget->is64Bit(); 1491 bool IsStructRet = CallIsStructReturn(TheCall); 1492 1493 assert(!(isVarArg && CC == CallingConv::Fast) && 1494 "Var args not supported with calling convention fastcc"); 1495 1496 // Analyze operands of the call, assigning locations to each operand. 1497 SmallVector<CCValAssign, 16> ArgLocs; 1498 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1499 CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC)); 1500 1501 // Get a count of how many bytes are to be pushed on the stack. 1502 unsigned NumBytes = CCInfo.getNextStackOffset(); 1503 if (PerformTailCallOpt && CC == CallingConv::Fast) 1504 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); 1505 1506 int FPDiff = 0; 1507 if (IsTailCall) { 1508 // Lower arguments at fp - stackoffset + fpdiff. 1509 unsigned NumBytesCallerPushed = 1510 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); 1511 FPDiff = NumBytesCallerPushed - NumBytes; 1512 1513 // Set the delta of movement of the returnaddr stackslot. 1514 // But only set if delta is greater than previous delta. 1515 if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta())) 1516 MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); 1517 } 1518 1519 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1520 1521 SDValue RetAddrFrIdx; 1522 // Load return adress for tail calls. 1523 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit, 1524 FPDiff); 1525 1526 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 1527 SmallVector<SDValue, 8> MemOpChains; 1528 SDValue StackPtr; 1529 1530 // Walk the register/memloc assignments, inserting copies/loads. In the case 1531 // of tail call optimization arguments are handle later. 1532 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1533 CCValAssign &VA = ArgLocs[i]; 1534 SDValue Arg = TheCall->getArg(i); 1535 ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); 1536 bool isByVal = Flags.isByVal(); 1537 1538 // Promote the value if needed. 1539 switch (VA.getLocInfo()) { 1540 default: assert(0 && "Unknown loc info!"); 1541 case CCValAssign::Full: break; 1542 case CCValAssign::SExt: 1543 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1544 break; 1545 case CCValAssign::ZExt: 1546 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1547 break; 1548 case CCValAssign::AExt: 1549 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1550 break; 1551 } 1552 1553 if (VA.isRegLoc()) { 1554 if (Is64Bit) { 1555 MVT RegVT = VA.getLocVT(); 1556 if (RegVT.isVector() && RegVT.getSizeInBits() == 64) 1557 switch (VA.getLocReg()) { 1558 default: 1559 break; 1560 case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX: 1561 case X86::R8: { 1562 // Special case: passing MMX values in GPR registers. 
1563 Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg); 1564 break; 1565 } 1566 case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3: 1567 case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: { 1568 // Special case: passing MMX values in XMM registers. 1569 Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg); 1570 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Arg); 1571 Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 1572 DAG.getNode(ISD::UNDEF, MVT::v2i64), Arg, 1573 getMOVLMask(2, DAG)); 1574 break; 1575 } 1576 } 1577 } 1578 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1579 } else { 1580 if (!IsTailCall || (IsTailCall && isByVal)) { 1581 assert(VA.isMemLoc()); 1582 if (StackPtr.getNode() == 0) 1583 StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy()); 1584 1585 MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, 1586 Chain, Arg, Flags)); 1587 } 1588 } 1589 } 1590 1591 if (!MemOpChains.empty()) 1592 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1593 &MemOpChains[0], MemOpChains.size()); 1594 1595 // Build a sequence of copy-to-reg nodes chained together with token chain 1596 // and flag operands which copy the outgoing args into registers. 1597 SDValue InFlag; 1598 // Tail call byval lowering might overwrite argument registers so in case of 1599 // tail call optimization the copies to registers are lowered later. 1600 if (!IsTailCall) 1601 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1602 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1603 InFlag); 1604 InFlag = Chain.getValue(1); 1605 } 1606 1607 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1608 // GOT pointer. 1609 if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) { 1610 Chain = DAG.getCopyToReg(Chain, X86::EBX, 1611 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 1612 InFlag); 1613 InFlag = Chain.getValue(1); 1614 } 1615 // If we are tail calling and generating PIC/GOT style code load the address 1616 // of the callee into ecx. The value in ecx is used as target of the tail 1617 // jump. This is done to circumvent the ebx/callee-saved problem for tail 1618 // calls on PIC/GOT architectures. Normally we would just put the address of 1619 // GOT into ebx and then call target@PLT. But for tail callss ebx would be 1620 // restored (since ebx is callee saved) before jumping to the target@PLT. 1621 if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) { 1622 // Note: The actual moving to ecx is done further down. 1623 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 1624 if (G && !G->getGlobal()->hasHiddenVisibility() && 1625 !G->getGlobal()->hasProtectedVisibility()) 1626 Callee = LowerGlobalAddress(Callee, DAG); 1627 else if (isa<ExternalSymbolSDNode>(Callee)) 1628 Callee = LowerExternalSymbol(Callee,DAG); 1629 } 1630 1631 if (Is64Bit && isVarArg) { 1632 // From AMD64 ABI document: 1633 // For calls that may call functions that use varargs or stdargs 1634 // (prototype-less calls or calls to functions containing ellipsis (...) in 1635 // the declaration) %al is used as hidden argument to specify the number 1636 // of SSE registers used. The contents of %al do not need to match exactly 1637 // the number of registers, but must be an ubound on the number of SSE 1638 // registers used and is in the range 0 - 8 inclusive. 1639 1640 // FIXME: Verify this on Win64 1641 // Count the number of XMM registers allocated. 
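    // Worked example (illustrative, not from the original comments): for a
    // printf-style call passing two doubles in XMM0 and XMM1, the code below
    // sets AL to 2; any upper bound up to 8 would also satisfy the ABI.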
1642 static const unsigned XMMArgRegs[] = { 1643 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1644 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1645 }; 1646 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1647 1648 Chain = DAG.getCopyToReg(Chain, X86::AL, 1649 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1650 InFlag = Chain.getValue(1); 1651 } 1652 1653 1654 // For tail calls lower the arguments to the 'real' stack slot. 1655 if (IsTailCall) { 1656 SmallVector<SDValue, 8> MemOpChains2; 1657 SDValue FIN; 1658 int FI = 0; 1659 // Do not flag preceeding copytoreg stuff together with the following stuff. 1660 InFlag = SDValue(); 1661 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1662 CCValAssign &VA = ArgLocs[i]; 1663 if (!VA.isRegLoc()) { 1664 assert(VA.isMemLoc()); 1665 SDValue Arg = TheCall->getArg(i); 1666 ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); 1667 // Create frame index. 1668 int32_t Offset = VA.getLocMemOffset()+FPDiff; 1669 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; 1670 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); 1671 FIN = DAG.getFrameIndex(FI, getPointerTy()); 1672 1673 if (Flags.isByVal()) { 1674 // Copy relative to framepointer. 1675 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); 1676 if (StackPtr.getNode() == 0) 1677 StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy()); 1678 Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source); 1679 1680 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain, 1681 Flags, DAG)); 1682 } else { 1683 // Store relative to framepointer. 1684 MemOpChains2.push_back( 1685 DAG.getStore(Chain, Arg, FIN, 1686 PseudoSourceValue::getFixedStack(FI), 0)); 1687 } 1688 } 1689 } 1690 1691 if (!MemOpChains2.empty()) 1692 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1693 &MemOpChains2[0], MemOpChains2.size()); 1694 1695 // Copy arguments to their registers. 1696 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1697 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1698 InFlag); 1699 InFlag = Chain.getValue(1); 1700 } 1701 InFlag =SDValue(); 1702 1703 // Store the return address to the appropriate stack slot. 1704 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, 1705 FPDiff); 1706 } 1707 1708 // If the callee is a GlobalAddress node (quite common, every direct call is) 1709 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1710 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1711 // We should use extra load for direct calls to dllimported functions in 1712 // non-JIT mode. 1713 if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1714 getTargetMachine(), true)) 1715 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(), 1716 G->getOffset()); 1717 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1718 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1719 } else if (IsTailCall) { 1720 unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; 1721 1722 Chain = DAG.getCopyToReg(Chain, 1723 DAG.getRegister(Opc, getPointerTy()), 1724 Callee,InFlag); 1725 Callee = DAG.getRegister(Opc, getPointerTy()); 1726 // Add register as live out. 1727 DAG.getMachineFunction().getRegInfo().addLiveOut(Opc); 1728 } 1729 1730 // Returns a chain & a flag for retval copy to use. 
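  // Rough shape of the operand list assembled below (exposition only):
  //   chain, callee, [FPDiff for tail calls], argument physregs,
  //   [EBX for GOT-style PIC], [AL for x86-64 varargs], [glue]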
1731 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1732 SmallVector<SDValue, 8> Ops; 1733 1734 if (IsTailCall) { 1735 Ops.push_back(Chain); 1736 Ops.push_back(DAG.getIntPtrConstant(NumBytes, true)); 1737 Ops.push_back(DAG.getIntPtrConstant(0, true)); 1738 if (InFlag.getNode()) 1739 Ops.push_back(InFlag); 1740 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1741 InFlag = Chain.getValue(1); 1742 1743 // Returns a chain & a flag for retval copy to use. 1744 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1745 Ops.clear(); 1746 } 1747 1748 Ops.push_back(Chain); 1749 Ops.push_back(Callee); 1750 1751 if (IsTailCall) 1752 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); 1753 1754 // Add argument registers to the end of the list so that they are known live 1755 // into the call. 1756 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1757 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1758 RegsToPass[i].second.getValueType())); 1759 1760 // Add an implicit use GOT pointer in EBX. 1761 if (!IsTailCall && !Is64Bit && 1762 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1763 Subtarget->isPICStyleGOT()) 1764 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 1765 1766 // Add an implicit use of AL for x86 vararg functions. 1767 if (Is64Bit && isVarArg) 1768 Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); 1769 1770 if (InFlag.getNode()) 1771 Ops.push_back(InFlag); 1772 1773 if (IsTailCall) { 1774 assert(InFlag.getNode() && 1775 "Flag must be set. Depend on flag being set in LowerRET"); 1776 Chain = DAG.getNode(X86ISD::TAILCALL, 1777 TheCall->getVTList(), &Ops[0], Ops.size()); 1778 1779 return SDValue(Chain.getNode(), Op.getResNo()); 1780 } 1781 1782 Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size()); 1783 InFlag = Chain.getValue(1); 1784 1785 // Create the CALLSEQ_END node. 1786 unsigned NumBytesForCalleeToPush; 1787 if (IsCalleePop(isVarArg, CC)) 1788 NumBytesForCalleeToPush = NumBytes; // Callee pops everything 1789 else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet) 1790 // If this is is a call to a struct-return function, the callee 1791 // pops the hidden struct pointer, so we have to push it back. 1792 // This is common for Darwin/X86, Linux & Mingw32 targets. 1793 NumBytesForCalleeToPush = 4; 1794 else 1795 NumBytesForCalleeToPush = 0; // Callee pops nothing. 1796 1797 // Returns a flag for retval copy to use. 1798 Chain = DAG.getCALLSEQ_END(Chain, 1799 DAG.getIntPtrConstant(NumBytes, true), 1800 DAG.getIntPtrConstant(NumBytesForCalleeToPush, 1801 true), 1802 InFlag); 1803 InFlag = Chain.getValue(1); 1804 1805 // Handle result values, copying them out of physregs into vregs that we 1806 // return. 1807 return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), 1808 Op.getResNo()); 1809} 1810 1811 1812//===----------------------------------------------------------------------===// 1813// Fast Calling Convention (tail call) implementation 1814//===----------------------------------------------------------------------===// 1815 1816// Like std call, callee cleans arguments, convention except that ECX is 1817// reserved for storing the tail called function address. Only 2 registers are 1818// free for argument passing (inreg). Tail call optimization is performed 1819// provided: 1820// * tailcallopt is enabled 1821// * caller/callee are fastcc 1822// On X86_64 architecture with GOT-style position independent code only local 1823// (within module) calls are supported at the moment. 
1824// To keep the stack aligned according to the platform ABI, the function
1825// GetAlignedArgumentStackSize ensures that the argument delta is always a multiple
1826// of the stack alignment. (Dynamic linkers need this - darwin's dyld for example.)
1827// If a tail-called callee has more arguments than the caller, the
1828// caller needs to make sure that there is room to move the RETADDR to. This is
1829// achieved by reserving an area the size of the argument delta right after the
1830// original RETADDR, but before the saved frame pointer or the spilled registers,
1831// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
1832// stack layout:
1833//   arg1
1834//   arg2
1835//   RETADDR
1836//   [ new RETADDR
1837//     move area ]
1838//   (possible EBP)
1839//   ESI
1840//   EDI
1841//   local1 ..
1842
1843/// GetAlignedArgumentStackSize - Round the stack size up so that it stays
1844/// aligned after the call, e.g. to 16n + 12 for a 16-byte alignment requirement.
1845unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
1846                                                        SelectionDAG& DAG) {
1847  MachineFunction &MF = DAG.getMachineFunction();
1848  const TargetMachine &TM = MF.getTarget();
1849  const TargetFrameInfo &TFI = *TM.getFrameInfo();
1850  unsigned StackAlignment = TFI.getStackAlignment();
1851  uint64_t AlignMask = StackAlignment - 1;
1852  int64_t Offset = StackSize;
1853  uint64_t SlotSize = TD->getPointerSize();
1854  if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
1855    // Remainder is already no larger than (StackAlignment - SlotSize); just add the difference.
1856    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
1857  } else {
1858    // Mask out the lower bits, then add the stack alignment plus (StackAlignment - SlotSize) bytes.
1859    Offset = ((~AlignMask) & Offset) + StackAlignment +
1860             (StackAlignment-SlotSize);
1861  }
1862  return Offset;
1863}
1864
1865/// IsEligibleForTailCallOptimization - Check to see whether the next instruction
1866/// following the call is a return. A function is eligible if the caller/callee
1867/// calling conventions match, currently only fastcc supports tail calls, and
1868/// the function CALL is immediately followed by a RET.
1869bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
1870                                                          SDValue Ret,
1871                                                          SelectionDAG& DAG) const {
1872  if (!PerformTailCallOpt)
1873    return false;
1874
1875  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
1876    MachineFunction &MF = DAG.getMachineFunction();
1877    unsigned CallerCC = MF.getFunction()->getCallingConv();
1878    unsigned CalleeCC = TheCall->getCallingConv();
1879    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
1880      SDValue Callee = TheCall->getCallee();
1881      // On x86/32-bit, PIC/GOT tail calls are supported.
1882      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
1883          !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
1884        return true;
1885
1886      // Can only do local tail calls (in same module, hidden or protected) on
1887      // x86_64 PIC/GOT at the moment.
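      // Illustrative example (not original text): a callee defined in the same
      // module with hidden or protected visibility qualifies below; a
      // default-visibility external symbol does not.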
1888 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1889 return G->getGlobal()->hasHiddenVisibility() 1890 || G->getGlobal()->hasProtectedVisibility(); 1891 } 1892 } 1893 1894 return false; 1895} 1896 1897FastISel * 1898X86TargetLowering::createFastISel(MachineFunction &mf, 1899 MachineModuleInfo *mmo, 1900 DenseMap<const Value *, unsigned> &vm, 1901 DenseMap<const BasicBlock *, 1902 MachineBasicBlock *> &bm, 1903 DenseMap<const AllocaInst *, int> &am 1904#ifndef NDEBUG 1905 , SmallSet<Instruction*, 8> &cil 1906#endif 1907 ) { 1908 return X86::createFastISel(mf, mmo, vm, bm, am 1909#ifndef NDEBUG 1910 , cil 1911#endif 1912 ); 1913} 1914 1915 1916//===----------------------------------------------------------------------===// 1917// Other Lowering Hooks 1918//===----------------------------------------------------------------------===// 1919 1920 1921SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1922 MachineFunction &MF = DAG.getMachineFunction(); 1923 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1924 int ReturnAddrIndex = FuncInfo->getRAIndex(); 1925 uint64_t SlotSize = TD->getPointerSize(); 1926 1927 if (ReturnAddrIndex == 0) { 1928 // Set up a frame object for the return address. 1929 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize); 1930 FuncInfo->setRAIndex(ReturnAddrIndex); 1931 } 1932 1933 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1934} 1935 1936 1937/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1938/// specific condition code. It returns a false if it cannot do a direct 1939/// translation. X86CC is the translated CondCode. LHS/RHS are modified as 1940/// needed. 1941static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1942 unsigned &X86CC, SDValue &LHS, SDValue &RHS, 1943 SelectionDAG &DAG) { 1944 X86CC = X86::COND_INVALID; 1945 if (!isFP) { 1946 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1947 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 1948 // X > -1 -> X == 0, jump !sign. 1949 RHS = DAG.getConstant(0, RHS.getValueType()); 1950 X86CC = X86::COND_NS; 1951 return true; 1952 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 1953 // X < 0 -> X == 0, jump on sign. 1954 X86CC = X86::COND_S; 1955 return true; 1956 } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { 1957 // X < 1 -> X <= 0 1958 RHS = DAG.getConstant(0, RHS.getValueType()); 1959 X86CC = X86::COND_LE; 1960 return true; 1961 } 1962 } 1963 1964 switch (SetCCOpcode) { 1965 default: break; 1966 case ISD::SETEQ: X86CC = X86::COND_E; break; 1967 case ISD::SETGT: X86CC = X86::COND_G; break; 1968 case ISD::SETGE: X86CC = X86::COND_GE; break; 1969 case ISD::SETLT: X86CC = X86::COND_L; break; 1970 case ISD::SETLE: X86CC = X86::COND_LE; break; 1971 case ISD::SETNE: X86CC = X86::COND_NE; break; 1972 case ISD::SETULT: X86CC = X86::COND_B; break; 1973 case ISD::SETUGT: X86CC = X86::COND_A; break; 1974 case ISD::SETULE: X86CC = X86::COND_BE; break; 1975 case ISD::SETUGE: X86CC = X86::COND_AE; break; 1976 } 1977 } else { 1978 // First determine if it is required or is profitable to flip the operands. 1979 1980 // If LHS is a foldable load, but RHS is not, flip the condition. 
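    // (Illustrative rationale, an assumption rather than original text: SSE
    // compares such as ucomiss can only fold a load as their second operand,
    // so the lone foldable load is moved to the RHS and the condition swapped.)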
1981    if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
1982        !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
1983      SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
1984      std::swap(LHS, RHS);
1985    }
1986
1987    switch (SetCCOpcode) {
1988    default: break;
1989    case ISD::SETOLT:
1990    case ISD::SETOLE:
1991    case ISD::SETUGT:
1992    case ISD::SETUGE:
1993      std::swap(LHS, RHS);
1994      break;
1995    }
1996
1997    // On a floating point condition, the flags are set as follows:
1998    //  ZF  PF  CF   op
1999    //   0 | 0 | 0 | X > Y
2000    //   0 | 0 | 1 | X < Y
2001    //   1 | 0 | 0 | X == Y
2002    //   1 | 1 | 1 | unordered
2003    switch (SetCCOpcode) {
2004    default: break;
2005    case ISD::SETUEQ:
2006    case ISD::SETEQ:
2007      X86CC = X86::COND_E;
2008      break;
2009    case ISD::SETOLT:              // flipped
2010    case ISD::SETOGT:
2011    case ISD::SETGT:
2012      X86CC = X86::COND_A;
2013      break;
2014    case ISD::SETOLE:              // flipped
2015    case ISD::SETOGE:
2016    case ISD::SETGE:
2017      X86CC = X86::COND_AE;
2018      break;
2019    case ISD::SETUGT:              // flipped
2020    case ISD::SETULT:
2021    case ISD::SETLT:
2022      X86CC = X86::COND_B;
2023      break;
2024    case ISD::SETUGE:              // flipped
2025    case ISD::SETULE:
2026    case ISD::SETLE:
2027      X86CC = X86::COND_BE;
2028      break;
2029    case ISD::SETONE:
2030    case ISD::SETNE:
2031      X86CC = X86::COND_NE;
2032      break;
2033    case ISD::SETUO:
2034      X86CC = X86::COND_P;
2035      break;
2036    case ISD::SETO:
2037      X86CC = X86::COND_NP;
2038      break;
2039    }
2040  }
2041
2042  return X86CC != X86::COND_INVALID;
2043}
2044
2045/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
2046/// code? The current x86 ISA includes the following FP cmov instructions:
2047/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
2048static bool hasFPCMov(unsigned X86CC) {
2049  switch (X86CC) {
2050  default:
2051    return false;
2052  case X86::COND_B:
2053  case X86::COND_BE:
2054  case X86::COND_E:
2055  case X86::COND_P:
2056  case X86::COND_A:
2057  case X86::COND_AE:
2058  case X86::COND_NE:
2059  case X86::COND_NP:
2060    return true;
2061  }
2062}
2063
2064/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
2065/// true if Op is undef or if its value falls within the half-open range [Low, Hi).
2066static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) {
2067  if (Op.getOpcode() == ISD::UNDEF)
2068    return true;
2069
2070  unsigned Val = cast<ConstantSDNode>(Op)->getZExtValue();
2071  return (Val >= Low && Val < Hi);
2072}
2073
2074/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
2075/// true if Op is undef or if its value is equal to the specified value.
2076static bool isUndefOrEqual(SDValue Op, unsigned Val) {
2077  if (Op.getOpcode() == ISD::UNDEF)
2078    return true;
2079  return cast<ConstantSDNode>(Op)->getZExtValue() == Val;
2080}
2081
2082/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
2083/// specifies a shuffle of elements that is suitable for input to PSHUFD.
2084bool X86::isPSHUFDMask(SDNode *N) {
2085  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2086
2087  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
2088    return false;
2089
2090  // Check if the value doesn't reference the second vector.
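  // Example (for exposition): with 4 elements, <2, 1, undef, 0> is a valid
  // PSHUFD mask, while <2, 1, 4, 0> is not, because index 4 would name an
  // element of the second source vector.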
2091 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 2092 SDValue Arg = N->getOperand(i); 2093 if (Arg.getOpcode() == ISD::UNDEF) continue; 2094 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2095 if (cast<ConstantSDNode>(Arg)->getZExtValue() >= e) 2096 return false; 2097 } 2098 2099 return true; 2100} 2101 2102/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 2103/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 2104bool X86::isPSHUFHWMask(SDNode *N) { 2105 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2106 2107 if (N->getNumOperands() != 8) 2108 return false; 2109 2110 // Lower quadword copied in order. 2111 for (unsigned i = 0; i != 4; ++i) { 2112 SDValue Arg = N->getOperand(i); 2113 if (Arg.getOpcode() == ISD::UNDEF) continue; 2114 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2115 if (cast<ConstantSDNode>(Arg)->getZExtValue() != i) 2116 return false; 2117 } 2118 2119 // Upper quadword shuffled. 2120 for (unsigned i = 4; i != 8; ++i) { 2121 SDValue Arg = N->getOperand(i); 2122 if (Arg.getOpcode() == ISD::UNDEF) continue; 2123 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2124 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2125 if (Val < 4 || Val > 7) 2126 return false; 2127 } 2128 2129 return true; 2130} 2131 2132/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 2133/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 2134bool X86::isPSHUFLWMask(SDNode *N) { 2135 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2136 2137 if (N->getNumOperands() != 8) 2138 return false; 2139 2140 // Upper quadword copied in order. 2141 for (unsigned i = 4; i != 8; ++i) 2142 if (!isUndefOrEqual(N->getOperand(i), i)) 2143 return false; 2144 2145 // Lower quadword shuffled. 2146 for (unsigned i = 0; i != 4; ++i) 2147 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2148 return false; 2149 2150 return true; 2151} 2152 2153/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2154/// specifies a shuffle of elements that is suitable for input to SHUFP*. 2155static bool isSHUFPMask(SDOperandPtr Elems, unsigned NumElems) { 2156 if (NumElems != 2 && NumElems != 4) return false; 2157 2158 unsigned Half = NumElems / 2; 2159 for (unsigned i = 0; i < Half; ++i) 2160 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 2161 return false; 2162 for (unsigned i = Half; i < NumElems; ++i) 2163 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 2164 return false; 2165 2166 return true; 2167} 2168 2169bool X86::isSHUFPMask(SDNode *N) { 2170 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2171 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 2172} 2173 2174/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 2175/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 2176/// half elements to come from vector 1 (which would equal the dest.) and 2177/// the upper half to come from vector 2. 
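/// For illustration (not in the original comment): with 4 elements,
/// <4, 5, 2, 3> is accepted here - the low half reads from vector 2 and the
/// high half from vector 1 - whereas isSHUFPMask would want <0, 1, 6, 7>.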
2178static bool isCommutedSHUFP(SDOperandPtr Ops, unsigned NumOps) { 2179 if (NumOps != 2 && NumOps != 4) return false; 2180 2181 unsigned Half = NumOps / 2; 2182 for (unsigned i = 0; i < Half; ++i) 2183 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 2184 return false; 2185 for (unsigned i = Half; i < NumOps; ++i) 2186 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 2187 return false; 2188 return true; 2189} 2190 2191static bool isCommutedSHUFP(SDNode *N) { 2192 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2193 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 2194} 2195 2196/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2197/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2198bool X86::isMOVHLPSMask(SDNode *N) { 2199 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2200 2201 if (N->getNumOperands() != 4) 2202 return false; 2203 2204 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2205 return isUndefOrEqual(N->getOperand(0), 6) && 2206 isUndefOrEqual(N->getOperand(1), 7) && 2207 isUndefOrEqual(N->getOperand(2), 2) && 2208 isUndefOrEqual(N->getOperand(3), 3); 2209} 2210 2211/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 2212/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 2213/// <2, 3, 2, 3> 2214bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 2215 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2216 2217 if (N->getNumOperands() != 4) 2218 return false; 2219 2220 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 2221 return isUndefOrEqual(N->getOperand(0), 2) && 2222 isUndefOrEqual(N->getOperand(1), 3) && 2223 isUndefOrEqual(N->getOperand(2), 2) && 2224 isUndefOrEqual(N->getOperand(3), 3); 2225} 2226 2227/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2228/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 2229bool X86::isMOVLPMask(SDNode *N) { 2230 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2231 2232 unsigned NumElems = N->getNumOperands(); 2233 if (NumElems != 2 && NumElems != 4) 2234 return false; 2235 2236 for (unsigned i = 0; i < NumElems/2; ++i) 2237 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 2238 return false; 2239 2240 for (unsigned i = NumElems/2; i < NumElems; ++i) 2241 if (!isUndefOrEqual(N->getOperand(i), i)) 2242 return false; 2243 2244 return true; 2245} 2246 2247/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 2248/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 2249/// and MOVLHPS. 2250bool X86::isMOVHPMask(SDNode *N) { 2251 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2252 2253 unsigned NumElems = N->getNumOperands(); 2254 if (NumElems != 2 && NumElems != 4) 2255 return false; 2256 2257 for (unsigned i = 0; i < NumElems/2; ++i) 2258 if (!isUndefOrEqual(N->getOperand(i), i)) 2259 return false; 2260 2261 for (unsigned i = 0; i < NumElems/2; ++i) { 2262 SDValue Arg = N->getOperand(i + NumElems/2); 2263 if (!isUndefOrEqual(Arg, i + NumElems)) 2264 return false; 2265 } 2266 2267 return true; 2268} 2269 2270/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 2271/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
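/// Example (exposition only): with 4 elements the canonical unpckl mask is
/// <0, 4, 1, 5>; undef is allowed in any position.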
2272bool static isUNPCKLMask(SDOperandPtr Elts, unsigned NumElts, 2273 bool V2IsSplat = false) { 2274 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 2275 return false; 2276 2277 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 2278 SDValue BitI = Elts[i]; 2279 SDValue BitI1 = Elts[i+1]; 2280 if (!isUndefOrEqual(BitI, j)) 2281 return false; 2282 if (V2IsSplat) { 2283 if (isUndefOrEqual(BitI1, NumElts)) 2284 return false; 2285 } else { 2286 if (!isUndefOrEqual(BitI1, j + NumElts)) 2287 return false; 2288 } 2289 } 2290 2291 return true; 2292} 2293 2294bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 2295 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2296 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 2297} 2298 2299/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 2300/// specifies a shuffle of elements that is suitable for input to UNPCKH. 2301bool static isUNPCKHMask(SDOperandPtr Elts, unsigned NumElts, 2302 bool V2IsSplat = false) { 2303 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 2304 return false; 2305 2306 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 2307 SDValue BitI = Elts[i]; 2308 SDValue BitI1 = Elts[i+1]; 2309 if (!isUndefOrEqual(BitI, j + NumElts/2)) 2310 return false; 2311 if (V2IsSplat) { 2312 if (isUndefOrEqual(BitI1, NumElts)) 2313 return false; 2314 } else { 2315 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 2316 return false; 2317 } 2318 } 2319 2320 return true; 2321} 2322 2323bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 2324 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2325 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 2326} 2327 2328/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 2329/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 2330/// <0, 0, 1, 1> 2331bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 2332 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2333 2334 unsigned NumElems = N->getNumOperands(); 2335 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2336 return false; 2337 2338 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2339 SDValue BitI = N->getOperand(i); 2340 SDValue BitI1 = N->getOperand(i+1); 2341 2342 if (!isUndefOrEqual(BitI, j)) 2343 return false; 2344 if (!isUndefOrEqual(BitI1, j)) 2345 return false; 2346 } 2347 2348 return true; 2349} 2350 2351/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 2352/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 2353/// <2, 2, 3, 3> 2354bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 2355 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2356 2357 unsigned NumElems = N->getNumOperands(); 2358 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2359 return false; 2360 2361 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 2362 SDValue BitI = N->getOperand(i); 2363 SDValue BitI1 = N->getOperand(i + 1); 2364 2365 if (!isUndefOrEqual(BitI, j)) 2366 return false; 2367 if (!isUndefOrEqual(BitI1, j)) 2368 return false; 2369 } 2370 2371 return true; 2372} 2373 2374/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 2375/// specifies a shuffle of elements that is suitable for input to MOVSS, 2376/// MOVSD, and MOVD, i.e. setting the lowest element. 
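/// Example (illustrative): for 4 elements the expected mask is <4, 1, 2, 3>,
/// i.e. element 0 is taken from the second vector and the rest pass through.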
2377static bool isMOVLMask(SDOperandPtr Elts, unsigned NumElts) { 2378 if (NumElts != 2 && NumElts != 4) 2379 return false; 2380 2381 if (!isUndefOrEqual(Elts[0], NumElts)) 2382 return false; 2383 2384 for (unsigned i = 1; i < NumElts; ++i) { 2385 if (!isUndefOrEqual(Elts[i], i)) 2386 return false; 2387 } 2388 2389 return true; 2390} 2391 2392bool X86::isMOVLMask(SDNode *N) { 2393 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2394 return ::isMOVLMask(N->op_begin(), N->getNumOperands()); 2395} 2396 2397/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 2398/// of what x86 movss want. X86 movs requires the lowest element to be lowest 2399/// element of vector 2 and the other elements to come from vector 1 in order. 2400static bool isCommutedMOVL(SDOperandPtr Ops, unsigned NumOps, 2401 bool V2IsSplat = false, 2402 bool V2IsUndef = false) { 2403 if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) 2404 return false; 2405 2406 if (!isUndefOrEqual(Ops[0], 0)) 2407 return false; 2408 2409 for (unsigned i = 1; i < NumOps; ++i) { 2410 SDValue Arg = Ops[i]; 2411 if (!(isUndefOrEqual(Arg, i+NumOps) || 2412 (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || 2413 (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) 2414 return false; 2415 } 2416 2417 return true; 2418} 2419 2420static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 2421 bool V2IsUndef = false) { 2422 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2423 return isCommutedMOVL(N->op_begin(), N->getNumOperands(), 2424 V2IsSplat, V2IsUndef); 2425} 2426 2427/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2428/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 2429bool X86::isMOVSHDUPMask(SDNode *N) { 2430 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2431 2432 if (N->getNumOperands() != 4) 2433 return false; 2434 2435 // Expect 1, 1, 3, 3 2436 for (unsigned i = 0; i < 2; ++i) { 2437 SDValue Arg = N->getOperand(i); 2438 if (Arg.getOpcode() == ISD::UNDEF) continue; 2439 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2440 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2441 if (Val != 1) return false; 2442 } 2443 2444 bool HasHi = false; 2445 for (unsigned i = 2; i < 4; ++i) { 2446 SDValue Arg = N->getOperand(i); 2447 if (Arg.getOpcode() == ISD::UNDEF) continue; 2448 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2449 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2450 if (Val != 3) return false; 2451 HasHi = true; 2452 } 2453 2454 // Don't use movshdup if it can be done with a shufps. 2455 return HasHi; 2456} 2457 2458/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2459/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
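/// Expected pattern (noted here for exposition): <0, 0, 2, 2> - each even
/// source element is duplicated into the following odd lane.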
2460bool X86::isMOVSLDUPMask(SDNode *N) { 2461 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2462 2463 if (N->getNumOperands() != 4) 2464 return false; 2465 2466 // Expect 0, 0, 2, 2 2467 for (unsigned i = 0; i < 2; ++i) { 2468 SDValue Arg = N->getOperand(i); 2469 if (Arg.getOpcode() == ISD::UNDEF) continue; 2470 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2471 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2472 if (Val != 0) return false; 2473 } 2474 2475 bool HasHi = false; 2476 for (unsigned i = 2; i < 4; ++i) { 2477 SDValue Arg = N->getOperand(i); 2478 if (Arg.getOpcode() == ISD::UNDEF) continue; 2479 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2480 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2481 if (Val != 2) return false; 2482 HasHi = true; 2483 } 2484 2485 // Don't use movshdup if it can be done with a shufps. 2486 return HasHi; 2487} 2488 2489/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand 2490/// specifies a identity operation on the LHS or RHS. 2491static bool isIdentityMask(SDNode *N, bool RHS = false) { 2492 unsigned NumElems = N->getNumOperands(); 2493 for (unsigned i = 0; i < NumElems; ++i) 2494 if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) 2495 return false; 2496 return true; 2497} 2498 2499/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2500/// a splat of a single element. 2501static bool isSplatMask(SDNode *N) { 2502 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2503 2504 // This is a splat operation if each element of the permute is the same, and 2505 // if the value doesn't reference the second vector. 2506 unsigned NumElems = N->getNumOperands(); 2507 SDValue ElementBase; 2508 unsigned i = 0; 2509 for (; i != NumElems; ++i) { 2510 SDValue Elt = N->getOperand(i); 2511 if (isa<ConstantSDNode>(Elt)) { 2512 ElementBase = Elt; 2513 break; 2514 } 2515 } 2516 2517 if (!ElementBase.getNode()) 2518 return false; 2519 2520 for (; i != NumElems; ++i) { 2521 SDValue Arg = N->getOperand(i); 2522 if (Arg.getOpcode() == ISD::UNDEF) continue; 2523 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2524 if (Arg != ElementBase) return false; 2525 } 2526 2527 // Make sure it is a splat of the first vector operand. 2528 return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems; 2529} 2530 2531/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2532/// a splat of a single element and it's a 2 or 4 element mask. 2533bool X86::isSplatMask(SDNode *N) { 2534 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2535 2536 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 2537 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2538 return false; 2539 return ::isSplatMask(N); 2540} 2541 2542/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand 2543/// specifies a splat of zero element. 2544bool X86::isSplatLoMask(SDNode *N) { 2545 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2546 2547 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 2548 if (!isUndefOrEqual(N->getOperand(i), 0)) 2549 return false; 2550 return true; 2551} 2552 2553/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2554/// specifies a shuffle of elements that is suitable for input to MOVDDUP. 
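/// Accepted patterns (illustrative): <0, 0> for 2 elements and <0, 1, 0, 1>
/// for 4 elements, i.e. the low half broadcast into the high half.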
2555bool X86::isMOVDDUPMask(SDNode *N) { 2556 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2557 2558 unsigned e = N->getNumOperands() / 2; 2559 for (unsigned i = 0; i < e; ++i) 2560 if (!isUndefOrEqual(N->getOperand(i), i)) 2561 return false; 2562 for (unsigned i = 0; i < e; ++i) 2563 if (!isUndefOrEqual(N->getOperand(e+i), i)) 2564 return false; 2565 return true; 2566} 2567 2568/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2569/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2570/// instructions. 2571unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2572 unsigned NumOperands = N->getNumOperands(); 2573 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2574 unsigned Mask = 0; 2575 for (unsigned i = 0; i < NumOperands; ++i) { 2576 unsigned Val = 0; 2577 SDValue Arg = N->getOperand(NumOperands-i-1); 2578 if (Arg.getOpcode() != ISD::UNDEF) 2579 Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2580 if (Val >= NumOperands) Val -= NumOperands; 2581 Mask |= Val; 2582 if (i != NumOperands - 1) 2583 Mask <<= Shift; 2584 } 2585 2586 return Mask; 2587} 2588 2589/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2590/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2591/// instructions. 2592unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2593 unsigned Mask = 0; 2594 // 8 nodes, but we only care about the last 4. 2595 for (unsigned i = 7; i >= 4; --i) { 2596 unsigned Val = 0; 2597 SDValue Arg = N->getOperand(i); 2598 if (Arg.getOpcode() != ISD::UNDEF) 2599 Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2600 Mask |= (Val - 4); 2601 if (i != 4) 2602 Mask <<= 2; 2603 } 2604 2605 return Mask; 2606} 2607 2608/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2609/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2610/// instructions. 2611unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2612 unsigned Mask = 0; 2613 // 8 nodes, but we only care about the first 4. 2614 for (int i = 3; i >= 0; --i) { 2615 unsigned Val = 0; 2616 SDValue Arg = N->getOperand(i); 2617 if (Arg.getOpcode() != ISD::UNDEF) 2618 Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2619 Mask |= Val; 2620 if (i != 0) 2621 Mask <<= 2; 2622 } 2623 2624 return Mask; 2625} 2626 2627/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2628/// specifies a 8 element shuffle that can be broken into a pair of 2629/// PSHUFHW and PSHUFLW. 2630static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2631 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2632 2633 if (N->getNumOperands() != 8) 2634 return false; 2635 2636 // Lower quadword shuffled. 2637 for (unsigned i = 0; i != 4; ++i) { 2638 SDValue Arg = N->getOperand(i); 2639 if (Arg.getOpcode() == ISD::UNDEF) continue; 2640 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2641 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2642 if (Val >= 4) 2643 return false; 2644 } 2645 2646 // Upper quadword shuffled. 2647 for (unsigned i = 4; i != 8; ++i) { 2648 SDValue Arg = N->getOperand(i); 2649 if (Arg.getOpcode() == ISD::UNDEF) continue; 2650 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2651 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2652 if (Val < 4 || Val > 7) 2653 return false; 2654 } 2655 2656 return true; 2657} 2658 2659/// CommuteVectorShuffle - Swap vector_shuffle operands as well as 2660/// values in ther permute mask. 
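/// Example (not from the original comment): commuting
/// shuffle(V1, V2, <0, 5, 2, 7>) yields shuffle(V2, V1, <4, 1, 6, 3>) -
/// indices below and above NumElems trade places along with the operands.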
2661static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, 2662 SDValue &V2, SDValue &Mask, 2663 SelectionDAG &DAG) { 2664 MVT VT = Op.getValueType(); 2665 MVT MaskVT = Mask.getValueType(); 2666 MVT EltVT = MaskVT.getVectorElementType(); 2667 unsigned NumElems = Mask.getNumOperands(); 2668 SmallVector<SDValue, 8> MaskVec; 2669 2670 for (unsigned i = 0; i != NumElems; ++i) { 2671 SDValue Arg = Mask.getOperand(i); 2672 if (Arg.getOpcode() == ISD::UNDEF) { 2673 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2674 continue; 2675 } 2676 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2677 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2678 if (Val < NumElems) 2679 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2680 else 2681 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2682 } 2683 2684 std::swap(V1, V2); 2685 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); 2686 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2687} 2688 2689/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming 2690/// the two vector operands have swapped position. 2691static 2692SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG) { 2693 MVT MaskVT = Mask.getValueType(); 2694 MVT EltVT = MaskVT.getVectorElementType(); 2695 unsigned NumElems = Mask.getNumOperands(); 2696 SmallVector<SDValue, 8> MaskVec; 2697 for (unsigned i = 0; i != NumElems; ++i) { 2698 SDValue Arg = Mask.getOperand(i); 2699 if (Arg.getOpcode() == ISD::UNDEF) { 2700 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2701 continue; 2702 } 2703 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2704 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2705 if (Val < NumElems) 2706 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2707 else 2708 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2709 } 2710 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); 2711} 2712 2713 2714/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2715/// match movhlps. The lower half elements should come from upper half of 2716/// V1 (and in order), and the upper half elements should come from the upper 2717/// half of V2 (and in order). 2718static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2719 unsigned NumElems = Mask->getNumOperands(); 2720 if (NumElems != 4) 2721 return false; 2722 for (unsigned i = 0, e = 2; i != e; ++i) 2723 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2724 return false; 2725 for (unsigned i = 2; i != 4; ++i) 2726 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2727 return false; 2728 return true; 2729} 2730 2731/// isScalarLoadToVector - Returns true if the node is a scalar load that 2732/// is promoted to a vector. It also returns the LoadSDNode by reference if 2733/// required. 2734static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { 2735 if (N->getOpcode() != ISD::SCALAR_TO_VECTOR) 2736 return false; 2737 N = N->getOperand(0).getNode(); 2738 if (!ISD::isNON_EXTLoad(N)) 2739 return false; 2740 if (LD) 2741 *LD = cast<LoadSDNode>(N); 2742 return true; 2743} 2744 2745/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2746/// match movlp{s|d}. The lower half elements should come from lower half of 2747/// V1 (and in order), and the upper half elements should come from the upper 2748/// half of V2 (and in order). 
And since V1 will become the source of the
2749/// MOVLP, it must be either a vector load or a scalar load to vector.
2750static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
2751  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
2752    return false;
2753  // If V2 is a vector load, don't do this transformation. We will try to use
2754  // a load-folding shufps op instead.
2755  if (ISD::isNON_EXTLoad(V2))
2756    return false;
2757
2758  unsigned NumElems = Mask->getNumOperands();
2759  if (NumElems != 2 && NumElems != 4)
2760    return false;
2761  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2762    if (!isUndefOrEqual(Mask->getOperand(i), i))
2763      return false;
2764  for (unsigned i = NumElems/2; i != NumElems; ++i)
2765    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2766      return false;
2767  return true;
2768}
2769
2770/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2771/// all the same.
2772static bool isSplatVector(SDNode *N) {
2773  if (N->getOpcode() != ISD::BUILD_VECTOR)
2774    return false;
2775
2776  SDValue SplatValue = N->getOperand(0);
2777  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2778    if (N->getOperand(i) != SplatValue)
2779      return false;
2780  return true;
2781}
2782
2783/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2784/// to an undef.
2785static bool isUndefShuffle(SDNode *N) {
2786  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2787    return false;
2788
2789  SDValue V1 = N->getOperand(0);
2790  SDValue V2 = N->getOperand(1);
2791  SDValue Mask = N->getOperand(2);
2792  unsigned NumElems = Mask.getNumOperands();
2793  for (unsigned i = 0; i != NumElems; ++i) {
2794    SDValue Arg = Mask.getOperand(i);
2795    if (Arg.getOpcode() != ISD::UNDEF) {
2796      unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
2797      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2798        return false;
2799      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2800        return false;
2801    }
2802  }
2803  return true;
2804}
2805
2806/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2807/// constant +0.0.
2808static inline bool isZeroNode(SDValue Elt) {
2809  return ((isa<ConstantSDNode>(Elt) &&
2810           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
2811          (isa<ConstantFPSDNode>(Elt) &&
2812           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
2813}
2814
2815/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2816/// to a zero vector.
2817static bool isZeroShuffle(SDNode *N) { 2818 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2819 return false; 2820 2821 SDValue V1 = N->getOperand(0); 2822 SDValue V2 = N->getOperand(1); 2823 SDValue Mask = N->getOperand(2); 2824 unsigned NumElems = Mask.getNumOperands(); 2825 for (unsigned i = 0; i != NumElems; ++i) { 2826 SDValue Arg = Mask.getOperand(i); 2827 if (Arg.getOpcode() == ISD::UNDEF) 2828 continue; 2829 2830 unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue(); 2831 if (Idx < NumElems) { 2832 unsigned Opc = V1.getNode()->getOpcode(); 2833 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) 2834 continue; 2835 if (Opc != ISD::BUILD_VECTOR || 2836 !isZeroNode(V1.getNode()->getOperand(Idx))) 2837 return false; 2838 } else if (Idx >= NumElems) { 2839 unsigned Opc = V2.getNode()->getOpcode(); 2840 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) 2841 continue; 2842 if (Opc != ISD::BUILD_VECTOR || 2843 !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) 2844 return false; 2845 } 2846 } 2847 return true; 2848} 2849 2850/// getZeroVector - Returns a vector of specified type with all zero elements. 2851/// 2852static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG) { 2853 assert(VT.isVector() && "Expected a vector type"); 2854 2855 // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2856 // type. This ensures they get CSE'd. 2857 SDValue Vec; 2858 if (VT.getSizeInBits() == 64) { // MMX 2859 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); 2860 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2861 } else if (HasSSE2) { // SSE2 2862 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); 2863 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2864 } else { // SSE1 2865 SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); 2866 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4f32, Cst, Cst, Cst, Cst); 2867 } 2868 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2869} 2870 2871/// getOnesVector - Returns a vector of specified type with all bits set. 2872/// 2873static SDValue getOnesVector(MVT VT, SelectionDAG &DAG) { 2874 assert(VT.isVector() && "Expected a vector type"); 2875 2876 // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2877 // type. This ensures they get CSE'd. 2878 SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); 2879 SDValue Vec; 2880 if (VT.getSizeInBits() == 64) // MMX 2881 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2882 else // SSE 2883 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2884 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2885} 2886 2887 2888/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2889/// that point to V2 points to its first element. 
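/// Example (for exposition): with 4 elements and mask <0, 7, 1, 6>, the
/// entries referring to the splatted V2 are rewritten so the mask becomes
/// <0, 4, 1, 4>.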
2890static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { 2891 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2892 2893 bool Changed = false; 2894 SmallVector<SDValue, 8> MaskVec; 2895 unsigned NumElems = Mask.getNumOperands(); 2896 for (unsigned i = 0; i != NumElems; ++i) { 2897 SDValue Arg = Mask.getOperand(i); 2898 if (Arg.getOpcode() != ISD::UNDEF) { 2899 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2900 if (Val > NumElems) { 2901 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2902 Changed = true; 2903 } 2904 } 2905 MaskVec.push_back(Arg); 2906 } 2907 2908 if (Changed) 2909 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2910 &MaskVec[0], MaskVec.size()); 2911 return Mask; 2912} 2913 2914/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2915/// operation of specified width. 2916static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2917 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2918 MVT BaseVT = MaskVT.getVectorElementType(); 2919 2920 SmallVector<SDValue, 8> MaskVec; 2921 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2922 for (unsigned i = 1; i != NumElems; ++i) 2923 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2924 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2925} 2926 2927/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2928/// of specified width. 2929static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2930 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2931 MVT BaseVT = MaskVT.getVectorElementType(); 2932 SmallVector<SDValue, 8> MaskVec; 2933 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2934 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2935 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2936 } 2937 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2938} 2939 2940/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2941/// of specified width. 2942static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2943 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2944 MVT BaseVT = MaskVT.getVectorElementType(); 2945 unsigned Half = NumElems/2; 2946 SmallVector<SDValue, 8> MaskVec; 2947 for (unsigned i = 0; i != Half; ++i) { 2948 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2949 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2950 } 2951 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2952} 2953 2954/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps 2955/// element #0 of a vector with the specified index, leaving the rest of the 2956/// elements in place. 2957static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, 2958 SelectionDAG &DAG) { 2959 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2960 MVT BaseVT = MaskVT.getVectorElementType(); 2961 SmallVector<SDValue, 8> MaskVec; 2962 // Element #0 of the result gets the elt we are replacing. 2963 MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); 2964 for (unsigned i = 1; i != NumElems; ++i) 2965 MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); 2966 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2967} 2968 2969/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. 2970static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { 2971 MVT PVT = HasSSE2 ? 
MVT::v4i32 : MVT::v4f32; 2972 MVT VT = Op.getValueType(); 2973 if (PVT == VT) 2974 return Op; 2975 SDValue V1 = Op.getOperand(0); 2976 SDValue Mask = Op.getOperand(2); 2977 unsigned NumElems = Mask.getNumOperands(); 2978 // Special handling of v4f32 -> v4i32. 2979 if (VT != MVT::v4f32) { 2980 Mask = getUnpacklMask(NumElems, DAG); 2981 while (NumElems > 4) { 2982 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2983 NumElems >>= 1; 2984 } 2985 Mask = getZeroVector(MVT::v4i32, true, DAG); 2986 } 2987 2988 V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1); 2989 SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1, 2990 DAG.getNode(ISD::UNDEF, PVT), Mask); 2991 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2992} 2993 2994/// isVectorLoad - Returns true if the node is a vector load, a scalar 2995/// load that's promoted to vector, or a load bitcasted. 2996static bool isVectorLoad(SDValue Op) { 2997 assert(Op.getValueType().isVector() && "Expected a vector type"); 2998 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR || 2999 Op.getOpcode() == ISD::BIT_CONVERT) { 3000 return isa<LoadSDNode>(Op.getOperand(0)); 3001 } 3002 return isa<LoadSDNode>(Op); 3003} 3004 3005 3006/// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. 3007/// 3008static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, 3009 SelectionDAG &DAG, bool HasSSE3) { 3010 // If we have sse3 and shuffle has more than one use or input is a load, then 3011 // use movddup. Otherwise, use movlhps. 3012 bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); 3013 MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32; 3014 MVT VT = Op.getValueType(); 3015 if (VT == PVT) 3016 return Op; 3017 unsigned NumElems = PVT.getVectorNumElements(); 3018 if (NumElems == 2) { 3019 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); 3020 Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 3021 } else { 3022 assert(NumElems == 4); 3023 SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); 3024 SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); 3025 Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1); 3026 } 3027 3028 V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1); 3029 SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1, 3030 DAG.getNode(ISD::UNDEF, PVT), Mask); 3031 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 3032} 3033 3034/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 3035/// vector of zero or undef vector. This produces a shuffle where the low 3036/// element of V2 is swizzled into the zero/undef vector, landing at element 3037/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 3038static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, 3039 bool isZero, bool HasSSE2, 3040 SelectionDAG &DAG) { 3041 MVT VT = V2.getValueType(); 3042 SDValue V1 = isZero 3043 ? getZeroVector(VT, HasSSE2, DAG) : DAG.getNode(ISD::UNDEF, VT); 3044 unsigned NumElems = V2.getValueType().getVectorNumElements(); 3045 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3046 MVT EVT = MaskVT.getVectorElementType(); 3047 SmallVector<SDValue, 16> MaskVec; 3048 for (unsigned i = 0; i != NumElems; ++i) 3049 if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. 
3050 MaskVec.push_back(DAG.getConstant(NumElems, EVT)); 3051 else 3052 MaskVec.push_back(DAG.getConstant(i, EVT)); 3053 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3054 &MaskVec[0], MaskVec.size()); 3055 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 3056} 3057 3058/// getNumOfConsecutiveZeros - Return the number of elements in a result of 3059/// a shuffle that is zero. 3060static 3061unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask, 3062 unsigned NumElems, bool Low, 3063 SelectionDAG &DAG) { 3064 unsigned NumZeros = 0; 3065 for (unsigned i = 0; i < NumElems; ++i) { 3066 unsigned Index = Low ? i : NumElems-i-1; 3067 SDValue Idx = Mask.getOperand(Index); 3068 if (Idx.getOpcode() == ISD::UNDEF) { 3069 ++NumZeros; 3070 continue; 3071 } 3072 SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index); 3073 if (Elt.getNode() && isZeroNode(Elt)) 3074 ++NumZeros; 3075 else 3076 break; 3077 } 3078 return NumZeros; 3079} 3080 3081/// isVectorShift - Returns true if the shuffle can be implemented as a 3082/// logical left or right shift of a vector. 3083static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG, 3084 bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { 3085 unsigned NumElems = Mask.getNumOperands(); 3086 3087 isLeft = true; 3088 unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG); 3089 if (!NumZeros) { 3090 isLeft = false; 3091 NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG); 3092 if (!NumZeros) 3093 return false; 3094 } 3095 3096 bool SeenV1 = false; 3097 bool SeenV2 = false; 3098 for (unsigned i = NumZeros; i < NumElems; ++i) { 3099 unsigned Val = isLeft ? (i - NumZeros) : i; 3100 SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros)); 3101 if (Idx.getOpcode() == ISD::UNDEF) 3102 continue; 3103 unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue(); 3104 if (Index < NumElems) 3105 SeenV1 = true; 3106 else { 3107 Index -= NumElems; 3108 SeenV2 = true; 3109 } 3110 if (Index != Val) 3111 return false; 3112 } 3113 if (SeenV1 && SeenV2) 3114 return false; 3115 3116 ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1); 3117 ShAmt = NumZeros; 3118 return true; 3119} 3120 3121 3122/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
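/// Adjacent pairs of i8 elements are zero-extended and merged into a single
/// i16 (low element | high element << 8), inserted one at a time into a
/// v8i16 via INSERT_VECTOR_ELT (typically selected as pinsrw), and the
/// result is bitcast back to v16i8. Gives up (returns an empty SDValue)
/// when more than 8 elements are non-zero.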
3123/// 3124static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, 3125 unsigned NumNonZero, unsigned NumZero, 3126 SelectionDAG &DAG, TargetLowering &TLI) { 3127 if (NumNonZero > 8) 3128 return SDValue(); 3129 3130 SDValue V(0, 0); 3131 bool First = true; 3132 for (unsigned i = 0; i < 16; ++i) { 3133 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 3134 if (ThisIsNonZero && First) { 3135 if (NumZero) 3136 V = getZeroVector(MVT::v8i16, true, DAG); 3137 else 3138 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3139 First = false; 3140 } 3141 3142 if ((i & 1) != 0) { 3143 SDValue ThisElt(0, 0), LastElt(0, 0); 3144 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 3145 if (LastIsNonZero) { 3146 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 3147 } 3148 if (ThisIsNonZero) { 3149 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 3150 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 3151 ThisElt, DAG.getConstant(8, MVT::i8)); 3152 if (LastIsNonZero) 3153 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 3154 } else 3155 ThisElt = LastElt; 3156 3157 if (ThisElt.getNode()) 3158 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 3159 DAG.getIntPtrConstant(i/2)); 3160 } 3161 } 3162 3163 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 3164} 3165 3166/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 3167/// 3168static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, 3169 unsigned NumNonZero, unsigned NumZero, 3170 SelectionDAG &DAG, TargetLowering &TLI) { 3171 if (NumNonZero > 4) 3172 return SDValue(); 3173 3174 SDValue V(0, 0); 3175 bool First = true; 3176 for (unsigned i = 0; i < 8; ++i) { 3177 bool isNonZero = (NonZeros & (1 << i)) != 0; 3178 if (isNonZero) { 3179 if (First) { 3180 if (NumZero) 3181 V = getZeroVector(MVT::v8i16, true, DAG); 3182 else 3183 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3184 First = false; 3185 } 3186 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 3187 DAG.getIntPtrConstant(i)); 3188 } 3189 } 3190 3191 return V; 3192} 3193 3194/// getVShift - Return a vector logical shift node. 3195/// 3196static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, 3197 unsigned NumBits, SelectionDAG &DAG, 3198 const TargetLowering &TLI) { 3199 bool isMMX = VT.getSizeInBits() == 64; 3200 MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; 3201 unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; 3202 SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp); 3203 return DAG.getNode(ISD::BIT_CONVERT, VT, 3204 DAG.getNode(Opc, ShVT, SrcOp, 3205 DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); 3206} 3207 3208SDValue 3209X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 3210 // All zero's are handled with pxor, all one's are handled with pcmpeqd. 3211 if (ISD::isBuildVectorAllZeros(Op.getNode()) 3212 || ISD::isBuildVectorAllOnes(Op.getNode())) { 3213 // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to 3214 // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are 3215 // eliminated on x86-32 hosts. 
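    // v4i32 / v2i32 all-zero and all-ones vectors are already in canonical
    // form and are returned unchanged below; every other type is rebuilt via
    // getZeroVector / getOnesVector, which emit the canonical node and
    // bitcast it to the requested type.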
3216 if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) 3217 return Op; 3218 3219 if (ISD::isBuildVectorAllOnes(Op.getNode())) 3220 return getOnesVector(Op.getValueType(), DAG); 3221 return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG); 3222 } 3223 3224 MVT VT = Op.getValueType(); 3225 MVT EVT = VT.getVectorElementType(); 3226 unsigned EVTBits = EVT.getSizeInBits(); 3227 3228 unsigned NumElems = Op.getNumOperands(); 3229 unsigned NumZero = 0; 3230 unsigned NumNonZero = 0; 3231 unsigned NonZeros = 0; 3232 bool IsAllConstants = true; 3233 SmallSet<SDValue, 8> Values; 3234 for (unsigned i = 0; i < NumElems; ++i) { 3235 SDValue Elt = Op.getOperand(i); 3236 if (Elt.getOpcode() == ISD::UNDEF) 3237 continue; 3238 Values.insert(Elt); 3239 if (Elt.getOpcode() != ISD::Constant && 3240 Elt.getOpcode() != ISD::ConstantFP) 3241 IsAllConstants = false; 3242 if (isZeroNode(Elt)) 3243 NumZero++; 3244 else { 3245 NonZeros |= (1 << i); 3246 NumNonZero++; 3247 } 3248 } 3249 3250 if (NumNonZero == 0) { 3251 // All undef vector. Return an UNDEF. All zero vectors were handled above. 3252 return DAG.getNode(ISD::UNDEF, VT); 3253 } 3254 3255 // Special case for single non-zero, non-undef, element. 3256 if (NumNonZero == 1 && NumElems <= 4) { 3257 unsigned Idx = CountTrailingZeros_32(NonZeros); 3258 SDValue Item = Op.getOperand(Idx); 3259 3260 // If this is an insertion of an i64 value on x86-32, and if the top bits of 3261 // the value are obviously zero, truncate the value to i32 and do the 3262 // insertion that way. Only do this if the value is non-constant or if the 3263 // value is a constant being inserted into element 0. It is cheaper to do 3264 // a constant pool load than it is to do a movd + shuffle. 3265 if (EVT == MVT::i64 && !Subtarget->is64Bit() && 3266 (!IsAllConstants || Idx == 0)) { 3267 if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { 3268 // Handle MMX and SSE both. 3269 MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; 3270 unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; 3271 3272 // Truncate the value (which may itself be a constant) to i32, and 3273 // convert it to a vector with movd (S2V+shuffle to zero extend). 3274 Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item); 3275 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item); 3276 Item = getShuffleVectorZeroOrUndef(Item, 0, true, 3277 Subtarget->hasSSE2(), DAG); 3278 3279 // Now we have our 32-bit value zero extended in the low element of 3280 // a vector. If Idx != 0, swizzle it into place. 3281 if (Idx != 0) { 3282 SDValue Ops[] = { 3283 Item, DAG.getNode(ISD::UNDEF, Item.getValueType()), 3284 getSwapEltZeroMask(VecElts, Idx, DAG) 3285 }; 3286 Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3); 3287 } 3288 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item); 3289 } 3290 } 3291 3292 // If we have a constant or non-constant insertion into the low element of 3293 // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into 3294 // the rest of the elements. This will be matched as movd/movq/movss/movsd 3295 // depending on what the source datatype is. Because we can only get here 3296 // when NumElems <= 4, this only needs to handle i32/f32/i64/f64. 3297 if (Idx == 0 && 3298 // Don't do this for i64 values on x86-32. 3299 (EVT != MVT::i64 || Subtarget->is64Bit())) { 3300 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3301 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 
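      // For a 4-wide vector this produces the shuffle mask <4,1,2,3>:
      // element 0 is taken from the scalar now sitting in the low element of
      // Item, and the remaining elements come from the zero vector (or undef
      // if there were no zero elements).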
3302 return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, 3303 Subtarget->hasSSE2(), DAG); 3304 } 3305 3306 // Is it a vector logical left shift? 3307 if (NumElems == 2 && Idx == 1 && 3308 isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) { 3309 unsigned NumBits = VT.getSizeInBits(); 3310 return getVShift(true, VT, 3311 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)), 3312 NumBits/2, DAG, *this); 3313 } 3314 3315 if (IsAllConstants) // Otherwise, it's better to do a constpool load. 3316 return SDValue(); 3317 3318 // Otherwise, if this is a vector with i32 or f32 elements, and the element 3319 // is a non-constant being inserted into an element other than the low one, 3320 // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka 3321 // movd/movss) to move this into the low element, then shuffle it into 3322 // place. 3323 if (EVTBits == 32) { 3324 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3325 3326 // Turn it into a shuffle of zero and zero-extended scalar to vector. 3327 Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, 3328 Subtarget->hasSSE2(), DAG); 3329 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3330 MVT MaskEVT = MaskVT.getVectorElementType(); 3331 SmallVector<SDValue, 8> MaskVec; 3332 for (unsigned i = 0; i < NumElems; i++) 3333 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3334 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3335 &MaskVec[0], MaskVec.size()); 3336 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3337 DAG.getNode(ISD::UNDEF, VT), Mask); 3338 } 3339 } 3340 3341 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3342 if (Values.size() == 1) 3343 return SDValue(); 3344 3345 // A vector full of immediates; various special cases are already 3346 // handled, so this is best done with a single constant-pool load. 3347 if (IsAllConstants) 3348 return SDValue(); 3349 3350 // Let legalizer expand 2-wide build_vectors. 3351 if (EVTBits == 64) { 3352 if (NumNonZero == 1) { 3353 // One half is zero or undef. 3354 unsigned Idx = CountTrailingZeros_32(NonZeros); 3355 SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, 3356 Op.getOperand(Idx)); 3357 return getShuffleVectorZeroOrUndef(V2, Idx, true, 3358 Subtarget->hasSSE2(), DAG); 3359 } 3360 return SDValue(); 3361 } 3362 3363 // If element VT is < 32 bits, convert it to inserts into a zero vector. 3364 if (EVTBits == 8 && NumElems == 16) { 3365 SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 3366 *this); 3367 if (V.getNode()) return V; 3368 } 3369 3370 if (EVTBits == 16 && NumElems == 8) { 3371 SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 3372 *this); 3373 if (V.getNode()) return V; 3374 } 3375 3376 // If element VT is == 32 bits, turn it into a number of shuffles. 3377 SmallVector<SDValue, 8> V; 3378 V.resize(NumElems); 3379 if (NumElems == 4 && NumZero > 0) { 3380 for (unsigned i = 0; i < 4; ++i) { 3381 bool isZero = !(NonZeros & (1 << i)); 3382 if (isZero) 3383 V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG); 3384 else 3385 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3386 } 3387 3388 for (unsigned i = 0; i < 2; ++i) { 3389 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3390 default: break; 3391 case 0: 3392 V[i] = V[i*2]; // Must be a zero vector. 
3393 break; 3394 case 1: 3395 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3396 getMOVLMask(NumElems, DAG)); 3397 break; 3398 case 2: 3399 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3400 getMOVLMask(NumElems, DAG)); 3401 break; 3402 case 3: 3403 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3404 getUnpacklMask(NumElems, DAG)); 3405 break; 3406 } 3407 } 3408 3409 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3410 MVT EVT = MaskVT.getVectorElementType(); 3411 SmallVector<SDValue, 8> MaskVec; 3412 bool Reverse = (NonZeros & 0x3) == 2; 3413 for (unsigned i = 0; i < 2; ++i) 3414 if (Reverse) 3415 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3416 else 3417 MaskVec.push_back(DAG.getConstant(i, EVT)); 3418 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3419 for (unsigned i = 0; i < 2; ++i) 3420 if (Reverse) 3421 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3422 else 3423 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3424 SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3425 &MaskVec[0], MaskVec.size()); 3426 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3427 } 3428 3429 if (Values.size() > 2) { 3430 // Expand into a number of unpckl*. 3431 // e.g. for v4f32 3432 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3433 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3434 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3435 SDValue UnpckMask = getUnpacklMask(NumElems, DAG); 3436 for (unsigned i = 0; i < NumElems; ++i) 3437 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3438 NumElems >>= 1; 3439 while (NumElems != 0) { 3440 for (unsigned i = 0; i < NumElems; ++i) 3441 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3442 UnpckMask); 3443 NumElems >>= 1; 3444 } 3445 return V[0]; 3446 } 3447 3448 return SDValue(); 3449} 3450 3451static 3452SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2, 3453 SDValue PermMask, SelectionDAG &DAG, 3454 TargetLowering &TLI) { 3455 SDValue NewV; 3456 MVT MaskVT = MVT::getIntVectorWithNumElements(8); 3457 MVT MaskEVT = MaskVT.getVectorElementType(); 3458 MVT PtrVT = TLI.getPointerTy(); 3459 SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(), 3460 PermMask.getNode()->op_end()); 3461 3462 // First record which half of which vector the low elements come from. 3463 SmallVector<unsigned, 4> LowQuad(4); 3464 for (unsigned i = 0; i < 4; ++i) { 3465 SDValue Elt = MaskElts[i]; 3466 if (Elt.getOpcode() == ISD::UNDEF) 3467 continue; 3468 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3469 int QuadIdx = EltIdx / 4; 3470 ++LowQuad[QuadIdx]; 3471 } 3472 3473 int BestLowQuad = -1; 3474 unsigned MaxQuad = 1; 3475 for (unsigned i = 0; i < 4; ++i) { 3476 if (LowQuad[i] > MaxQuad) { 3477 BestLowQuad = i; 3478 MaxQuad = LowQuad[i]; 3479 } 3480 } 3481 3482 // Record which half of which vector the high elements come from. 3483 SmallVector<unsigned, 4> HighQuad(4); 3484 for (unsigned i = 4; i < 8; ++i) { 3485 SDValue Elt = MaskElts[i]; 3486 if (Elt.getOpcode() == ISD::UNDEF) 3487 continue; 3488 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3489 int QuadIdx = EltIdx / 4; 3490 ++HighQuad[QuadIdx]; 3491 } 3492 3493 int BestHighQuad = -1; 3494 MaxQuad = 1; 3495 for (unsigned i = 0; i < 4; ++i) { 3496 if (HighQuad[i] > MaxQuad) { 3497 BestHighQuad = i; 3498 MaxQuad = HighQuad[i]; 3499 } 3500 } 3501 3502 // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. 
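  // The overall strategy: first move the best source quad for each half into
  // place with a single v2i64 shuffle (shufpd), then reorder within the low
  // and high halves using PSHUFLW / PSHUFHW, and finally fix up any remaining
  // out-of-place elements with pextrw / pinsrw.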
3503 if (BestLowQuad != -1 || BestHighQuad != -1) { 3504 // First sort the 4 chunks in order using shufpd. 3505 SmallVector<SDValue, 8> MaskVec; 3506 3507 if (BestLowQuad != -1) 3508 MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); 3509 else 3510 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 3511 3512 if (BestHighQuad != -1) 3513 MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); 3514 else 3515 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 3516 3517 SDValue Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); 3518 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 3519 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), 3520 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); 3521 NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); 3522 3523 // Now sort high and low parts separately. 3524 BitVector InOrder(8); 3525 if (BestLowQuad != -1) { 3526 // Sort lower half in order using PSHUFLW. 3527 MaskVec.clear(); 3528 bool AnyOutOrder = false; 3529 3530 for (unsigned i = 0; i != 4; ++i) { 3531 SDValue Elt = MaskElts[i]; 3532 if (Elt.getOpcode() == ISD::UNDEF) { 3533 MaskVec.push_back(Elt); 3534 InOrder.set(i); 3535 } else { 3536 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3537 if (EltIdx != i) 3538 AnyOutOrder = true; 3539 3540 MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); 3541 3542 // If this element is in the right place after this shuffle, then 3543 // remember it. 3544 if ((int)(EltIdx / 4) == BestLowQuad) 3545 InOrder.set(i); 3546 } 3547 } 3548 if (AnyOutOrder) { 3549 for (unsigned i = 4; i != 8; ++i) 3550 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3551 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3552 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3553 } 3554 } 3555 3556 if (BestHighQuad != -1) { 3557 // Sort high half in order using PSHUFHW if possible. 3558 MaskVec.clear(); 3559 3560 for (unsigned i = 0; i != 4; ++i) 3561 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3562 3563 bool AnyOutOrder = false; 3564 for (unsigned i = 4; i != 8; ++i) { 3565 SDValue Elt = MaskElts[i]; 3566 if (Elt.getOpcode() == ISD::UNDEF) { 3567 MaskVec.push_back(Elt); 3568 InOrder.set(i); 3569 } else { 3570 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3571 if (EltIdx != i) 3572 AnyOutOrder = true; 3573 3574 MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT)); 3575 3576 // If this element is in the right place after this shuffle, then 3577 // remember it. 3578 if ((int)(EltIdx / 4) == BestHighQuad) 3579 InOrder.set(i); 3580 } 3581 } 3582 3583 if (AnyOutOrder) { 3584 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3585 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3586 } 3587 } 3588 3589 // The other elements are put in the right place using pextrw and pinsrw. 3590 for (unsigned i = 0; i != 8; ++i) { 3591 if (InOrder[i]) 3592 continue; 3593 SDValue Elt = MaskElts[i]; 3594 if (Elt.getOpcode() == ISD::UNDEF) 3595 continue; 3596 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3597 SDValue ExtOp = (EltIdx < 8) 3598 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, 3599 DAG.getConstant(EltIdx, PtrVT)) 3600 : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, 3601 DAG.getConstant(EltIdx - 8, PtrVT)); 3602 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3603 DAG.getConstant(i, PtrVT)); 3604 } 3605 3606 return NewV; 3607 } 3608 3609 // PSHUF{H|L}W are not used. 
Lower into extracts and inserts but try to use as 3610 // few as possible. First, let's find out how many elements are already in the 3611 // right order. 3612 unsigned V1InOrder = 0; 3613 unsigned V1FromV1 = 0; 3614 unsigned V2InOrder = 0; 3615 unsigned V2FromV2 = 0; 3616 SmallVector<SDValue, 8> V1Elts; 3617 SmallVector<SDValue, 8> V2Elts; 3618 for (unsigned i = 0; i < 8; ++i) { 3619 SDValue Elt = MaskElts[i]; 3620 if (Elt.getOpcode() == ISD::UNDEF) { 3621 V1Elts.push_back(Elt); 3622 V2Elts.push_back(Elt); 3623 ++V1InOrder; 3624 ++V2InOrder; 3625 continue; 3626 } 3627 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3628 if (EltIdx == i) { 3629 V1Elts.push_back(Elt); 3630 V2Elts.push_back(DAG.getConstant(i+8, MaskEVT)); 3631 ++V1InOrder; 3632 } else if (EltIdx == i+8) { 3633 V1Elts.push_back(Elt); 3634 V2Elts.push_back(DAG.getConstant(i, MaskEVT)); 3635 ++V2InOrder; 3636 } else if (EltIdx < 8) { 3637 V1Elts.push_back(Elt); 3638 ++V1FromV1; 3639 } else { 3640 V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT)); 3641 ++V2FromV2; 3642 } 3643 } 3644 3645 if (V2InOrder > V1InOrder) { 3646 PermMask = CommuteVectorShuffleMask(PermMask, DAG); 3647 std::swap(V1, V2); 3648 std::swap(V1Elts, V2Elts); 3649 std::swap(V1FromV1, V2FromV2); 3650 } 3651 3652 if ((V1FromV1 + V1InOrder) != 8) { 3653 // Some elements are from V2. 3654 if (V1FromV1) { 3655 // If there are elements that are from V1 but out of place, 3656 // then first sort them in place 3657 SmallVector<SDValue, 8> MaskVec; 3658 for (unsigned i = 0; i < 8; ++i) { 3659 SDValue Elt = V1Elts[i]; 3660 if (Elt.getOpcode() == ISD::UNDEF) { 3661 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3662 continue; 3663 } 3664 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3665 if (EltIdx >= 8) 3666 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3667 else 3668 MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT)); 3669 } 3670 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3671 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask); 3672 } 3673 3674 NewV = V1; 3675 for (unsigned i = 0; i < 8; ++i) { 3676 SDValue Elt = V1Elts[i]; 3677 if (Elt.getOpcode() == ISD::UNDEF) 3678 continue; 3679 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3680 if (EltIdx < 8) 3681 continue; 3682 SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, 3683 DAG.getConstant(EltIdx - 8, PtrVT)); 3684 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3685 DAG.getConstant(i, PtrVT)); 3686 } 3687 return NewV; 3688 } else { 3689 // All elements are from V1. 3690 NewV = V1; 3691 for (unsigned i = 0; i < 8; ++i) { 3692 SDValue Elt = V1Elts[i]; 3693 if (Elt.getOpcode() == ISD::UNDEF) 3694 continue; 3695 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3696 SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, 3697 DAG.getConstant(EltIdx, PtrVT)); 3698 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3699 DAG.getConstant(i, PtrVT)); 3700 } 3701 return NewV; 3702 } 3703} 3704 3705/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide 3706/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be 3707/// done when every pair / quad of shuffle mask elements point to elements in 3708/// the right sequence. e.g. 
3709/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> 3710static 3711SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2, 3712 MVT VT, 3713 SDValue PermMask, SelectionDAG &DAG, 3714 TargetLowering &TLI) { 3715 unsigned NumElems = PermMask.getNumOperands(); 3716 unsigned NewWidth = (NumElems == 4) ? 2 : 4; 3717 MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); 3718 MVT MaskEltVT = MaskVT.getVectorElementType(); 3719 MVT NewVT = MaskVT; 3720 switch (VT.getSimpleVT()) { 3721 default: assert(false && "Unexpected!"); 3722 case MVT::v4f32: NewVT = MVT::v2f64; break; 3723 case MVT::v4i32: NewVT = MVT::v2i64; break; 3724 case MVT::v8i16: NewVT = MVT::v4i32; break; 3725 case MVT::v16i8: NewVT = MVT::v4i32; break; 3726 } 3727 3728 if (NewWidth == 2) { 3729 if (VT.isInteger()) 3730 NewVT = MVT::v2i64; 3731 else 3732 NewVT = MVT::v2f64; 3733 } 3734 unsigned Scale = NumElems / NewWidth; 3735 SmallVector<SDValue, 8> MaskVec; 3736 for (unsigned i = 0; i < NumElems; i += Scale) { 3737 unsigned StartIdx = ~0U; 3738 for (unsigned j = 0; j < Scale; ++j) { 3739 SDValue Elt = PermMask.getOperand(i+j); 3740 if (Elt.getOpcode() == ISD::UNDEF) 3741 continue; 3742 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue(); 3743 if (StartIdx == ~0U) 3744 StartIdx = EltIdx - (EltIdx % Scale); 3745 if (EltIdx != StartIdx + j) 3746 return SDValue(); 3747 } 3748 if (StartIdx == ~0U) 3749 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEltVT)); 3750 else 3751 MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT)); 3752 } 3753 3754 V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1); 3755 V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2); 3756 return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2, 3757 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3758 &MaskVec[0], MaskVec.size())); 3759} 3760 3761/// getVZextMovL - Return a zero-extending vector move low node. 3762/// 3763static SDValue getVZextMovL(MVT VT, MVT OpVT, 3764 SDValue SrcOp, SelectionDAG &DAG, 3765 const X86Subtarget *Subtarget) { 3766 if (VT == MVT::v2f64 || VT == MVT::v4f32) { 3767 LoadSDNode *LD = NULL; 3768 if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) 3769 LD = dyn_cast<LoadSDNode>(SrcOp); 3770 if (!LD) { 3771 // movssrr and movsdrr do not clear top bits. Try to use movd, movq 3772 // instead. 3773 MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; 3774 if ((EVT != MVT::i64 || Subtarget->is64Bit()) && 3775 SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR && 3776 SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT && 3777 SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) { 3778 // PR2108 3779 OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32; 3780 return DAG.getNode(ISD::BIT_CONVERT, VT, 3781 DAG.getNode(X86ISD::VZEXT_MOVL, OpVT, 3782 DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT, 3783 SrcOp.getOperand(0) 3784 .getOperand(0)))); 3785 } 3786 } 3787 } 3788 3789 return DAG.getNode(ISD::BIT_CONVERT, VT, 3790 DAG.getNode(X86ISD::VZEXT_MOVL, OpVT, 3791 DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp))); 3792} 3793 3794/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of 3795/// shuffles. 
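/// The cases handled below: when at most two elements come from each source
/// vector, two shuffles suffice (one to gather the elements, one to put them
/// in order); when three elements come from one vector, an intermediate
/// shufps pairs the lone element from the other vector with an element that
/// ends up in the same half, and a second shufps builds the final vector;
/// otherwise the mask is split into a low shuffle and a high shuffle whose
/// results are combined by a third shuffle.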
3796static SDValue 3797LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2, 3798 SDValue PermMask, MVT VT, SelectionDAG &DAG) { 3799 MVT MaskVT = PermMask.getValueType(); 3800 MVT MaskEVT = MaskVT.getVectorElementType(); 3801 SmallVector<std::pair<int, int>, 8> Locs; 3802 Locs.resize(4); 3803 SmallVector<SDValue, 8> Mask1(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3804 unsigned NumHi = 0; 3805 unsigned NumLo = 0; 3806 for (unsigned i = 0; i != 4; ++i) { 3807 SDValue Elt = PermMask.getOperand(i); 3808 if (Elt.getOpcode() == ISD::UNDEF) { 3809 Locs[i] = std::make_pair(-1, -1); 3810 } else { 3811 unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); 3812 assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!"); 3813 if (Val < 4) { 3814 Locs[i] = std::make_pair(0, NumLo); 3815 Mask1[NumLo] = Elt; 3816 NumLo++; 3817 } else { 3818 Locs[i] = std::make_pair(1, NumHi); 3819 if (2+NumHi < 4) 3820 Mask1[2+NumHi] = Elt; 3821 NumHi++; 3822 } 3823 } 3824 } 3825 3826 if (NumLo <= 2 && NumHi <= 2) { 3827 // If no more than two elements come from either vector. This can be 3828 // implemented with two shuffles. First shuffle gather the elements. 3829 // The second shuffle, which takes the first shuffle as both of its 3830 // vector operands, put the elements into the right order. 3831 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3832 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3833 &Mask1[0], Mask1.size())); 3834 3835 SmallVector<SDValue, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3836 for (unsigned i = 0; i != 4; ++i) { 3837 if (Locs[i].first == -1) 3838 continue; 3839 else { 3840 unsigned Idx = (i < 2) ? 0 : 4; 3841 Idx += Locs[i].first * 2 + Locs[i].second; 3842 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3843 } 3844 } 3845 3846 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3847 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3848 &Mask2[0], Mask2.size())); 3849 } else if (NumLo == 3 || NumHi == 3) { 3850 // Otherwise, we must have three elements from one vector, call it X, and 3851 // one element from the other, call it Y. First, use a shufps to build an 3852 // intermediate vector with the one element from Y and the element from X 3853 // that will be in the same half in the final destination (the indexes don't 3854 // matter). Then, use a shufps to build the final vector, taking the half 3855 // containing the element from Y from the intermediate, and the other half 3856 // from X. 3857 if (NumHi == 3) { 3858 // Normalize it so the 3 elements come from V1. 3859 PermMask = CommuteVectorShuffleMask(PermMask, DAG); 3860 std::swap(V1, V2); 3861 } 3862 3863 // Find the element from V2. 3864 unsigned HiIndex; 3865 for (HiIndex = 0; HiIndex < 3; ++HiIndex) { 3866 SDValue Elt = PermMask.getOperand(HiIndex); 3867 if (Elt.getOpcode() == ISD::UNDEF) 3868 continue; 3869 unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue(); 3870 if (Val >= 4) 3871 break; 3872 } 3873 3874 Mask1[0] = PermMask.getOperand(HiIndex); 3875 Mask1[1] = DAG.getNode(ISD::UNDEF, MaskEVT); 3876 Mask1[2] = PermMask.getOperand(HiIndex^1); 3877 Mask1[3] = DAG.getNode(ISD::UNDEF, MaskEVT); 3878 V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3879 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4)); 3880 3881 if (HiIndex >= 2) { 3882 Mask1[0] = PermMask.getOperand(0); 3883 Mask1[1] = PermMask.getOperand(1); 3884 Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT); 3885 Mask1[3] = DAG.getConstant(HiIndex & 1 ? 
4 : 6, MaskEVT); 3886 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3887 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4)); 3888 } else { 3889 Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT); 3890 Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT); 3891 Mask1[2] = PermMask.getOperand(2); 3892 Mask1[3] = PermMask.getOperand(3); 3893 if (Mask1[2].getOpcode() != ISD::UNDEF) 3894 Mask1[2] = 3895 DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4, 3896 MaskEVT); 3897 if (Mask1[3].getOpcode() != ISD::UNDEF) 3898 Mask1[3] = 3899 DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4, 3900 MaskEVT); 3901 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, 3902 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4)); 3903 } 3904 } 3905 3906 // Break it into (shuffle shuffle_hi, shuffle_lo). 3907 Locs.clear(); 3908 SmallVector<SDValue,8> LoMask(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3909 SmallVector<SDValue,8> HiMask(4, DAG.getNode(ISD::UNDEF, MaskEVT)); 3910 SmallVector<SDValue,8> *MaskPtr = &LoMask; 3911 unsigned MaskIdx = 0; 3912 unsigned LoIdx = 0; 3913 unsigned HiIdx = 2; 3914 for (unsigned i = 0; i != 4; ++i) { 3915 if (i == 2) { 3916 MaskPtr = &HiMask; 3917 MaskIdx = 1; 3918 LoIdx = 0; 3919 HiIdx = 2; 3920 } 3921 SDValue Elt = PermMask.getOperand(i); 3922 if (Elt.getOpcode() == ISD::UNDEF) { 3923 Locs[i] = std::make_pair(-1, -1); 3924 } else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) { 3925 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3926 (*MaskPtr)[LoIdx] = Elt; 3927 LoIdx++; 3928 } else { 3929 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3930 (*MaskPtr)[HiIdx] = Elt; 3931 HiIdx++; 3932 } 3933 } 3934 3935 SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3936 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3937 &LoMask[0], LoMask.size())); 3938 SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3939 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3940 &HiMask[0], HiMask.size())); 3941 SmallVector<SDValue, 8> MaskOps; 3942 for (unsigned i = 0; i != 4; ++i) { 3943 if (Locs[i].first == -1) { 3944 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3945 } else { 3946 unsigned Idx = Locs[i].first * 4 + Locs[i].second; 3947 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3948 } 3949 } 3950 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3951 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3952 &MaskOps[0], MaskOps.size())); 3953} 3954 3955SDValue 3956X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 3957 SDValue V1 = Op.getOperand(0); 3958 SDValue V2 = Op.getOperand(1); 3959 SDValue PermMask = Op.getOperand(2); 3960 MVT VT = Op.getValueType(); 3961 unsigned NumElems = PermMask.getNumOperands(); 3962 bool isMMX = VT.getSizeInBits() == 64; 3963 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3964 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3965 bool V1IsSplat = false; 3966 bool V2IsSplat = false; 3967 3968 if (isUndefShuffle(Op.getNode())) 3969 return DAG.getNode(ISD::UNDEF, VT); 3970 3971 if (isZeroShuffle(Op.getNode())) 3972 return getZeroVector(VT, Subtarget->hasSSE2(), DAG); 3973 3974 if (isIdentityMask(PermMask.getNode())) 3975 return V1; 3976 else if (isIdentityMask(PermMask.getNode(), true)) 3977 return V2; 3978 3979 // Canonicalize movddup shuffles. 
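  // Such shuffles are rewritten by CanonicalizeMovddup as a v2f64 movddup
  // when SSE3 is available and the value has multiple uses or comes from a
  // load, and as a movlhps-style v4f32 shuffle otherwise.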
3980 if (V2IsUndef && Subtarget->hasSSE2() && 3981 VT.getSizeInBits() == 128 && 3982 X86::isMOVDDUPMask(PermMask.getNode())) 3983 return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); 3984 3985 if (isSplatMask(PermMask.getNode())) { 3986 if (isMMX || NumElems < 4) return Op; 3987 // Promote it to a v4{if}32 splat. 3988 return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); 3989 } 3990 3991 // If the shuffle can be profitably rewritten as a narrower shuffle, then 3992 // do it! 3993 if (VT == MVT::v8i16 || VT == MVT::v16i8) { 3994 SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this); 3995 if (NewOp.getNode()) 3996 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); 3997 } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { 3998 // FIXME: Figure out a cleaner way to do this. 3999 // Try to make use of movq to zero out the top part. 4000 if (ISD::isBuildVectorAllZeros(V2.getNode())) { 4001 SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, 4002 DAG, *this); 4003 if (NewOp.getNode()) { 4004 SDValue NewV1 = NewOp.getOperand(0); 4005 SDValue NewV2 = NewOp.getOperand(1); 4006 SDValue NewMask = NewOp.getOperand(2); 4007 if (isCommutedMOVL(NewMask.getNode(), true, false)) { 4008 NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG); 4009 return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget); 4010 } 4011 } 4012 } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { 4013 SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, 4014 DAG, *this); 4015 if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode())) 4016 return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), 4017 DAG, Subtarget); 4018 } 4019 } 4020 4021 // Check if this can be converted into a logical shift. 4022 bool isLeft = false; 4023 unsigned ShAmt = 0; 4024 SDValue ShVal; 4025 bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt); 4026 if (isShift && ShVal.hasOneUse()) { 4027 // If the shifted value has multiple uses, it may be cheaper to use 4028 // v_set0 + movlhps or movhlps, etc. 4029 MVT EVT = VT.getVectorElementType(); 4030 ShAmt *= EVT.getSizeInBits(); 4031 return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this); 4032 } 4033 4034 if (X86::isMOVLMask(PermMask.getNode())) { 4035 if (V1IsUndef) 4036 return V2; 4037 if (ISD::isBuildVectorAllZeros(V1.getNode())) 4038 return getVZextMovL(VT, VT, V2, DAG, Subtarget); 4039 if (!isMMX) 4040 return Op; 4041 } 4042 4043 if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) || 4044 X86::isMOVSLDUPMask(PermMask.getNode()) || 4045 X86::isMOVHLPSMask(PermMask.getNode()) || 4046 X86::isMOVHPMask(PermMask.getNode()) || 4047 X86::isMOVLPMask(PermMask.getNode()))) 4048 return Op; 4049 4050 if (ShouldXformToMOVHLPS(PermMask.getNode()) || 4051 ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode())) 4052 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 4053 4054 if (isShift) { 4055 // No better options. Use a vshl / vsrl. 4056 MVT EVT = VT.getVectorElementType(); 4057 ShAmt *= EVT.getSizeInBits(); 4058 return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this); 4059 } 4060 4061 bool Commuted = false; 4062 // FIXME: This should also accept a bitcast of a splat? Be careful, not 4063 // 1,1,1,1 -> v8i16 though. 4064 V1IsSplat = isSplatVector(V1.getNode()); 4065 V2IsSplat = isSplatVector(V2.getNode()); 4066 4067 // Canonicalize the splat or undef, if present, to be on the RHS. 
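  // With the splat / undef operand on the RHS, the matching code below only
  // has to recognize the V2-is-splat and V2-is-undef forms; Commuted records
  // the swap so that unpck matching can be retried on the original operand
  // order further down.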
4068 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 4069 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 4070 std::swap(V1IsSplat, V2IsSplat); 4071 std::swap(V1IsUndef, V2IsUndef); 4072 Commuted = true; 4073 } 4074 4075 // FIXME: Figure out a cleaner way to do this. 4076 if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) { 4077 if (V2IsUndef) return V1; 4078 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 4079 if (V2IsSplat) { 4080 // V2 is a splat, so the mask may be malformed. That is, it may point 4081 // to any V2 element. The instruction selectior won't like this. Get 4082 // a corrected mask and commute to form a proper MOVS{S|D}. 4083 SDValue NewMask = getMOVLMask(NumElems, DAG); 4084 if (NewMask.getNode() != PermMask.getNode()) 4085 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 4086 } 4087 return Op; 4088 } 4089 4090 if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || 4091 X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || 4092 X86::isUNPCKLMask(PermMask.getNode()) || 4093 X86::isUNPCKHMask(PermMask.getNode())) 4094 return Op; 4095 4096 if (V2IsSplat) { 4097 // Normalize mask so all entries that point to V2 points to its first 4098 // element then try to match unpck{h|l} again. If match, return a 4099 // new vector_shuffle with the corrected mask. 4100 SDValue NewMask = NormalizeMask(PermMask, DAG); 4101 if (NewMask.getNode() != PermMask.getNode()) { 4102 if (X86::isUNPCKLMask(PermMask.getNode(), true)) { 4103 SDValue NewMask = getUnpacklMask(NumElems, DAG); 4104 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 4105 } else if (X86::isUNPCKHMask(PermMask.getNode(), true)) { 4106 SDValue NewMask = getUnpackhMask(NumElems, DAG); 4107 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 4108 } 4109 } 4110 } 4111 4112 // Normalize the node to match x86 shuffle ops if needed 4113 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode())) 4114 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 4115 4116 if (Commuted) { 4117 // Commute is back and try unpck* again. 4118 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 4119 if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) || 4120 X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) || 4121 X86::isUNPCKLMask(PermMask.getNode()) || 4122 X86::isUNPCKHMask(PermMask.getNode())) 4123 return Op; 4124 } 4125 4126 // Try PSHUF* first, then SHUFP*. 4127 // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically 4128 // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. 4129 if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) { 4130 if (V2.getOpcode() != ISD::UNDEF) 4131 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 4132 DAG.getNode(ISD::UNDEF, VT), PermMask); 4133 return Op; 4134 } 4135 4136 if (!isMMX) { 4137 if (Subtarget->hasSSE2() && 4138 (X86::isPSHUFDMask(PermMask.getNode()) || 4139 X86::isPSHUFHWMask(PermMask.getNode()) || 4140 X86::isPSHUFLWMask(PermMask.getNode()))) { 4141 MVT RVT = VT; 4142 if (VT == MVT::v4f32) { 4143 RVT = MVT::v4i32; 4144 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, 4145 DAG.getNode(ISD::BIT_CONVERT, RVT, V1), 4146 DAG.getNode(ISD::UNDEF, RVT), PermMask); 4147 } else if (V2.getOpcode() != ISD::UNDEF) 4148 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1, 4149 DAG.getNode(ISD::UNDEF, RVT), PermMask); 4150 if (RVT != VT) 4151 Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op); 4152 return Op; 4153 } 4154 4155 // Binary or unary shufps. 
4156 if (X86::isSHUFPMask(PermMask.getNode()) || 4157 (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode()))) 4158 return Op; 4159 } 4160 4161 // Handle v8i16 specifically since SSE can do byte extraction and insertion. 4162 if (VT == MVT::v8i16) { 4163 SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this); 4164 if (NewOp.getNode()) 4165 return NewOp; 4166 } 4167 4168 // Handle all 4 wide cases with a number of shuffles except for MMX. 4169 if (NumElems == 4 && !isMMX) 4170 return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG); 4171 4172 return SDValue(); 4173} 4174 4175SDValue 4176X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, 4177 SelectionDAG &DAG) { 4178 MVT VT = Op.getValueType(); 4179 if (VT.getSizeInBits() == 8) { 4180 SDValue Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32, 4181 Op.getOperand(0), Op.getOperand(1)); 4182 SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract, 4183 DAG.getValueType(VT)); 4184 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 4185 } else if (VT.getSizeInBits() == 16) { 4186 SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32, 4187 Op.getOperand(0), Op.getOperand(1)); 4188 SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract, 4189 DAG.getValueType(VT)); 4190 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 4191 } else if (VT == MVT::f32) { 4192 // EXTRACTPS outputs to a GPR32 register which will require a movd to copy 4193 // the result back to FR32 register. It's only worth matching if the 4194 // result has a single use which is a store or a bitcast to i32. 4195 if (!Op.hasOneUse()) 4196 return SDValue(); 4197 SDNode *User = *Op.getNode()->use_begin(); 4198 if (User->getOpcode() != ISD::STORE && 4199 (User->getOpcode() != ISD::BIT_CONVERT || 4200 User->getValueType(0) != MVT::i32)) 4201 return SDValue(); 4202 SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4203 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)), 4204 Op.getOperand(1)); 4205 return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract); 4206 } 4207 return SDValue(); 4208} 4209 4210 4211SDValue 4212X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4213 if (!isa<ConstantSDNode>(Op.getOperand(1))) 4214 return SDValue(); 4215 4216 if (Subtarget->hasSSE41()) { 4217 SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); 4218 if (Res.getNode()) 4219 return Res; 4220 } 4221 4222 MVT VT = Op.getValueType(); 4223 // TODO: handle v16i8. 4224 if (VT.getSizeInBits() == 16) { 4225 SDValue Vec = Op.getOperand(0); 4226 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 4227 if (Idx == 0) 4228 return DAG.getNode(ISD::TRUNCATE, MVT::i16, 4229 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4230 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec), 4231 Op.getOperand(1))); 4232 // Transform it so it match pextrw which produces a 32-bit result. 4233 MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1); 4234 SDValue Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 4235 Op.getOperand(0), Op.getOperand(1)); 4236 SDValue Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 4237 DAG.getValueType(VT)); 4238 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 4239 } else if (VT.getSizeInBits() == 32) { 4240 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 4241 if (Idx == 0) 4242 return Op; 4243 // SHUFPS the element to the lowest double word, then movss. 4244 MVT MaskVT = MVT::getIntVectorWithNumElements(4); 4245 SmallVector<SDValue, 8> IdxVec; 4246 IdxVec. 
4247 push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType())); 4248 IdxVec. 4249 push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4250 IdxVec. 4251 push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4252 IdxVec. 4253 push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4254 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 4255 &IdxVec[0], IdxVec.size()); 4256 SDValue Vec = Op.getOperand(0); 4257 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 4258 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 4259 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 4260 DAG.getIntPtrConstant(0)); 4261 } else if (VT.getSizeInBits() == 64) { 4262 // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b 4263 // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught 4264 // to match extract_elt for f64. 4265 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 4266 if (Idx == 0) 4267 return Op; 4268 4269 // UNPCKHPD the element to the lowest double word, then movsd. 4270 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 4271 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 4272 MVT MaskVT = MVT::getIntVectorWithNumElements(2); 4273 SmallVector<SDValue, 8> IdxVec; 4274 IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType())); 4275 IdxVec. 4276 push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType())); 4277 SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 4278 &IdxVec[0], IdxVec.size()); 4279 SDValue Vec = Op.getOperand(0); 4280 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 4281 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 4282 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 4283 DAG.getIntPtrConstant(0)); 4284 } 4285 4286 return SDValue(); 4287} 4288 4289SDValue 4290X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ 4291 MVT VT = Op.getValueType(); 4292 MVT EVT = VT.getVectorElementType(); 4293 4294 SDValue N0 = Op.getOperand(0); 4295 SDValue N1 = Op.getOperand(1); 4296 SDValue N2 = Op.getOperand(2); 4297 4298 if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) && 4299 isa<ConstantSDNode>(N2)) { 4300 unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB 4301 : X86ISD::PINSRW; 4302 // Transform it so it match pinsr{b,w} which expects a GR32 as its second 4303 // argument. 4304 if (N1.getValueType() != MVT::i32) 4305 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 4306 if (N2.getValueType() != MVT::i32) 4307 N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); 4308 return DAG.getNode(Opc, VT, N0, N1, N2); 4309 } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) { 4310 // Bits [7:6] of the constant are the source select. This will always be 4311 // zero here. The DAG Combiner may combine an extract_elt index into these 4312 // bits. For example (insert (extract, 3), 2) could be matched by putting 4313 // the '3' into bits [7:6] of X86ISD::INSERTPS. 4314 // Bits [5:4] of the constant are the destination select. This is the 4315 // value of the incoming immediate. 4316 // Bits [3:0] of the constant are the zero mask. The DAG Combiner may 4317 // combine either bitwise AND or insert of float 0.0 to set these bits. 
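    // Only the destination select is needed here: the element index from N2
    // is shifted into bits [5:4] of the INSERTPS immediate, leaving the
    // source select and zero mask bits clear.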
4318 N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4); 4319 return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2); 4320 } 4321 return SDValue(); 4322} 4323 4324SDValue 4325X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4326 MVT VT = Op.getValueType(); 4327 MVT EVT = VT.getVectorElementType(); 4328 4329 if (Subtarget->hasSSE41()) 4330 return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); 4331 4332 if (EVT == MVT::i8) 4333 return SDValue(); 4334 4335 SDValue N0 = Op.getOperand(0); 4336 SDValue N1 = Op.getOperand(1); 4337 SDValue N2 = Op.getOperand(2); 4338 4339 if (EVT.getSizeInBits() == 16) { 4340 // Transform it so it match pinsrw which expects a 16-bit value in a GR32 4341 // as its second argument. 4342 if (N1.getValueType() != MVT::i32) 4343 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 4344 if (N2.getValueType() != MVT::i32) 4345 N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); 4346 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 4347 } 4348 return SDValue(); 4349} 4350 4351SDValue 4352X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { 4353 if (Op.getValueType() == MVT::v2f32) 4354 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f32, 4355 DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i32, 4356 DAG.getNode(ISD::BIT_CONVERT, MVT::i32, 4357 Op.getOperand(0)))); 4358 4359 SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 4360 MVT VT = MVT::v2i32; 4361 switch (Op.getValueType().getSimpleVT()) { 4362 default: break; 4363 case MVT::v16i8: 4364 case MVT::v8i16: 4365 VT = MVT::v4i32; 4366 break; 4367 } 4368 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), 4369 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt)); 4370} 4371 4372// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 4373// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 4374// one of the above mentioned nodes. It has to be wrapped because otherwise 4375// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 4376// be used to form addressing mode. These wrapped nodes will be selected 4377// into MOV32ri. 4378SDValue 4379X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 4380 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 4381 SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), 4382 getPointerTy(), 4383 CP->getAlignment()); 4384 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4385 // With PIC, the address is actually $g + Offset. 4386 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4387 !Subtarget->isPICStyleRIPRel()) { 4388 Result = DAG.getNode(ISD::ADD, getPointerTy(), 4389 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4390 Result); 4391 } 4392 4393 return Result; 4394} 4395 4396SDValue 4397X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, 4398 int64_t Offset, 4399 SelectionDAG &DAG) const { 4400 bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; 4401 bool ExtraLoadRequired = 4402 Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false); 4403 4404 // Create the TargetGlobalAddress node, folding in the constant 4405 // offset if it is legal. 
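  // The offset is folded only when no PIC base and no extra load are needed
  // and it fits in a signed 32-bit immediate; otherwise it is added back
  // explicitly after the wrapper (and after the GOT load, if any).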
4406 SDValue Result; 4407 if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) { 4408 Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); 4409 Offset = 0; 4410 } else 4411 Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0); 4412 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4413 4414 // With PIC, the address is actually $g + Offset. 4415 if (IsPic && !Subtarget->isPICStyleRIPRel()) { 4416 Result = DAG.getNode(ISD::ADD, getPointerTy(), 4417 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4418 Result); 4419 } 4420 4421 // For Darwin & Mingw32, external and weak symbols are indirect, so we want to 4422 // load the value at address GV, not the value of GV itself. This means that 4423 // the GlobalAddress must be in the base or index register of the address, not 4424 // the GV offset field. Platform check is inside GVRequiresExtraLoad() call 4425 // The same applies for external symbols during PIC codegen 4426 if (ExtraLoadRequired) 4427 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, 4428 PseudoSourceValue::getGOT(), 0); 4429 4430 // If there was a non-zero offset that we didn't fold, create an explicit 4431 // addition for it. 4432 if (Offset != 0) 4433 Result = DAG.getNode(ISD::ADD, getPointerTy(), Result, 4434 DAG.getConstant(Offset, getPointerTy())); 4435 4436 return Result; 4437} 4438 4439SDValue 4440X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { 4441 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 4442 int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); 4443 return LowerGlobalAddress(GV, Offset, DAG); 4444} 4445 4446// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit 4447static SDValue 4448LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4449 const MVT PtrVT) { 4450 SDValue InFlag; 4451 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 4452 DAG.getNode(X86ISD::GlobalBaseReg, 4453 PtrVT), InFlag); 4454 InFlag = Chain.getValue(1); 4455 4456 // emit leal symbol@TLSGD(,%ebx,1), %eax 4457 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 4458 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4459 GA->getValueType(0), 4460 GA->getOffset()); 4461 SDValue Ops[] = { Chain, TGA, InFlag }; 4462 SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 4463 InFlag = Result.getValue(2); 4464 Chain = Result.getValue(1); 4465 4466 // call ___tls_get_addr. This function receives its argument in 4467 // the register EAX. 
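  // ___tls_get_addr returns the variable's address in EAX, which is what the
  // CopyFromReg at the end of this function hands back.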
4468 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 4469 InFlag = Chain.getValue(1); 4470 4471 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 4472 SDValue Ops1[] = { Chain, 4473 DAG.getTargetExternalSymbol("___tls_get_addr", 4474 PtrVT), 4475 DAG.getRegister(X86::EAX, PtrVT), 4476 DAG.getRegister(X86::EBX, PtrVT), 4477 InFlag }; 4478 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 4479 InFlag = Chain.getValue(1); 4480 4481 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 4482} 4483 4484// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit 4485static SDValue 4486LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4487 const MVT PtrVT) { 4488 SDValue InFlag, Chain; 4489 4490 // emit leaq symbol@TLSGD(%rip), %rdi 4491 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 4492 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4493 GA->getValueType(0), 4494 GA->getOffset()); 4495 SDValue Ops[] = { DAG.getEntryNode(), TGA}; 4496 SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2); 4497 Chain = Result.getValue(1); 4498 InFlag = Result.getValue(2); 4499 4500 // call __tls_get_addr. This function receives its argument in 4501 // the register RDI. 4502 Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag); 4503 InFlag = Chain.getValue(1); 4504 4505 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 4506 SDValue Ops1[] = { Chain, 4507 DAG.getTargetExternalSymbol("__tls_get_addr", 4508 PtrVT), 4509 DAG.getRegister(X86::RDI, PtrVT), 4510 InFlag }; 4511 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4); 4512 InFlag = Chain.getValue(1); 4513 4514 return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag); 4515} 4516 4517// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 4518// "local exec" model. 4519static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4520 const MVT PtrVT) { 4521 // Get the Thread Pointer 4522 SDValue ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 4523 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 4524 // exec) 4525 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4526 GA->getValueType(0), 4527 GA->getOffset()); 4528 SDValue Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 4529 4530 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 4531 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, 4532 PseudoSourceValue::getGOT(), 0); 4533 4534 // The address of the thread local variable is the add of the thread 4535 // pointer with the offset of the variable. 
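  // For initial exec the offset was just loaded from the x@indntpoff GOT
  // entry; for local exec it is the link-time constant x@ntpoff.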
4536 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); 4537} 4538 4539SDValue 4540X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { 4541 // TODO: implement the "local dynamic" model 4542 // TODO: implement the "initial exec"model for pic executables 4543 assert(Subtarget->isTargetELF() && 4544 "TLS not implemented for non-ELF targets"); 4545 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 4546 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 4547 // otherwise use the "Local Exec"TLS Model 4548 if (Subtarget->is64Bit()) { 4549 return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); 4550 } else { 4551 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 4552 return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); 4553 else 4554 return LowerToTLSExecModel(GA, DAG, getPointerTy()); 4555 } 4556} 4557 4558SDValue 4559X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { 4560 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 4561 SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 4562 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4563 // With PIC, the address is actually $g + Offset. 4564 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4565 !Subtarget->isPICStyleRIPRel()) { 4566 Result = DAG.getNode(ISD::ADD, getPointerTy(), 4567 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4568 Result); 4569 } 4570 4571 return Result; 4572} 4573 4574SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { 4575 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 4576 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); 4577 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4578 // With PIC, the address is actually $g + Offset. 4579 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4580 !Subtarget->isPICStyleRIPRel()) { 4581 Result = DAG.getNode(ISD::ADD, getPointerTy(), 4582 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4583 Result); 4584 } 4585 4586 return Result; 4587} 4588 4589/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and 4590/// take a 2 x i32 value to shift plus a shift amount. 4591SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { 4592 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 4593 MVT VT = Op.getValueType(); 4594 unsigned VTBits = VT.getSizeInBits(); 4595 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 4596 SDValue ShOpLo = Op.getOperand(0); 4597 SDValue ShOpHi = Op.getOperand(1); 4598 SDValue ShAmt = Op.getOperand(2); 4599 SDValue Tmp1 = isSRA ? 4600 DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) : 4601 DAG.getConstant(0, VT); 4602 4603 SDValue Tmp2, Tmp3; 4604 if (Op.getOpcode() == ISD::SHL_PARTS) { 4605 Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt); 4606 Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt); 4607 } else { 4608 Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt); 4609 Tmp3 = DAG.getNode(isSRA ? 
ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt); 4610 } 4611 4612 SDValue AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 4613 DAG.getConstant(VTBits, MVT::i8)); 4614 SDValue Cond = DAG.getNode(X86ISD::CMP, VT, 4615 AndNode, DAG.getConstant(0, MVT::i8)); 4616 4617 SDValue Hi, Lo; 4618 SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4619 SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond }; 4620 SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; 4621 4622 if (Op.getOpcode() == ISD::SHL_PARTS) { 4623 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4624 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4625 } else { 4626 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4627 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4628 } 4629 4630 SDValue Ops[2] = { Lo, Hi }; 4631 return DAG.getMergeValues(Ops, 2); 4632} 4633 4634SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 4635 MVT SrcVT = Op.getOperand(0).getValueType(); 4636 assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && 4637 "Unknown SINT_TO_FP to lower!"); 4638 4639 // These are really Legal; caller falls through into that case. 4640 if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) 4641 return SDValue(); 4642 if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 && 4643 Subtarget->is64Bit()) 4644 return SDValue(); 4645 4646 unsigned Size = SrcVT.getSizeInBits()/8; 4647 MachineFunction &MF = DAG.getMachineFunction(); 4648 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4649 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4650 SDValue Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 4651 StackSlot, 4652 PseudoSourceValue::getFixedStack(SSFI), 0); 4653 4654 // Build the FILD 4655 SDVTList Tys; 4656 bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); 4657 if (useSSE) 4658 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 4659 else 4660 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 4661 SmallVector<SDValue, 8> Ops; 4662 Ops.push_back(Chain); 4663 Ops.push_back(StackSlot); 4664 Ops.push_back(DAG.getValueType(SrcVT)); 4665 SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, 4666 Tys, &Ops[0], Ops.size()); 4667 4668 if (useSSE) { 4669 Chain = Result.getValue(1); 4670 SDValue InFlag = Result.getValue(2); 4671 4672 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 4673 // shouldn't be necessary except that RFP cannot be live across 4674 // multiple blocks. When stackifier is fixed, they can be uncoupled. 4675 MachineFunction &MF = DAG.getMachineFunction(); 4676 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 4677 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4678 Tys = DAG.getVTList(MVT::Other); 4679 SmallVector<SDValue, 8> Ops; 4680 Ops.push_back(Chain); 4681 Ops.push_back(Result); 4682 Ops.push_back(StackSlot); 4683 Ops.push_back(DAG.getValueType(Op.getValueType())); 4684 Ops.push_back(InFlag); 4685 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 4686 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 4687 PseudoSourceValue::getFixedStack(SSFI), 0); 4688 } 4689 4690 return Result; 4691} 4692 4693SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 4694 MVT SrcVT = Op.getOperand(0).getValueType(); 4695 assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!"); 4696 4697 // We only handle SSE2 f64 target here; caller can handle the rest. 
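  // (Referring back to LowerShift above: the SHRD/SHR pair plus the CMOV on bit
  // 5 of the shift amount computes, more or less, the following C for the
  // SRL_PARTS case; SHL_PARTS and SRA_PARTS are the mirrored variants.)
  /*
     void srl_parts(unsigned lo, unsigned hi, unsigned amt,
                    unsigned *out_lo, unsigned *out_hi) {
       unsigned t2 = (unsigned)((((unsigned long long)hi << 32) | lo) >> (amt & 31)); // shrd
       unsigned t3 = hi >> (amt & 31);                                                // shr
       if (amt & 32) { *out_lo = t3; *out_hi = 0; }    // amount >= 32
       else          { *out_lo = t2; *out_hi = t3; }
     }
  */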
4698 if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) 4699 return SDValue(); 4700 4701 // This algorithm is not obvious. Here it is in C code, more or less: 4702/* 4703 double uint64_to_double( uint32_t hi, uint32_t lo ) 4704 { 4705 static const __m128i exp = { 0x4330000045300000ULL, 0 }; 4706 static const __m128d bias = { 0x1.0p84, 0x1.0p52 }; 4707 4708 // copy ints to xmm registers 4709 __m128i xh = _mm_cvtsi32_si128( hi ); 4710 __m128i xl = _mm_cvtsi32_si128( lo ); 4711 4712 // combine into low half of a single xmm register 4713 __m128i x = _mm_unpacklo_epi32( xh, xl ); 4714 __m128d d; 4715 double sd; 4716 4717 // merge in appropriate exponents to give the integer bits the 4718 // right magnitude 4719 x = _mm_unpacklo_epi32( x, exp ); 4720 4721 // subtract away the biases to deal with the IEEE-754 double precision 4722 // implicit 1 4723 d = _mm_sub_pd( (__m128d) x, bias ); 4724 4725 // All conversions up to here are exact. The correctly rounded result is 4726 // calculated using the 4727 // current rounding mode using the following horizontal add. 4728 d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) ); 4729 _mm_store_sd( &sd, d ); //since we are returning doubles in XMM, this 4730 // store doesn't really need to be here (except maybe to zero the other 4731 // double) 4732 return sd; 4733 } 4734*/ 4735 4736 // Build some magic constants. 4737 std::vector<Constant*>CV0; 4738 CV0.push_back(ConstantInt::get(APInt(32, 0x45300000))); 4739 CV0.push_back(ConstantInt::get(APInt(32, 0x43300000))); 4740 CV0.push_back(ConstantInt::get(APInt(32, 0))); 4741 CV0.push_back(ConstantInt::get(APInt(32, 0))); 4742 Constant *C0 = ConstantVector::get(CV0); 4743 SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4); 4744 4745 std::vector<Constant*>CV1; 4746 CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL)))); 4747 CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL)))); 4748 Constant *C1 = ConstantVector::get(CV1); 4749 SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4); 4750 4751 SmallVector<SDValue, 4> MaskVec; 4752 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 4753 MaskVec.push_back(DAG.getConstant(4, MVT::i32)); 4754 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 4755 MaskVec.push_back(DAG.getConstant(5, MVT::i32)); 4756 SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0], 4757 MaskVec.size()); 4758 SmallVector<SDValue, 4> MaskVec2; 4759 MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); 4760 MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); 4761 SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec2[0], 4762 MaskVec2.size()); 4763 4764 SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32, 4765 DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 4766 Op.getOperand(0), 4767 DAG.getIntPtrConstant(1))); 4768 SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32, 4769 DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 4770 Op.getOperand(0), 4771 DAG.getIntPtrConstant(0))); 4772 SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, 4773 XR1, XR2, UnpcklMask); 4774 SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0, 4775 PseudoSourceValue::getConstantPool(), 0, false, 16); 4776 SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, 4777 Unpck1, CLod0, UnpcklMask); 4778 SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2); 4779 SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1, 4780 PseudoSourceValue::getConstantPool(), 0, false, 16); 4781 SDValue Sub = 
DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1); 4782 // Add the halves; easiest way is to swap them into another reg first. 4783 SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64, 4784 Sub, Sub, ShufMask); 4785 SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub); 4786 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add, 4787 DAG.getIntPtrConstant(0)); 4788} 4789 4790std::pair<SDValue,SDValue> X86TargetLowering:: 4791FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { 4792 assert(Op.getValueType().getSimpleVT() <= MVT::i64 && 4793 Op.getValueType().getSimpleVT() >= MVT::i16 && 4794 "Unknown FP_TO_SINT to lower!"); 4795 4796 // These are really Legal. 4797 if (Op.getValueType() == MVT::i32 && 4798 isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) 4799 return std::make_pair(SDValue(), SDValue()); 4800 if (Subtarget->is64Bit() && 4801 Op.getValueType() == MVT::i64 && 4802 Op.getOperand(0).getValueType() != MVT::f80) 4803 return std::make_pair(SDValue(), SDValue()); 4804 4805 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 4806 // stack slot. 4807 MachineFunction &MF = DAG.getMachineFunction(); 4808 unsigned MemSize = Op.getValueType().getSizeInBits()/8; 4809 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4810 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4811 unsigned Opc; 4812 switch (Op.getValueType().getSimpleVT()) { 4813 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4814 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4815 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4816 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4817 } 4818 4819 SDValue Chain = DAG.getEntryNode(); 4820 SDValue Value = Op.getOperand(0); 4821 if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { 4822 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4823 Chain = DAG.getStore(Chain, Value, StackSlot, 4824 PseudoSourceValue::getFixedStack(SSFI), 0); 4825 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4826 SDValue Ops[] = { 4827 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4828 }; 4829 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4830 Chain = Value.getValue(1); 4831 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4832 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4833 } 4834 4835 // Build the FP_TO_INT*_IN_MEM 4836 SDValue Ops[] = { Chain, Value, StackSlot }; 4837 SDValue FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4838 4839 return std::make_pair(FIST, StackSlot); 4840} 4841 4842SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { 4843 std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(Op, DAG); 4844 SDValue FIST = Vals.first, StackSlot = Vals.second; 4845 if (FIST.getNode() == 0) return SDValue(); 4846 4847 // Load the result. 4848 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4849} 4850 4851SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 4852 std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG); 4853 SDValue FIST = Vals.first, StackSlot = Vals.second; 4854 if (FIST.getNode() == 0) return 0; 4855 4856 MVT VT = N->getValueType(0); 4857 4858 // Return a load from the stack slot. 4859 SDValue Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0); 4860 4861 // Use MERGE_VALUES to drop the chain result value and get a node with one 4862 // result. 
This requires turning off getMergeValues simplification, since 4863 // otherwise it will give us Res back. 4864 return DAG.getMergeValues(&Res, 1, false).getNode(); 4865} 4866 4867SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { 4868 MVT VT = Op.getValueType(); 4869 MVT EltVT = VT; 4870 if (VT.isVector()) 4871 EltVT = VT.getVectorElementType(); 4872 std::vector<Constant*> CV; 4873 if (EltVT == MVT::f64) { 4874 Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))); 4875 CV.push_back(C); 4876 CV.push_back(C); 4877 } else { 4878 Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))); 4879 CV.push_back(C); 4880 CV.push_back(C); 4881 CV.push_back(C); 4882 CV.push_back(C); 4883 } 4884 Constant *C = ConstantVector::get(CV); 4885 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4886 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4887 PseudoSourceValue::getConstantPool(), 0, 4888 false, 16); 4889 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4890} 4891 4892SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { 4893 MVT VT = Op.getValueType(); 4894 MVT EltVT = VT; 4895 unsigned EltNum = 1; 4896 if (VT.isVector()) { 4897 EltVT = VT.getVectorElementType(); 4898 EltNum = VT.getVectorNumElements(); 4899 } 4900 std::vector<Constant*> CV; 4901 if (EltVT == MVT::f64) { 4902 Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63))); 4903 CV.push_back(C); 4904 CV.push_back(C); 4905 } else { 4906 Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31))); 4907 CV.push_back(C); 4908 CV.push_back(C); 4909 CV.push_back(C); 4910 CV.push_back(C); 4911 } 4912 Constant *C = ConstantVector::get(CV); 4913 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4914 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4915 PseudoSourceValue::getConstantPool(), 0, 4916 false, 16); 4917 if (VT.isVector()) { 4918 return DAG.getNode(ISD::BIT_CONVERT, VT, 4919 DAG.getNode(ISD::XOR, MVT::v2i64, 4920 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4921 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4922 } else { 4923 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4924 } 4925} 4926 4927SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { 4928 SDValue Op0 = Op.getOperand(0); 4929 SDValue Op1 = Op.getOperand(1); 4930 MVT VT = Op.getValueType(); 4931 MVT SrcVT = Op1.getValueType(); 4932 4933 // If second operand is smaller, extend it first. 4934 if (SrcVT.bitsLT(VT)) { 4935 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4936 SrcVT = VT; 4937 } 4938 // And if it is bigger, shrink it first. 4939 if (SrcVT.bitsGT(VT)) { 4940 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1)); 4941 SrcVT = VT; 4942 } 4943 4944 // At this point the operands and the result should have the same 4945 // type, and that won't be f80 since that is not custom lowered. 4946 4947 // First get the sign bit of second operand. 
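  // The constant-pool masks built by LowerFABS, LowerFNEG and this routine are
  // the usual IEEE sign-bit tricks; in C, more or less (f64 shown, f32 is the
  // same with a 32-bit mask):
  /*
     #include <stdint.h>
     #include <string.h>
     static uint64_t to_bits(double x) { uint64_t b; memcpy(&b, &x, 8); return b; }
     static double   to_fp(uint64_t b) { double x;   memcpy(&x, &b, 8); return x; }

     double fabs_bits(double x) { return to_fp(to_bits(x) & ~(1ULL << 63)); }  // FAND
     double fneg_bits(double x) { return to_fp(to_bits(x) ^  (1ULL << 63)); }  // FXOR
     double copysign_bits(double x, double y) {                                // FAND + FOR
       return to_fp((to_bits(x) & ~(1ULL << 63)) | (to_bits(y) & (1ULL << 63)));
     }
  */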
4948 std::vector<Constant*> CV; 4949 if (SrcVT == MVT::f64) { 4950 CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63)))); 4951 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 4952 } else { 4953 CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31)))); 4954 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4955 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4956 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4957 } 4958 Constant *C = ConstantVector::get(CV); 4959 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4960 SDValue Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, 4961 PseudoSourceValue::getConstantPool(), 0, 4962 false, 16); 4963 SDValue SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4964 4965 // Shift sign bit right or left if the two operands have different types. 4966 if (SrcVT.bitsGT(VT)) { 4967 // Op0 is MVT::f32, Op1 is MVT::f64. 4968 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4969 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4970 DAG.getConstant(32, MVT::i32)); 4971 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4972 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4973 DAG.getIntPtrConstant(0)); 4974 } 4975 4976 // Clear first operand sign bit. 4977 CV.clear(); 4978 if (VT == MVT::f64) { 4979 CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))))); 4980 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 4981 } else { 4982 CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31))))); 4983 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4984 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4985 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4986 } 4987 C = ConstantVector::get(CV); 4988 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4989 SDValue Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4990 PseudoSourceValue::getConstantPool(), 0, 4991 false, 16); 4992 SDValue Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4993 4994 // Or the value with the sign bit. 4995 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 4996} 4997 4998SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { 4999 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 5000 SDValue Cond; 5001 SDValue Op0 = Op.getOperand(0); 5002 SDValue Op1 = Op.getOperand(1); 5003 SDValue CC = Op.getOperand(2); 5004 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 5005 unsigned X86CC; 5006 5007 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 5008 Op0, Op1, DAG)) { 5009 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 5010 return DAG.getNode(X86ISD::SETCC, MVT::i8, 5011 DAG.getConstant(X86CC, MVT::i8), Cond); 5012 } 5013 5014 assert(0 && "Illegal SetCC!"); 5015 return SDValue(); 5016} 5017 5018SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 5019 SDValue Cond; 5020 SDValue Op0 = Op.getOperand(0); 5021 SDValue Op1 = Op.getOperand(1); 5022 SDValue CC = Op.getOperand(2); 5023 MVT VT = Op.getValueType(); 5024 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 5025 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 5026 5027 if (isFP) { 5028 unsigned SSECC = 8; 5029 MVT VT0 = Op0.getValueType(); 5030 assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64); 5031 unsigned Opc = VT0 == MVT::v4f32 ? 
X86ISD::CMPPS : X86ISD::CMPPD; 5032 bool Swap = false; 5033 5034 switch (SetCCOpcode) { 5035 default: break; 5036 case ISD::SETOEQ: 5037 case ISD::SETEQ: SSECC = 0; break; 5038 case ISD::SETOGT: 5039 case ISD::SETGT: Swap = true; // Fallthrough 5040 case ISD::SETLT: 5041 case ISD::SETOLT: SSECC = 1; break; 5042 case ISD::SETOGE: 5043 case ISD::SETGE: Swap = true; // Fallthrough 5044 case ISD::SETLE: 5045 case ISD::SETOLE: SSECC = 2; break; 5046 case ISD::SETUO: SSECC = 3; break; 5047 case ISD::SETUNE: 5048 case ISD::SETNE: SSECC = 4; break; 5049 case ISD::SETULE: Swap = true; 5050 case ISD::SETUGE: SSECC = 5; break; 5051 case ISD::SETULT: Swap = true; 5052 case ISD::SETUGT: SSECC = 6; break; 5053 case ISD::SETO: SSECC = 7; break; 5054 } 5055 if (Swap) 5056 std::swap(Op0, Op1); 5057 5058 // In the two special cases we can't handle, emit two comparisons. 5059 if (SSECC == 8) { 5060 if (SetCCOpcode == ISD::SETUEQ) { 5061 SDValue UNORD, EQ; 5062 UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); 5063 EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); 5064 return DAG.getNode(ISD::OR, VT, UNORD, EQ); 5065 } 5066 else if (SetCCOpcode == ISD::SETONE) { 5067 SDValue ORD, NEQ; 5068 ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); 5069 NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); 5070 return DAG.getNode(ISD::AND, VT, ORD, NEQ); 5071 } 5072 assert(0 && "Illegal FP comparison"); 5073 } 5074 // Handle all other FP comparisons here. 5075 return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); 5076 } 5077 5078 // We are handling one of the integer comparisons here. Since SSE only has 5079 // GT and EQ comparisons for integer, swapping operands and multiple 5080 // operations may be required for some comparisons. 5081 unsigned Opc = 0, EQOpc = 0, GTOpc = 0; 5082 bool Swap = false, Invert = false, FlipSigns = false; 5083 5084 switch (VT.getSimpleVT()) { 5085 default: break; 5086 case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; 5087 case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; 5088 case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; 5089 case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; 5090 } 5091 5092 switch (SetCCOpcode) { 5093 default: break; 5094 case ISD::SETNE: Invert = true; 5095 case ISD::SETEQ: Opc = EQOpc; break; 5096 case ISD::SETLT: Swap = true; 5097 case ISD::SETGT: Opc = GTOpc; break; 5098 case ISD::SETGE: Swap = true; 5099 case ISD::SETLE: Opc = GTOpc; Invert = true; break; 5100 case ISD::SETULT: Swap = true; 5101 case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break; 5102 case ISD::SETUGE: Swap = true; 5103 case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break; 5104 } 5105 if (Swap) 5106 std::swap(Op0, Op1); 5107 5108 // Since SSE has no unsigned integer comparisons, we need to flip the sign 5109 // bits of the inputs before performing those operations. 
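  // The flip is the standard bias trick: XORing both sides with the sign bit
  // turns an unsigned comparison into a signed one. In C, more or less:
  /*
     int ult(unsigned a, unsigned b) {   // same result as (a < b) on unsigned
       return (int)(a ^ 0x80000000u) < (int)(b ^ 0x80000000u);
     }
  */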
5110 if (FlipSigns) { 5111 MVT EltVT = VT.getVectorElementType(); 5112 SDValue SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT); 5113 std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); 5114 SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0], 5115 SignBits.size()); 5116 Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec); 5117 Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec); 5118 } 5119 5120 SDValue Result = DAG.getNode(Opc, VT, Op0, Op1); 5121 5122 // If the logical-not of the result is required, perform that now. 5123 if (Invert) { 5124 MVT EltVT = VT.getVectorElementType(); 5125 SDValue NegOne = DAG.getConstant(EltVT.getIntegerVTBitMask(), EltVT); 5126 std::vector<SDValue> NegOnes(VT.getVectorNumElements(), NegOne); 5127 SDValue NegOneV = DAG.getNode(ISD::BUILD_VECTOR, VT, &NegOnes[0], 5128 NegOnes.size()); 5129 Result = DAG.getNode(ISD::XOR, VT, Result, NegOneV); 5130 } 5131 return Result; 5132} 5133 5134SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { 5135 bool addTest = true; 5136 SDValue Cond = Op.getOperand(0); 5137 SDValue CC; 5138 5139 if (Cond.getOpcode() == ISD::SETCC) 5140 Cond = LowerSETCC(Cond, DAG); 5141 5142 // If condition flag is set by a X86ISD::CMP, then use it as the condition 5143 // setting operand in place of the X86ISD::SETCC. 5144 if (Cond.getOpcode() == X86ISD::SETCC) { 5145 CC = Cond.getOperand(0); 5146 5147 SDValue Cmp = Cond.getOperand(1); 5148 unsigned Opc = Cmp.getOpcode(); 5149 MVT VT = Op.getValueType(); 5150 5151 bool IllegalFPCMov = false; 5152 if (VT.isFloatingPoint() && !VT.isVector() && 5153 !isScalarFPTypeInSSEReg(VT)) // FPStack? 5154 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue()); 5155 5156 if ((Opc == X86ISD::CMP || 5157 Opc == X86ISD::COMI || 5158 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 5159 Cond = Cmp; 5160 addTest = false; 5161 } 5162 } 5163 5164 if (addTest) { 5165 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 5166 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 5167 } 5168 5169 const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), 5170 MVT::Flag); 5171 SmallVector<SDValue, 4> Ops; 5172 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 5173 // condition is true. 5174 Ops.push_back(Op.getOperand(2)); 5175 Ops.push_back(Op.getOperand(1)); 5176 Ops.push_back(CC); 5177 Ops.push_back(Cond); 5178 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 5179} 5180 5181SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { 5182 bool addTest = true; 5183 SDValue Chain = Op.getOperand(0); 5184 SDValue Cond = Op.getOperand(1); 5185 SDValue Dest = Op.getOperand(2); 5186 SDValue CC; 5187 5188 if (Cond.getOpcode() == ISD::SETCC) 5189 Cond = LowerSETCC(Cond, DAG); 5190 5191 // If condition flag is set by a X86ISD::CMP, then use it as the condition 5192 // setting operand in place of the X86ISD::SETCC. 5193 if (Cond.getOpcode() == X86ISD::SETCC) { 5194 CC = Cond.getOperand(0); 5195 5196 SDValue Cmp = Cond.getOperand(1); 5197 unsigned Opc = Cmp.getOpcode(); 5198 if (Opc == X86ISD::CMP || 5199 Opc == X86ISD::COMI || 5200 Opc == X86ISD::UCOMI) { 5201 Cond = Cmp; 5202 addTest = false; 5203 } 5204 // Also, recognize the pattern generated by an FCMP_UNE. We can emit 5205 // two branches instead of an explicit OR instruction with a 5206 // separate test. 
5207 } else if (Cond.getOpcode() == ISD::OR && 5208 Cond.hasOneUse() && 5209 Cond.getOperand(0).getOpcode() == X86ISD::SETCC && 5210 Cond.getOperand(0).hasOneUse() && 5211 Cond.getOperand(1).getOpcode() == X86ISD::SETCC && 5212 Cond.getOperand(1).hasOneUse()) { 5213 SDValue Cmp = Cond.getOperand(0).getOperand(1); 5214 unsigned Opc = Cmp.getOpcode(); 5215 if (Cmp == Cond.getOperand(1).getOperand(1) && 5216 (Opc == X86ISD::CMP || 5217 Opc == X86ISD::COMI || 5218 Opc == X86ISD::UCOMI)) { 5219 CC = Cond.getOperand(0).getOperand(0); 5220 Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 5221 Chain, Dest, CC, Cmp); 5222 CC = Cond.getOperand(1).getOperand(0); 5223 Cond = Cmp; 5224 addTest = false; 5225 } 5226 // Also, recognize the pattern generated by an FCMP_OEQ. We can emit 5227 // two branches instead of an explicit AND instruction with a 5228 // separate test. However, we only do this if this block doesn't 5229 // have a fall-through edge, because this requires an explicit 5230 // jmp when the condition is false. 5231 } else if (Cond.getOpcode() == ISD::AND && 5232 Cond.hasOneUse() && 5233 Cond.getOperand(0).getOpcode() == X86ISD::SETCC && 5234 Cond.getOperand(0).hasOneUse() && 5235 Cond.getOperand(1).getOpcode() == X86ISD::SETCC && 5236 Cond.getOperand(1).hasOneUse()) { 5237 SDValue Cmp = Cond.getOperand(0).getOperand(1); 5238 unsigned Opc = Cmp.getOpcode(); 5239 if (Cmp == Cond.getOperand(1).getOperand(1) && 5240 (Opc == X86ISD::CMP || 5241 Opc == X86ISD::COMI || 5242 Opc == X86ISD::UCOMI) && 5243 Op.getNode()->hasOneUse()) { 5244 X86::CondCode CCode = 5245 (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); 5246 CCode = X86::GetOppositeBranchCondition(CCode); 5247 CC = DAG.getConstant(CCode, MVT::i8); 5248 SDValue User = SDValue(*Op.getNode()->use_begin(), 0); 5249 // Look for an unconditional branch following this conditional branch. 5250 // We need this because we need to reverse the successors in order 5251 // to implement FCMP_OEQ. 5252 if (User.getOpcode() == ISD::BR) { 5253 SDValue FalseBB = User.getOperand(1); 5254 SDValue NewBR = 5255 DAG.UpdateNodeOperands(User, User.getOperand(0), Dest); 5256 assert(NewBR == User); 5257 Dest = FalseBB; 5258 5259 Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 5260 Chain, Dest, CC, Cmp); 5261 X86::CondCode CCode = 5262 (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); 5263 CCode = X86::GetOppositeBranchCondition(CCode); 5264 CC = DAG.getConstant(CCode, MVT::i8); 5265 Cond = Cmp; 5266 addTest = false; 5267 } 5268 } 5269 } 5270 5271 if (addTest) { 5272 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 5273 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 5274 } 5275 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 5276 Chain, Dest, CC, Cond); 5277} 5278 5279 5280// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 5281// Calls to _alloca is needed to probe the stack when allocating more than 4k 5282// bytes in one go. Touching the stack at 4K increments is necessary to ensure 5283// that the guard pages used by the OS virtual memory manager are allocated in 5284// correct sequence. 5285SDValue 5286X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 5287 SelectionDAG &DAG) { 5288 assert(Subtarget->isTargetCygMing() && 5289 "This should be used only on Cygwin/Mingw targets"); 5290 5291 // Get the inputs. 
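  // Illustrative only: the probing that _alloca performs for us amounts,
  // roughly, to touching every 4K page between the old and the new stack
  // pointer before %esp is actually moved. Something like:
  /*
     void probe(volatile char *sp, unsigned long n) {
       for (unsigned long touched = 4096; touched < n; touched += 4096)
         sp[-(long)touched] = 0;   // fault in each guard page, top to bottom
     }
  */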
5292 SDValue Chain = Op.getOperand(0); 5293 SDValue Size = Op.getOperand(1); 5294 // FIXME: Ensure alignment here 5295 5296 SDValue Flag; 5297 5298 MVT IntPtr = getPointerTy(); 5299 MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; 5300 5301 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); 5302 5303 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 5304 Flag = Chain.getValue(1); 5305 5306 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 5307 SDValue Ops[] = { Chain, 5308 DAG.getTargetExternalSymbol("_alloca", IntPtr), 5309 DAG.getRegister(X86::EAX, IntPtr), 5310 DAG.getRegister(X86StackPtr, SPTy), 5311 Flag }; 5312 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 5); 5313 Flag = Chain.getValue(1); 5314 5315 Chain = DAG.getCALLSEQ_END(Chain, 5316 DAG.getIntPtrConstant(0, true), 5317 DAG.getIntPtrConstant(0, true), 5318 Flag); 5319 5320 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 5321 5322 SDValue Ops1[2] = { Chain.getValue(0), Chain }; 5323 return DAG.getMergeValues(Ops1, 2); 5324} 5325 5326SDValue 5327X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, 5328 SDValue Chain, 5329 SDValue Dst, SDValue Src, 5330 SDValue Size, unsigned Align, 5331 const Value *DstSV, 5332 uint64_t DstSVOff) { 5333 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 5334 5335 // If not DWORD aligned or size is more than the threshold, call the library. 5336 // The libc version is likely to be faster for these cases. It can use the 5337 // address value and run time information about the CPU. 5338 if ((Align & 3) != 0 || 5339 !ConstantSize || 5340 ConstantSize->getZExtValue() > 5341 getSubtarget()->getMaxInlineSizeThreshold()) { 5342 SDValue InFlag(0, 0); 5343 5344 // Check to see if there is a specialized entry-point for memory zeroing. 5345 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); 5346 5347 if (const char *bzeroEntry = V && 5348 V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { 5349 MVT IntPtr = getPointerTy(); 5350 const Type *IntPtrTy = TD->getIntPtrType(); 5351 TargetLowering::ArgListTy Args; 5352 TargetLowering::ArgListEntry Entry; 5353 Entry.Node = Dst; 5354 Entry.Ty = IntPtrTy; 5355 Args.push_back(Entry); 5356 Entry.Node = Size; 5357 Args.push_back(Entry); 5358 std::pair<SDValue,SDValue> CallResult = 5359 LowerCallTo(Chain, Type::VoidTy, false, false, false, false, 5360 CallingConv::C, false, 5361 DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG); 5362 return CallResult.second; 5363 } 5364 5365 // Otherwise have the target-independent code call memset. 5366 return SDValue(); 5367 } 5368 5369 uint64_t SizeVal = ConstantSize->getZExtValue(); 5370 SDValue InFlag(0, 0); 5371 MVT AVT; 5372 SDValue Count; 5373 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); 5374 unsigned BytesLeft = 0; 5375 bool TwoRepStos = false; 5376 if (ValC) { 5377 unsigned ValReg; 5378 uint64_t Val = ValC->getZExtValue() & 255; 5379 5380 // If the value is a constant, then we can potentially use larger sets. 
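  // That is, replicate the byte into the widest store unit the alignment
  // allows; in C, more or less:
  /*
     unsigned long long splat_byte(unsigned char v) {
       unsigned long long s = v;
       s |= s << 8;    // 2-byte pattern -> rep stosw
       s |= s << 16;   // 4-byte pattern -> rep stosd
       s |= s << 32;   // 8-byte pattern -> rep stosq (x86-64, 8-byte aligned)
       return s;
     }
  */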
5381 switch (Align & 3) { 5382 case 2: // WORD aligned 5383 AVT = MVT::i16; 5384 ValReg = X86::AX; 5385 Val = (Val << 8) | Val; 5386 break; 5387 case 0: // DWORD aligned 5388 AVT = MVT::i32; 5389 ValReg = X86::EAX; 5390 Val = (Val << 8) | Val; 5391 Val = (Val << 16) | Val; 5392 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned 5393 AVT = MVT::i64; 5394 ValReg = X86::RAX; 5395 Val = (Val << 32) | Val; 5396 } 5397 break; 5398 default: // Byte aligned 5399 AVT = MVT::i8; 5400 ValReg = X86::AL; 5401 Count = DAG.getIntPtrConstant(SizeVal); 5402 break; 5403 } 5404 5405 if (AVT.bitsGT(MVT::i8)) { 5406 unsigned UBytes = AVT.getSizeInBits() / 8; 5407 Count = DAG.getIntPtrConstant(SizeVal / UBytes); 5408 BytesLeft = SizeVal % UBytes; 5409 } 5410 5411 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 5412 InFlag); 5413 InFlag = Chain.getValue(1); 5414 } else { 5415 AVT = MVT::i8; 5416 Count = DAG.getIntPtrConstant(SizeVal); 5417 Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag); 5418 InFlag = Chain.getValue(1); 5419 } 5420 5421 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 5422 Count, InFlag); 5423 InFlag = Chain.getValue(1); 5424 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 5425 Dst, InFlag); 5426 InFlag = Chain.getValue(1); 5427 5428 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5429 SmallVector<SDValue, 8> Ops; 5430 Ops.push_back(Chain); 5431 Ops.push_back(DAG.getValueType(AVT)); 5432 Ops.push_back(InFlag); 5433 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 5434 5435 if (TwoRepStos) { 5436 InFlag = Chain.getValue(1); 5437 Count = Size; 5438 MVT CVT = Count.getValueType(); 5439 SDValue Left = DAG.getNode(ISD::AND, CVT, Count, 5440 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 5441 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 5442 Left, InFlag); 5443 InFlag = Chain.getValue(1); 5444 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5445 Ops.clear(); 5446 Ops.push_back(Chain); 5447 Ops.push_back(DAG.getValueType(MVT::i8)); 5448 Ops.push_back(InFlag); 5449 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 5450 } else if (BytesLeft) { 5451 // Handle the last 1 - 7 bytes. 5452 unsigned Offset = SizeVal - BytesLeft; 5453 MVT AddrVT = Dst.getValueType(); 5454 MVT SizeVT = Size.getValueType(); 5455 5456 Chain = DAG.getMemset(Chain, 5457 DAG.getNode(ISD::ADD, AddrVT, Dst, 5458 DAG.getConstant(Offset, AddrVT)), 5459 Src, 5460 DAG.getConstant(BytesLeft, SizeVT), 5461 Align, DstSV, DstSVOff + Offset); 5462 } 5463 5464 // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. 5465 return Chain; 5466} 5467 5468SDValue 5469X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, 5470 SDValue Chain, SDValue Dst, SDValue Src, 5471 SDValue Size, unsigned Align, 5472 bool AlwaysInline, 5473 const Value *DstSV, uint64_t DstSVOff, 5474 const Value *SrcSV, uint64_t SrcSVOff) { 5475 // This requires the copy size to be a constant, preferrably 5476 // within a subtarget-specific limit. 5477 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 5478 if (!ConstantSize) 5479 return SDValue(); 5480 uint64_t SizeVal = ConstantSize->getZExtValue(); 5481 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) 5482 return SDValue(); 5483 5484 /// If not DWORD aligned, call the library. 
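  // Otherwise the expansion below amounts to the following, more or less
  // (unit is 4, or 8 on x86-64 when the copy is 8-byte aligned):
  /*
     #include <string.h>
     void inline_memcpy(char *dst, const char *src, unsigned long size,
                        unsigned unit) {
       unsigned long count = size / unit;             // rep movsd / rep movsq
       memcpy(dst, src, count * unit);
       unsigned long done = count * unit;             // 1..unit-1 trailing bytes
       memcpy(dst + done, src + done, size - done);   // emitted as a smaller memcpy
     }
  */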
5485 if ((Align & 3) != 0) 5486 return SDValue(); 5487 5488 // DWORD aligned 5489 MVT AVT = MVT::i32; 5490 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned 5491 AVT = MVT::i64; 5492 5493 unsigned UBytes = AVT.getSizeInBits() / 8; 5494 unsigned CountVal = SizeVal / UBytes; 5495 SDValue Count = DAG.getIntPtrConstant(CountVal); 5496 unsigned BytesLeft = SizeVal % UBytes; 5497 5498 SDValue InFlag(0, 0); 5499 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 5500 Count, InFlag); 5501 InFlag = Chain.getValue(1); 5502 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 5503 Dst, InFlag); 5504 InFlag = Chain.getValue(1); 5505 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 5506 Src, InFlag); 5507 InFlag = Chain.getValue(1); 5508 5509 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5510 SmallVector<SDValue, 8> Ops; 5511 Ops.push_back(Chain); 5512 Ops.push_back(DAG.getValueType(AVT)); 5513 Ops.push_back(InFlag); 5514 SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 5515 5516 SmallVector<SDValue, 4> Results; 5517 Results.push_back(RepMovs); 5518 if (BytesLeft) { 5519 // Handle the last 1 - 7 bytes. 5520 unsigned Offset = SizeVal - BytesLeft; 5521 MVT DstVT = Dst.getValueType(); 5522 MVT SrcVT = Src.getValueType(); 5523 MVT SizeVT = Size.getValueType(); 5524 Results.push_back(DAG.getMemcpy(Chain, 5525 DAG.getNode(ISD::ADD, DstVT, Dst, 5526 DAG.getConstant(Offset, DstVT)), 5527 DAG.getNode(ISD::ADD, SrcVT, Src, 5528 DAG.getConstant(Offset, SrcVT)), 5529 DAG.getConstant(BytesLeft, SizeVT), 5530 Align, AlwaysInline, 5531 DstSV, DstSVOff + Offset, 5532 SrcSV, SrcSVOff + Offset)); 5533 } 5534 5535 return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); 5536} 5537 5538/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain 5539SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){ 5540 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5541 SDValue TheChain = N->getOperand(0); 5542 SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); 5543 if (Subtarget->is64Bit()) { 5544 SDValue rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 5545 SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, 5546 MVT::i64, rax.getValue(2)); 5547 SDValue Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, 5548 DAG.getConstant(32, MVT::i8)); 5549 SDValue Ops[] = { 5550 DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) 5551 }; 5552 5553 return DAG.getMergeValues(Ops, 2).getNode(); 5554 } 5555 5556 SDValue eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 5557 SDValue edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, 5558 MVT::i32, eax.getValue(2)); 5559 // Use a buildpair to merge the two 32-bit values into a 64-bit one. 5560 SDValue Ops[] = { eax, edx }; 5561 Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2); 5562 5563 // Use a MERGE_VALUES to return the value and chain. 5564 Ops[1] = edx.getValue(1); 5565 return DAG.getMergeValues(Ops, 2).getNode(); 5566} 5567 5568SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { 5569 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 5570 5571 if (!Subtarget->is64Bit()) { 5572 // vastart just stores the address of the VarArgsFrameIndex slot into the 5573 // memory location argument. 
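  // (Referring back to ExpandREADCYCLECOUNTER above: RDTSC leaves the counter
  // split across EDX:EAX (or RDX:RAX); the SHL/OR, or the BUILD_PAIR on the
  // 32-bit path, simply reassembles it:)
  /*
     unsigned long long rdtsc_value(unsigned eax, unsigned edx) {
       return ((unsigned long long)edx << 32) | eax;
     }
  */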
5574 SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5575 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV, 0); 5576 } 5577 5578 // __va_list_tag: 5579 // gp_offset (0 - 6 * 8) 5580 // fp_offset (48 - 48 + 8 * 16) 5581 // overflow_arg_area (point to parameters coming in memory). 5582 // reg_save_area 5583 SmallVector<SDValue, 8> MemOps; 5584 SDValue FIN = Op.getOperand(1); 5585 // Store gp_offset 5586 SDValue Store = DAG.getStore(Op.getOperand(0), 5587 DAG.getConstant(VarArgsGPOffset, MVT::i32), 5588 FIN, SV, 0); 5589 MemOps.push_back(Store); 5590 5591 // Store fp_offset 5592 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5593 Store = DAG.getStore(Op.getOperand(0), 5594 DAG.getConstant(VarArgsFPOffset, MVT::i32), 5595 FIN, SV, 0); 5596 MemOps.push_back(Store); 5597 5598 // Store ptr to overflow_arg_area 5599 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5600 SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5601 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV, 0); 5602 MemOps.push_back(Store); 5603 5604 // Store ptr to reg_save_area. 5605 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); 5606 SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 5607 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV, 0); 5608 MemOps.push_back(Store); 5609 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 5610} 5611 5612SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { 5613 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5614 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); 5615 SDValue Chain = Op.getOperand(0); 5616 SDValue SrcPtr = Op.getOperand(1); 5617 SDValue SrcSV = Op.getOperand(2); 5618 5619 assert(0 && "VAArgInst is not yet implemented for x86-64!"); 5620 abort(); 5621 return SDValue(); 5622} 5623 5624SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { 5625 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5626 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); 5627 SDValue Chain = Op.getOperand(0); 5628 SDValue DstPtr = Op.getOperand(1); 5629 SDValue SrcPtr = Op.getOperand(2); 5630 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 5631 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5632 5633 return DAG.getMemcpy(Chain, DstPtr, SrcPtr, 5634 DAG.getIntPtrConstant(24), 8, false, 5635 DstSV, 0, SrcSV, 0); 5636} 5637 5638SDValue 5639X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { 5640 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5641 switch (IntNo) { 5642 default: return SDValue(); // Don't custom lower most intrinsics. 5643 // Comparison intrinsics. 
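  // (Referring back to LowerVASTART/LowerVACOPY above: the four stores fill in
  // the SysV x86-64 va_list record, which is why va_copy is a plain 24-byte
  // memcpy. Its assumed layout, for reference only:)
  /*
     typedef struct {
       unsigned int gp_offset;       // next GP register slot in reg_save_area
       unsigned int fp_offset;       // next XMM register slot (starts at 48)
       void *overflow_arg_area;      // arguments that were passed on the stack
       void *reg_save_area;          // spilled register arguments
     } va_list_tag;                  // 4 + 4 + 8 + 8 = 24 bytes
  */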
5644 case Intrinsic::x86_sse_comieq_ss: 5645 case Intrinsic::x86_sse_comilt_ss: 5646 case Intrinsic::x86_sse_comile_ss: 5647 case Intrinsic::x86_sse_comigt_ss: 5648 case Intrinsic::x86_sse_comige_ss: 5649 case Intrinsic::x86_sse_comineq_ss: 5650 case Intrinsic::x86_sse_ucomieq_ss: 5651 case Intrinsic::x86_sse_ucomilt_ss: 5652 case Intrinsic::x86_sse_ucomile_ss: 5653 case Intrinsic::x86_sse_ucomigt_ss: 5654 case Intrinsic::x86_sse_ucomige_ss: 5655 case Intrinsic::x86_sse_ucomineq_ss: 5656 case Intrinsic::x86_sse2_comieq_sd: 5657 case Intrinsic::x86_sse2_comilt_sd: 5658 case Intrinsic::x86_sse2_comile_sd: 5659 case Intrinsic::x86_sse2_comigt_sd: 5660 case Intrinsic::x86_sse2_comige_sd: 5661 case Intrinsic::x86_sse2_comineq_sd: 5662 case Intrinsic::x86_sse2_ucomieq_sd: 5663 case Intrinsic::x86_sse2_ucomilt_sd: 5664 case Intrinsic::x86_sse2_ucomile_sd: 5665 case Intrinsic::x86_sse2_ucomigt_sd: 5666 case Intrinsic::x86_sse2_ucomige_sd: 5667 case Intrinsic::x86_sse2_ucomineq_sd: { 5668 unsigned Opc = 0; 5669 ISD::CondCode CC = ISD::SETCC_INVALID; 5670 switch (IntNo) { 5671 default: break; 5672 case Intrinsic::x86_sse_comieq_ss: 5673 case Intrinsic::x86_sse2_comieq_sd: 5674 Opc = X86ISD::COMI; 5675 CC = ISD::SETEQ; 5676 break; 5677 case Intrinsic::x86_sse_comilt_ss: 5678 case Intrinsic::x86_sse2_comilt_sd: 5679 Opc = X86ISD::COMI; 5680 CC = ISD::SETLT; 5681 break; 5682 case Intrinsic::x86_sse_comile_ss: 5683 case Intrinsic::x86_sse2_comile_sd: 5684 Opc = X86ISD::COMI; 5685 CC = ISD::SETLE; 5686 break; 5687 case Intrinsic::x86_sse_comigt_ss: 5688 case Intrinsic::x86_sse2_comigt_sd: 5689 Opc = X86ISD::COMI; 5690 CC = ISD::SETGT; 5691 break; 5692 case Intrinsic::x86_sse_comige_ss: 5693 case Intrinsic::x86_sse2_comige_sd: 5694 Opc = X86ISD::COMI; 5695 CC = ISD::SETGE; 5696 break; 5697 case Intrinsic::x86_sse_comineq_ss: 5698 case Intrinsic::x86_sse2_comineq_sd: 5699 Opc = X86ISD::COMI; 5700 CC = ISD::SETNE; 5701 break; 5702 case Intrinsic::x86_sse_ucomieq_ss: 5703 case Intrinsic::x86_sse2_ucomieq_sd: 5704 Opc = X86ISD::UCOMI; 5705 CC = ISD::SETEQ; 5706 break; 5707 case Intrinsic::x86_sse_ucomilt_ss: 5708 case Intrinsic::x86_sse2_ucomilt_sd: 5709 Opc = X86ISD::UCOMI; 5710 CC = ISD::SETLT; 5711 break; 5712 case Intrinsic::x86_sse_ucomile_ss: 5713 case Intrinsic::x86_sse2_ucomile_sd: 5714 Opc = X86ISD::UCOMI; 5715 CC = ISD::SETLE; 5716 break; 5717 case Intrinsic::x86_sse_ucomigt_ss: 5718 case Intrinsic::x86_sse2_ucomigt_sd: 5719 Opc = X86ISD::UCOMI; 5720 CC = ISD::SETGT; 5721 break; 5722 case Intrinsic::x86_sse_ucomige_ss: 5723 case Intrinsic::x86_sse2_ucomige_sd: 5724 Opc = X86ISD::UCOMI; 5725 CC = ISD::SETGE; 5726 break; 5727 case Intrinsic::x86_sse_ucomineq_ss: 5728 case Intrinsic::x86_sse2_ucomineq_sd: 5729 Opc = X86ISD::UCOMI; 5730 CC = ISD::SETNE; 5731 break; 5732 } 5733 5734 unsigned X86CC; 5735 SDValue LHS = Op.getOperand(1); 5736 SDValue RHS = Op.getOperand(2); 5737 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 5738 5739 SDValue Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 5740 SDValue SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 5741 DAG.getConstant(X86CC, MVT::i8), Cond); 5742 return DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, SetCC); 5743 } 5744 5745 // Fix vector shift instructions where the last operand is a non-immediate 5746 // i32 value. 
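  // The rewrite below is the intrinsic-level equivalent of, more or less:
  /*
     #include <emmintrin.h>
     __m128i slli_with_variable_count(__m128i v, int n) {
       // pslld with an immediate needs a constant count; otherwise move the
       // count into the low bits of an XMM register and use the register form.
       return _mm_sll_epi32(v, _mm_cvtsi32_si128(n));
     }
  */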
5747 case Intrinsic::x86_sse2_pslli_w: 5748 case Intrinsic::x86_sse2_pslli_d: 5749 case Intrinsic::x86_sse2_pslli_q: 5750 case Intrinsic::x86_sse2_psrli_w: 5751 case Intrinsic::x86_sse2_psrli_d: 5752 case Intrinsic::x86_sse2_psrli_q: 5753 case Intrinsic::x86_sse2_psrai_w: 5754 case Intrinsic::x86_sse2_psrai_d: 5755 case Intrinsic::x86_mmx_pslli_w: 5756 case Intrinsic::x86_mmx_pslli_d: 5757 case Intrinsic::x86_mmx_pslli_q: 5758 case Intrinsic::x86_mmx_psrli_w: 5759 case Intrinsic::x86_mmx_psrli_d: 5760 case Intrinsic::x86_mmx_psrli_q: 5761 case Intrinsic::x86_mmx_psrai_w: 5762 case Intrinsic::x86_mmx_psrai_d: { 5763 SDValue ShAmt = Op.getOperand(2); 5764 if (isa<ConstantSDNode>(ShAmt)) 5765 return SDValue(); 5766 5767 unsigned NewIntNo = 0; 5768 MVT ShAmtVT = MVT::v4i32; 5769 switch (IntNo) { 5770 case Intrinsic::x86_sse2_pslli_w: 5771 NewIntNo = Intrinsic::x86_sse2_psll_w; 5772 break; 5773 case Intrinsic::x86_sse2_pslli_d: 5774 NewIntNo = Intrinsic::x86_sse2_psll_d; 5775 break; 5776 case Intrinsic::x86_sse2_pslli_q: 5777 NewIntNo = Intrinsic::x86_sse2_psll_q; 5778 break; 5779 case Intrinsic::x86_sse2_psrli_w: 5780 NewIntNo = Intrinsic::x86_sse2_psrl_w; 5781 break; 5782 case Intrinsic::x86_sse2_psrli_d: 5783 NewIntNo = Intrinsic::x86_sse2_psrl_d; 5784 break; 5785 case Intrinsic::x86_sse2_psrli_q: 5786 NewIntNo = Intrinsic::x86_sse2_psrl_q; 5787 break; 5788 case Intrinsic::x86_sse2_psrai_w: 5789 NewIntNo = Intrinsic::x86_sse2_psra_w; 5790 break; 5791 case Intrinsic::x86_sse2_psrai_d: 5792 NewIntNo = Intrinsic::x86_sse2_psra_d; 5793 break; 5794 default: { 5795 ShAmtVT = MVT::v2i32; 5796 switch (IntNo) { 5797 case Intrinsic::x86_mmx_pslli_w: 5798 NewIntNo = Intrinsic::x86_mmx_psll_w; 5799 break; 5800 case Intrinsic::x86_mmx_pslli_d: 5801 NewIntNo = Intrinsic::x86_mmx_psll_d; 5802 break; 5803 case Intrinsic::x86_mmx_pslli_q: 5804 NewIntNo = Intrinsic::x86_mmx_psll_q; 5805 break; 5806 case Intrinsic::x86_mmx_psrli_w: 5807 NewIntNo = Intrinsic::x86_mmx_psrl_w; 5808 break; 5809 case Intrinsic::x86_mmx_psrli_d: 5810 NewIntNo = Intrinsic::x86_mmx_psrl_d; 5811 break; 5812 case Intrinsic::x86_mmx_psrli_q: 5813 NewIntNo = Intrinsic::x86_mmx_psrl_q; 5814 break; 5815 case Intrinsic::x86_mmx_psrai_w: 5816 NewIntNo = Intrinsic::x86_mmx_psra_w; 5817 break; 5818 case Intrinsic::x86_mmx_psrai_d: 5819 NewIntNo = Intrinsic::x86_mmx_psra_d; 5820 break; 5821 default: abort(); // Can't reach here. 5822 } 5823 break; 5824 } 5825 } 5826 MVT VT = Op.getValueType(); 5827 ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT, 5828 DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt)); 5829 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, 5830 DAG.getConstant(NewIntNo, MVT::i32), 5831 Op.getOperand(1), ShAmt); 5832 } 5833 } 5834} 5835 5836SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { 5837 // Depths > 0 not supported yet! 5838 if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) 5839 return SDValue(); 5840 5841 // Just load the return address 5842 SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); 5843 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 5844} 5845 5846SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 5847 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 5848 MFI->setFrameAddressIsTaken(true); 5849 MVT VT = Op.getValueType(); 5850 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5851 unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; 5852 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), FrameReg, VT); 5853 while (Depth--) 5854 FrameAddr = DAG.getLoad(VT, DAG.getEntryNode(), FrameAddr, NULL, 0); 5855 return FrameAddr; 5856} 5857 5858SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, 5859 SelectionDAG &DAG) { 5860 return DAG.getIntPtrConstant(2*TD->getPointerSize()); 5861} 5862 5863SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) 5864{ 5865 MachineFunction &MF = DAG.getMachineFunction(); 5866 SDValue Chain = Op.getOperand(0); 5867 SDValue Offset = Op.getOperand(1); 5868 SDValue Handler = Op.getOperand(2); 5869 5870 SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP, 5871 getPointerTy()); 5872 unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); 5873 5874 SDValue StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 5875 DAG.getIntPtrConstant(-TD->getPointerSize())); 5876 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 5877 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 5878 Chain = DAG.getCopyToReg(Chain, StoreAddrReg, StoreAddr); 5879 MF.getRegInfo().addLiveOut(StoreAddrReg); 5880 5881 return DAG.getNode(X86ISD::EH_RETURN, 5882 MVT::Other, 5883 Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); 5884} 5885 5886SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, 5887 SelectionDAG &DAG) { 5888 SDValue Root = Op.getOperand(0); 5889 SDValue Trmp = Op.getOperand(1); // trampoline 5890 SDValue FPtr = Op.getOperand(2); // nested function 5891 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 5892 5893 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5894 5895 const X86InstrInfo *TII = 5896 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 5897 5898 if (Subtarget->is64Bit()) { 5899 SDValue OutChains[6]; 5900 5901 // Large code-model. 5902 5903 const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); 5904 const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); 5905 5906 const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); 5907 const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); 5908 5909 const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix 5910 5911 // Load the pointer to the nested function into R11. 5912 unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 5913 SDValue Addr = Trmp; 5914 OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5915 TrmpAddr, 0); 5916 5917 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); 5918 OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpAddr, 2, false, 2); 5919 5920 // Load the 'nest' parameter value into R10. 5921 // R10 is specified in X86CallingConv.td 5922 OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 5923 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); 5924 OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5925 TrmpAddr, 10); 5926 5927 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); 5928 OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 12, false, 2); 5929 5930 // Jump to the nested function. 5931 OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
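  // Altogether the six stores lay down a 23-byte stub. For reference, the bytes
  // the i16/i64/i8 stores above and below produce (little-endian) are roughly:
  /*
     unsigned char stub[23] = {
       0x49, 0xBB, 0,0,0,0,0,0,0,0,   //  0: movabsq $FPtr, %r11  (FPtr at offset 2)
       0x49, 0xBA, 0,0,0,0,0,0,0,0,   // 10: movabsq $Nest, %r10  (Nest at offset 12)
       0x49, 0xFF, 0xE3               // 20: rex.WB jmpq *%r11
     };
  */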
5932 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); 5933 OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5934 TrmpAddr, 20); 5935 5936 unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 5937 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); 5938 OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr, 5939 TrmpAddr, 22); 5940 5941 SDValue Ops[] = 5942 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) }; 5943 return DAG.getMergeValues(Ops, 2); 5944 } else { 5945 const Function *Func = 5946 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 5947 unsigned CC = Func->getCallingConv(); 5948 unsigned NestReg; 5949 5950 switch (CC) { 5951 default: 5952 assert(0 && "Unsupported calling convention"); 5953 case CallingConv::C: 5954 case CallingConv::X86_StdCall: { 5955 // Pass 'nest' parameter in ECX. 5956 // Must be kept in sync with X86CallingConv.td 5957 NestReg = X86::ECX; 5958 5959 // Check that ECX wasn't needed by an 'inreg' parameter. 5960 const FunctionType *FTy = Func->getFunctionType(); 5961 const AttrListPtr &Attrs = Func->getAttributes(); 5962 5963 if (!Attrs.isEmpty() && !Func->isVarArg()) { 5964 unsigned InRegCount = 0; 5965 unsigned Idx = 1; 5966 5967 for (FunctionType::param_iterator I = FTy->param_begin(), 5968 E = FTy->param_end(); I != E; ++I, ++Idx) 5969 if (Attrs.paramHasAttr(Idx, Attribute::InReg)) 5970 // FIXME: should only count parameters that are lowered to integers. 5971 InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; 5972 5973 if (InRegCount > 2) { 5974 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 5975 abort(); 5976 } 5977 } 5978 break; 5979 } 5980 case CallingConv::X86_FastCall: 5981 case CallingConv::Fast: 5982 // Pass 'nest' parameter in EAX. 
5983 // Must be kept in sync with X86CallingConv.td 5984 NestReg = X86::EAX; 5985 break; 5986 } 5987 5988 SDValue OutChains[4]; 5989 SDValue Addr, Disp; 5990 5991 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); 5992 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); 5993 5994 const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); 5995 const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); 5996 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), 5997 Trmp, TrmpAddr, 0); 5998 5999 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); 6000 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 1, false, 1); 6001 6002 const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); 6003 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); 6004 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, 6005 TrmpAddr, 5, false, 1); 6006 6007 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); 6008 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpAddr, 6, false, 1); 6009 6010 SDValue Ops[] = 6011 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) }; 6012 return DAG.getMergeValues(Ops, 2); 6013 } 6014} 6015 6016SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { 6017 /* 6018 The rounding mode is in bits 11:10 of FPSR, and has the following 6019 settings: 6020 00 Round to nearest 6021 01 Round to -inf 6022 10 Round to +inf 6023 11 Round to 0 6024 6025 FLT_ROUNDS, on the other hand, expects the following: 6026 -1 Undefined 6027 0 Round to 0 6028 1 Round to nearest 6029 2 Round to +inf 6030 3 Round to -inf 6031 6032 To perform the conversion, we do: 6033 (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3) 6034 */ 6035 6036 MachineFunction &MF = DAG.getMachineFunction(); 6037 const TargetMachine &TM = MF.getTarget(); 6038 const TargetFrameInfo &TFI = *TM.getFrameInfo(); 6039 unsigned StackAlignment = TFI.getStackAlignment(); 6040 MVT VT = Op.getValueType(); 6041 6042 // Save FP Control Word to stack slot 6043 int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment); 6044 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 6045 6046 SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other, 6047 DAG.getEntryNode(), StackSlot); 6048 6049 // Load FP Control Word from stack slot 6050 SDValue CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0); 6051 6052 // Transform as necessary 6053 SDValue CWD1 = 6054 DAG.getNode(ISD::SRL, MVT::i16, 6055 DAG.getNode(ISD::AND, MVT::i16, 6056 CWD, DAG.getConstant(0x800, MVT::i16)), 6057 DAG.getConstant(11, MVT::i8)); 6058 SDValue CWD2 = 6059 DAG.getNode(ISD::SRL, MVT::i16, 6060 DAG.getNode(ISD::AND, MVT::i16, 6061 CWD, DAG.getConstant(0x400, MVT::i16)), 6062 DAG.getConstant(9, MVT::i8)); 6063 6064 SDValue RetVal = 6065 DAG.getNode(ISD::AND, MVT::i16, 6066 DAG.getNode(ISD::ADD, MVT::i16, 6067 DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2), 6068 DAG.getConstant(1, MVT::i16)), 6069 DAG.getConstant(3, MVT::i16)); 6070 6071 6072 return DAG.getNode((VT.getSizeInBits() < 16 ? 6073 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); 6074} 6075 6076SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { 6077 MVT VT = Op.getValueType(); 6078 MVT OpVT = VT; 6079 unsigned NumBits = VT.getSizeInBits(); 6080 6081 Op = Op.getOperand(0); 6082 if (VT == MVT::i8) { 6083 // Zero extend to i32 since there is not an i8 bsr. 
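  // The BSR/CMOV/XOR sequence below computes, more or less:
  /*
     unsigned ctlz32(unsigned x) {
       // bsr returns the index of the highest set bit but leaves the result
       // undefined for x == 0, hence the CMOV on ZF substituting 2*32-1 first.
       unsigned idx = x ? 31 - __builtin_clz(x) : 63;   // stand-in for BSR + CMOV
       return idx ^ 31;                                 // 31 - idx, or 32 for x == 0
     }
  */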
6084 OpVT = MVT::i32; 6085 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 6086 } 6087 6088 // Issue a bsr (scan bits in reverse) which also sets EFLAGS. 6089 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 6090 Op = DAG.getNode(X86ISD::BSR, VTs, Op); 6091 6092 // If src is zero (i.e. bsr sets ZF), returns NumBits. 6093 SmallVector<SDValue, 4> Ops; 6094 Ops.push_back(Op); 6095 Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT)); 6096 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 6097 Ops.push_back(Op.getValue(1)); 6098 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 6099 6100 // Finally xor with NumBits-1. 6101 Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); 6102 6103 if (VT == MVT::i8) 6104 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 6105 return Op; 6106} 6107 6108SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { 6109 MVT VT = Op.getValueType(); 6110 MVT OpVT = VT; 6111 unsigned NumBits = VT.getSizeInBits(); 6112 6113 Op = Op.getOperand(0); 6114 if (VT == MVT::i8) { 6115 OpVT = MVT::i32; 6116 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 6117 } 6118 6119 // Issue a bsf (scan bits forward) which also sets EFLAGS. 6120 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 6121 Op = DAG.getNode(X86ISD::BSF, VTs, Op); 6122 6123 // If src is zero (i.e. bsf sets ZF), returns NumBits. 6124 SmallVector<SDValue, 4> Ops; 6125 Ops.push_back(Op); 6126 Ops.push_back(DAG.getConstant(NumBits, OpVT)); 6127 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 6128 Ops.push_back(Op.getValue(1)); 6129 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 6130 6131 if (VT == MVT::i8) 6132 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 6133 return Op; 6134} 6135 6136SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { 6137 MVT T = Op.getValueType(); 6138 unsigned Reg = 0; 6139 unsigned size = 0; 6140 switch(T.getSimpleVT()) { 6141 default: 6142 assert(false && "Invalid value type!"); 6143 case MVT::i8: Reg = X86::AL; size = 1; break; 6144 case MVT::i16: Reg = X86::AX; size = 2; break; 6145 case MVT::i32: Reg = X86::EAX; size = 4; break; 6146 case MVT::i64: 6147 if (Subtarget->is64Bit()) { 6148 Reg = X86::RAX; size = 8; 6149 } else //Should go away when LegalizeType stuff lands 6150 return SDValue(ExpandATOMIC_CMP_SWAP(Op.getNode(), DAG), 0); 6151 break; 6152 }; 6153 SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg, 6154 Op.getOperand(2), SDValue()); 6155 SDValue Ops[] = { cpIn.getValue(0), 6156 Op.getOperand(1), 6157 Op.getOperand(3), 6158 DAG.getTargetConstant(size, MVT::i8), 6159 cpIn.getValue(1) }; 6160 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 6161 SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, Tys, Ops, 5); 6162 SDValue cpOut = 6163 DAG.getCopyFromReg(Result.getValue(0), Reg, T, Result.getValue(1)); 6164 return cpOut; 6165} 6166 6167SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op, 6168 SelectionDAG &DAG) { 6169 MVT T = Op->getValueType(0); 6170 assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); 6171 SDValue cpInL, cpInH; 6172 cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2), 6173 DAG.getConstant(0, MVT::i32)); 6174 cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2), 6175 DAG.getConstant(1, MVT::i32)); 6176 cpInL = DAG.getCopyToReg(Op->getOperand(0), X86::EAX, 6177 cpInL, SDValue()); 6178 cpInH = DAG.getCopyToReg(cpInL.getValue(0), X86::EDX, 6179 cpInH, cpInL.getValue(1)); 6180 SDValue swapInL, swapInH; 6181 swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, 
MVT::i32, Op->getOperand(3), 6182 DAG.getConstant(0, MVT::i32)); 6183 swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3), 6184 DAG.getConstant(1, MVT::i32)); 6185 swapInL = DAG.getCopyToReg(cpInH.getValue(0), X86::EBX, 6186 swapInL, cpInH.getValue(1)); 6187 swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX, 6188 swapInH, swapInL.getValue(1)); 6189 SDValue Ops[] = { swapInH.getValue(0), 6190 Op->getOperand(1), 6191 swapInH.getValue(1) }; 6192 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 6193 SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3); 6194 SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32, 6195 Result.getValue(1)); 6196 SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), X86::EDX, MVT::i32, 6197 cpOutL.getValue(2)); 6198 SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; 6199 SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2); 6200 SDValue Vals[2] = { ResultVal, cpOutH.getValue(1) }; 6201 return DAG.getMergeValues(Vals, 2).getNode(); 6202} 6203 6204SDValue X86TargetLowering::LowerATOMIC_BINARY_64(SDValue Op, 6205 SelectionDAG &DAG, 6206 unsigned NewOp) { 6207 SDNode *Node = Op.getNode(); 6208 MVT T = Node->getValueType(0); 6209 assert (T == MVT::i64 && "Only know how to expand i64 atomics"); 6210 6211 SDValue Chain = Node->getOperand(0); 6212 SDValue In1 = Node->getOperand(1); 6213 SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 6214 Node->getOperand(2), DAG.getIntPtrConstant(0)); 6215 SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 6216 Node->getOperand(2), DAG.getIntPtrConstant(1)); 6217 // This is a generalized SDNode, not an AtomicSDNode, so it doesn't 6218 // have a MemOperand. Pass the info through as a normal operand. 6219 SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand()); 6220 SDValue Ops[] = { Chain, In1, In2L, In2H, LSI }; 6221 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 6222 SDValue Result = DAG.getNode(NewOp, Tys, Ops, 5); 6223 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; 6224 SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2); 6225 SDValue Vals[2] = { ResultVal, Result.getValue(2) }; 6226 return SDValue(DAG.getMergeValues(Vals, 2).getNode(), 0); 6227} 6228 6229SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { 6230 SDNode *Node = Op.getNode(); 6231 MVT T = Node->getValueType(0); 6232 SDValue negOp = DAG.getNode(ISD::SUB, T, 6233 DAG.getConstant(0, T), Node->getOperand(2)); 6234 return DAG.getAtomic((Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_8 ? 6235 ISD::ATOMIC_LOAD_ADD_8 : 6236 Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_16 ? 6237 ISD::ATOMIC_LOAD_ADD_16 : 6238 Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_32 ? 6239 ISD::ATOMIC_LOAD_ADD_32 : 6240 ISD::ATOMIC_LOAD_ADD_64), 6241 Node->getOperand(0), 6242 Node->getOperand(1), negOp, 6243 cast<AtomicSDNode>(Node)->getSrcValue(), 6244 cast<AtomicSDNode>(Node)->getAlignment()); 6245} 6246 6247/// LowerOperation - Provide custom lowering hooks for some operations. 
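/// Most cases below simply forward to the matching Lower* helper above.  The
/// one subtarget-dependent case is ATOMIC_LOAD_SUB_64, which uses the plain
/// negate-and-add lowering on 64-bit targets and the pseudo-instruction path
/// (LowerATOMIC_BINARY_64) when running on a 32-bit subtarget.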
6248/// 6249SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { 6250 switch (Op.getOpcode()) { 6251 default: assert(0 && "Should not custom lower this!"); 6252 case ISD::ATOMIC_CMP_SWAP_8: 6253 case ISD::ATOMIC_CMP_SWAP_16: 6254 case ISD::ATOMIC_CMP_SWAP_32: 6255 case ISD::ATOMIC_CMP_SWAP_64: return LowerCMP_SWAP(Op,DAG); 6256 case ISD::ATOMIC_LOAD_SUB_8: 6257 case ISD::ATOMIC_LOAD_SUB_16: 6258 case ISD::ATOMIC_LOAD_SUB_32: return LowerLOAD_SUB(Op,DAG); 6259 case ISD::ATOMIC_LOAD_SUB_64: return (Subtarget->is64Bit()) ? 6260 LowerLOAD_SUB(Op,DAG) : 6261 LowerATOMIC_BINARY_64(Op,DAG, 6262 X86ISD::ATOMSUB64_DAG); 6263 case ISD::ATOMIC_LOAD_AND_64: return LowerATOMIC_BINARY_64(Op,DAG, 6264 X86ISD::ATOMAND64_DAG); 6265 case ISD::ATOMIC_LOAD_OR_64: return LowerATOMIC_BINARY_64(Op, DAG, 6266 X86ISD::ATOMOR64_DAG); 6267 case ISD::ATOMIC_LOAD_XOR_64: return LowerATOMIC_BINARY_64(Op,DAG, 6268 X86ISD::ATOMXOR64_DAG); 6269 case ISD::ATOMIC_LOAD_NAND_64:return LowerATOMIC_BINARY_64(Op,DAG, 6270 X86ISD::ATOMNAND64_DAG); 6271 case ISD::ATOMIC_LOAD_ADD_64: return LowerATOMIC_BINARY_64(Op,DAG, 6272 X86ISD::ATOMADD64_DAG); 6273 case ISD::ATOMIC_SWAP_64: return LowerATOMIC_BINARY_64(Op,DAG, 6274 X86ISD::ATOMSWAP64_DAG); 6275 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 6276 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 6277 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 6278 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 6279 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 6280 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 6281 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 6282 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 6283 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 6284 case ISD::SHL_PARTS: 6285 case ISD::SRA_PARTS: 6286 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 6287 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 6288 case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 6289 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 6290 case ISD::FABS: return LowerFABS(Op, DAG); 6291 case ISD::FNEG: return LowerFNEG(Op, DAG); 6292 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 6293 case ISD::SETCC: return LowerSETCC(Op, DAG); 6294 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 6295 case ISD::SELECT: return LowerSELECT(Op, DAG); 6296 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 6297 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 6298 case ISD::CALL: return LowerCALL(Op, DAG); 6299 case ISD::RET: return LowerRET(Op, DAG); 6300 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 6301 case ISD::VASTART: return LowerVASTART(Op, DAG); 6302 case ISD::VAARG: return LowerVAARG(Op, DAG); 6303 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 6304 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 6305 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 6306 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 6307 case ISD::FRAME_TO_ARGS_OFFSET: 6308 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 6309 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 6310 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 6311 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 6312 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 6313 case ISD::CTLZ: return LowerCTLZ(Op, DAG); 6314 case ISD::CTTZ: return LowerCTTZ(Op, DAG); 6315 6316 // FIXME: REMOVE 
THIS WHEN LegalizeDAGTypes lands. 6317 case ISD::READCYCLECOUNTER: 6318 return SDValue(ExpandREADCYCLECOUNTER(Op.getNode(), DAG), 0); 6319 } 6320} 6321 6322/// ReplaceNodeResults - Replace a node with an illegal result type 6323/// with a new node built out of custom code. 6324SDNode *X86TargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) { 6325 switch (N->getOpcode()) { 6326 default: 6327 return X86TargetLowering::LowerOperation(SDValue (N, 0), DAG).getNode(); 6328 case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); 6329 case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); 6330 case ISD::ATOMIC_CMP_SWAP_64: return ExpandATOMIC_CMP_SWAP(N, DAG); 6331 } 6332} 6333 6334const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 6335 switch (Opcode) { 6336 default: return NULL; 6337 case X86ISD::BSF: return "X86ISD::BSF"; 6338 case X86ISD::BSR: return "X86ISD::BSR"; 6339 case X86ISD::SHLD: return "X86ISD::SHLD"; 6340 case X86ISD::SHRD: return "X86ISD::SHRD"; 6341 case X86ISD::FAND: return "X86ISD::FAND"; 6342 case X86ISD::FOR: return "X86ISD::FOR"; 6343 case X86ISD::FXOR: return "X86ISD::FXOR"; 6344 case X86ISD::FSRL: return "X86ISD::FSRL"; 6345 case X86ISD::FILD: return "X86ISD::FILD"; 6346 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 6347 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 6348 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 6349 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 6350 case X86ISD::FLD: return "X86ISD::FLD"; 6351 case X86ISD::FST: return "X86ISD::FST"; 6352 case X86ISD::CALL: return "X86ISD::CALL"; 6353 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 6354 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 6355 case X86ISD::CMP: return "X86ISD::CMP"; 6356 case X86ISD::COMI: return "X86ISD::COMI"; 6357 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 6358 case X86ISD::SETCC: return "X86ISD::SETCC"; 6359 case X86ISD::CMOV: return "X86ISD::CMOV"; 6360 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 6361 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 6362 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 6363 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 6364 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 6365 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 6366 case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; 6367 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 6368 case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; 6369 case X86ISD::PINSRB: return "X86ISD::PINSRB"; 6370 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 6371 case X86ISD::FMAX: return "X86ISD::FMAX"; 6372 case X86ISD::FMIN: return "X86ISD::FMIN"; 6373 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 6374 case X86ISD::FRCP: return "X86ISD::FRCP"; 6375 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 6376 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 6377 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 6378 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 6379 case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; 6380 case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; 6381 case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; 6382 case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; 6383 case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG"; 6384 case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG"; 6385 case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG"; 6386 case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; 6387 case 
X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; 6388 case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; 6389 case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; 6390 case X86ISD::VSHL: return "X86ISD::VSHL"; 6391 case X86ISD::VSRL: return "X86ISD::VSRL"; 6392 case X86ISD::CMPPD: return "X86ISD::CMPPD"; 6393 case X86ISD::CMPPS: return "X86ISD::CMPPS"; 6394 case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB"; 6395 case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW"; 6396 case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD"; 6397 case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ"; 6398 case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB"; 6399 case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW"; 6400 case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD"; 6401 case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ"; 6402 } 6403} 6404 6405// isLegalAddressingMode - Return true if the addressing mode represented 6406// by AM is legal for this target, for a load/store of the specified type. 6407bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 6408 const Type *Ty) const { 6409 // X86 supports extremely general addressing modes. 6410 6411 // X86 allows a sign-extended 32-bit immediate field as a displacement. 6412 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 6413 return false; 6414 6415 if (AM.BaseGV) { 6416 // We can only fold this if we don't need an extra load. 6417 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 6418 return false; 6419 6420 // X86-64 only supports addr of globals in small code model. 6421 if (Subtarget->is64Bit()) { 6422 if (getTargetMachine().getCodeModel() != CodeModel::Small) 6423 return false; 6424 // If lower 4G is not available, then we must use rip-relative addressing. 6425 if (AM.BaseOffs || AM.Scale > 1) 6426 return false; 6427 } 6428 } 6429 6430 switch (AM.Scale) { 6431 case 0: 6432 case 1: 6433 case 2: 6434 case 4: 6435 case 8: 6436 // These scales always work. 6437 break; 6438 case 3: 6439 case 5: 6440 case 9: 6441 // These scales are formed with basereg+scalereg. Only accept if there is 6442 // no basereg yet. 6443 if (AM.HasBaseReg) 6444 return false; 6445 break; 6446 default: // Other stuff never works. 6447 return false; 6448 } 6449 6450 return true; 6451} 6452 6453 6454bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { 6455 if (!Ty1->isInteger() || !Ty2->isInteger()) 6456 return false; 6457 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 6458 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 6459 if (NumBits1 <= NumBits2) 6460 return false; 6461 return Subtarget->is64Bit() || NumBits1 < 64; 6462} 6463 6464bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const { 6465 if (!VT1.isInteger() || !VT2.isInteger()) 6466 return false; 6467 unsigned NumBits1 = VT1.getSizeInBits(); 6468 unsigned NumBits2 = VT2.getSizeInBits(); 6469 if (NumBits1 <= NumBits2) 6470 return false; 6471 return Subtarget->is64Bit() || NumBits1 < 64; 6472} 6473 6474/// isShuffleMaskLegal - Targets can use this to indicate that they only 6475/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 6476/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 6477/// are assumed to be legal. 6478bool 6479X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const { 6480 // Only do shuffles on 128-bit vector types for now. 
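  // (A size of 64 bits here means an MMX vector type; those are rejected.)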
6481 if (VT.getSizeInBits() == 64) return false; 6482 return (Mask.getNode()->getNumOperands() <= 4 || 6483 isIdentityMask(Mask.getNode()) || 6484 isIdentityMask(Mask.getNode(), true) || 6485 isSplatMask(Mask.getNode()) || 6486 isPSHUFHW_PSHUFLWMask(Mask.getNode()) || 6487 X86::isUNPCKLMask(Mask.getNode()) || 6488 X86::isUNPCKHMask(Mask.getNode()) || 6489 X86::isUNPCKL_v_undef_Mask(Mask.getNode()) || 6490 X86::isUNPCKH_v_undef_Mask(Mask.getNode())); 6491} 6492 6493bool 6494X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, 6495 MVT EVT, SelectionDAG &DAG) const { 6496 unsigned NumElts = BVOps.size(); 6497 // Only do shuffles on 128-bit vector types for now. 6498 if (EVT.getSizeInBits() * NumElts == 64) return false; 6499 if (NumElts == 2) return true; 6500 if (NumElts == 4) { 6501 return (isMOVLMask(&BVOps[0], 4) || 6502 isCommutedMOVL(&BVOps[0], 4, true) || 6503 isSHUFPMask(&BVOps[0], 4) || 6504 isCommutedSHUFP(&BVOps[0], 4)); 6505 } 6506 return false; 6507} 6508 6509//===----------------------------------------------------------------------===// 6510// X86 Scheduler Hooks 6511//===----------------------------------------------------------------------===// 6512 6513// private utility function 6514MachineBasicBlock * 6515X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, 6516 MachineBasicBlock *MBB, 6517 unsigned regOpc, 6518 unsigned immOpc, 6519 unsigned LoadOpc, 6520 unsigned CXchgOpc, 6521 unsigned copyOpc, 6522 unsigned notOpc, 6523 unsigned EAXreg, 6524 TargetRegisterClass *RC, 6525 bool invSrc) { 6526 // For the atomic bitwise operator, we generate 6527 // thisMBB: 6528 // newMBB: 6529 // ld t1 = [bitinstr.addr] 6530 // op t2 = t1, [bitinstr.val] 6531 // mov EAX = t1 6532 // lcs dest = [bitinstr.addr], t2 [EAX is implicit] 6533 // bz newMBB 6534 // fallthrough -->nextMBB 6535 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6536 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 6537 MachineFunction::iterator MBBIter = MBB; 6538 ++MBBIter; 6539 6540 /// First build the CFG 6541 MachineFunction *F = MBB->getParent(); 6542 MachineBasicBlock *thisMBB = MBB; 6543 MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); 6544 MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); 6545 F->insert(MBBIter, newMBB); 6546 F->insert(MBBIter, nextMBB); 6547 6548 // Move all successors to thisMBB to nextMBB 6549 nextMBB->transferSuccessors(thisMBB); 6550 6551 // Update thisMBB to fall through to newMBB 6552 thisMBB->addSuccessor(newMBB); 6553 6554 // newMBB jumps to itself and fall through to nextMBB 6555 newMBB->addSuccessor(nextMBB); 6556 newMBB->addSuccessor(newMBB); 6557 6558 // Insert instructions into newMBB based on incoming instruction 6559 assert(bInstr->getNumOperands() < 8 && "unexpected number of operands"); 6560 MachineOperand& destOper = bInstr->getOperand(0); 6561 MachineOperand* argOpers[6]; 6562 int numArgs = bInstr->getNumOperands() - 1; 6563 for (int i=0; i < numArgs; ++i) 6564 argOpers[i] = &bInstr->getOperand(i+1); 6565 6566 // x86 address has 4 operands: base, index, scale, and displacement 6567 int lastAddrIndx = 3; // [0,3] 6568 int valArgIndx = 4; 6569 6570 unsigned t1 = F->getRegInfo().createVirtualRegister(RC); 6571 MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(LoadOpc), t1); 6572 for (int i=0; i <= lastAddrIndx; ++i) 6573 (*MIB).addOperand(*argOpers[i]); 6574 6575 unsigned tt = F->getRegInfo().createVirtualRegister(RC); 6576 if (invSrc) { 6577 MIB = BuildMI(newMBB, 
TII->get(notOpc), tt).addReg(t1); 6578 } 6579 else 6580 tt = t1; 6581 6582 unsigned t2 = F->getRegInfo().createVirtualRegister(RC); 6583 assert((argOpers[valArgIndx]->isReg() || 6584 argOpers[valArgIndx]->isImm()) && 6585 "invalid operand"); 6586 if (argOpers[valArgIndx]->isReg()) 6587 MIB = BuildMI(newMBB, TII->get(regOpc), t2); 6588 else 6589 MIB = BuildMI(newMBB, TII->get(immOpc), t2); 6590 MIB.addReg(tt); 6591 (*MIB).addOperand(*argOpers[valArgIndx]); 6592 6593 MIB = BuildMI(newMBB, TII->get(copyOpc), EAXreg); 6594 MIB.addReg(t1); 6595 6596 MIB = BuildMI(newMBB, TII->get(CXchgOpc)); 6597 for (int i=0; i <= lastAddrIndx; ++i) 6598 (*MIB).addOperand(*argOpers[i]); 6599 MIB.addReg(t2); 6600 assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); 6601 (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); 6602 6603 MIB = BuildMI(newMBB, TII->get(copyOpc), destOper.getReg()); 6604 MIB.addReg(EAXreg); 6605 6606 // insert branch 6607 BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); 6608 6609 F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. 6610 return nextMBB; 6611} 6612 6613// private utility function: 64 bit atomics on 32 bit host. 6614MachineBasicBlock * 6615X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, 6616 MachineBasicBlock *MBB, 6617 unsigned regOpcL, 6618 unsigned regOpcH, 6619 unsigned immOpcL, 6620 unsigned immOpcH, 6621 bool invSrc) { 6622 // For the atomic bitwise operator, we generate 6623 // thisMBB (instructions are in pairs, except cmpxchg8b) 6624 // ld t1,t2 = [bitinstr.addr] 6625 // newMBB: 6626 // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) 6627 // op t5, t6 <- out1, out2, [bitinstr.val] 6628 // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val]) 6629 // mov ECX, EBX <- t5, t6 6630 // mov EAX, EDX <- t1, t2 6631 // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] 6632 // mov t3, t4 <- EAX, EDX 6633 // bz newMBB 6634 // result in out1, out2 6635 // fallthrough -->nextMBB 6636 6637 const TargetRegisterClass *RC = X86::GR32RegisterClass; 6638 const unsigned LoadOpc = X86::MOV32rm; 6639 const unsigned copyOpc = X86::MOV32rr; 6640 const unsigned NotOpc = X86::NOT32r; 6641 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6642 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 6643 MachineFunction::iterator MBBIter = MBB; 6644 ++MBBIter; 6645 6646 /// First build the CFG 6647 MachineFunction *F = MBB->getParent(); 6648 MachineBasicBlock *thisMBB = MBB; 6649 MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); 6650 MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); 6651 F->insert(MBBIter, newMBB); 6652 F->insert(MBBIter, nextMBB); 6653 6654 // Move all successors to thisMBB to nextMBB 6655 nextMBB->transferSuccessors(thisMBB); 6656 6657 // Update thisMBB to fall through to newMBB 6658 thisMBB->addSuccessor(newMBB); 6659 6660 // newMBB jumps to itself and fall through to nextMBB 6661 newMBB->addSuccessor(nextMBB); 6662 newMBB->addSuccessor(newMBB); 6663 6664 // Insert instructions into newMBB based on incoming instruction 6665 // There are 8 "real" operands plus 9 implicit def/uses, ignored here. 
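  // The explicit operands, in order, are: the two halves of the destination,
  // the four address operands, and the two halves of the incoming value.
  // They are split into dest1Oper/dest2Oper and argOpers[0..5] below.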
6666 assert(bInstr->getNumOperands() < 18 && "unexpected number of operands"); 6667 MachineOperand& dest1Oper = bInstr->getOperand(0); 6668 MachineOperand& dest2Oper = bInstr->getOperand(1); 6669 MachineOperand* argOpers[6]; 6670 for (int i=0; i < 6; ++i) 6671 argOpers[i] = &bInstr->getOperand(i+2); 6672 6673 // x86 address has 4 operands: base, index, scale, and displacement 6674 int lastAddrIndx = 3; // [0,3] 6675 6676 unsigned t1 = F->getRegInfo().createVirtualRegister(RC); 6677 MachineInstrBuilder MIB = BuildMI(thisMBB, TII->get(LoadOpc), t1); 6678 for (int i=0; i <= lastAddrIndx; ++i) 6679 (*MIB).addOperand(*argOpers[i]); 6680 unsigned t2 = F->getRegInfo().createVirtualRegister(RC); 6681 MIB = BuildMI(thisMBB, TII->get(LoadOpc), t2); 6682 // add 4 to displacement. 6683 for (int i=0; i <= lastAddrIndx-1; ++i) 6684 (*MIB).addOperand(*argOpers[i]); 6685 MachineOperand newOp3 = *(argOpers[3]); 6686 if (newOp3.isImm()) 6687 newOp3.setImm(newOp3.getImm()+4); 6688 else 6689 newOp3.setOffset(newOp3.getOffset()+4); 6690 (*MIB).addOperand(newOp3); 6691 6692 // t3/4 are defined later, at the bottom of the loop 6693 unsigned t3 = F->getRegInfo().createVirtualRegister(RC); 6694 unsigned t4 = F->getRegInfo().createVirtualRegister(RC); 6695 BuildMI(newMBB, TII->get(X86::PHI), dest1Oper.getReg()) 6696 .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB); 6697 BuildMI(newMBB, TII->get(X86::PHI), dest2Oper.getReg()) 6698 .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); 6699 6700 unsigned tt1 = F->getRegInfo().createVirtualRegister(RC); 6701 unsigned tt2 = F->getRegInfo().createVirtualRegister(RC); 6702 if (invSrc) { 6703 MIB = BuildMI(newMBB, TII->get(NotOpc), tt1).addReg(t1); 6704 MIB = BuildMI(newMBB, TII->get(NotOpc), tt2).addReg(t2); 6705 } else { 6706 tt1 = t1; 6707 tt2 = t2; 6708 } 6709 6710 assert((argOpers[4]->isReg() || argOpers[4]->isImm()) && 6711 "invalid operand"); 6712 unsigned t5 = F->getRegInfo().createVirtualRegister(RC); 6713 unsigned t6 = F->getRegInfo().createVirtualRegister(RC); 6714 if (argOpers[4]->isReg()) 6715 MIB = BuildMI(newMBB, TII->get(regOpcL), t5); 6716 else 6717 MIB = BuildMI(newMBB, TII->get(immOpcL), t5); 6718 if (regOpcL != X86::MOV32rr) 6719 MIB.addReg(tt1); 6720 (*MIB).addOperand(*argOpers[4]); 6721 assert(argOpers[5]->isReg() == argOpers[4]->isReg()); 6722 assert(argOpers[5]->isImm() == argOpers[4]->isImm()); 6723 if (argOpers[5]->isReg()) 6724 MIB = BuildMI(newMBB, TII->get(regOpcH), t6); 6725 else 6726 MIB = BuildMI(newMBB, TII->get(immOpcH), t6); 6727 if (regOpcH != X86::MOV32rr) 6728 MIB.addReg(tt2); 6729 (*MIB).addOperand(*argOpers[5]); 6730 6731 MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EAX); 6732 MIB.addReg(t1); 6733 MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EDX); 6734 MIB.addReg(t2); 6735 6736 MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EBX); 6737 MIB.addReg(t5); 6738 MIB = BuildMI(newMBB, TII->get(copyOpc), X86::ECX); 6739 MIB.addReg(t6); 6740 6741 MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG8B)); 6742 for (int i=0; i <= lastAddrIndx; ++i) 6743 (*MIB).addOperand(*argOpers[i]); 6744 6745 assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); 6746 (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); 6747 6748 MIB = BuildMI(newMBB, TII->get(copyOpc), t3); 6749 MIB.addReg(X86::EAX); 6750 MIB = BuildMI(newMBB, TII->get(copyOpc), t4); 6751 MIB.addReg(X86::EDX); 6752 6753 // insert branch 6754 BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); 6755 6756 F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. 
  return nextMBB;
}

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
                                                      MachineBasicBlock *MBB,
                                                      unsigned cmovOpc) {
  // For the atomic min/max operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld t1 = [min/max.addr]
  //     mov t2 = [min/max.val]
  //     cmp  t1, t2
  //     cmov[cond] t2 = t1
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz   newMBB
  //     fallthrough -->nextMBB
  //
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB branches back to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
  MachineOperand& destOper = mInstr->getOperand(0);
  MachineOperand* argOpers[6];
  int numArgs = mInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &mInstr->getOperand(i+1);

  // x86 address has 4 operands: base, index, scale, and displacement
  int lastAddrIndx = 3; // [0,3]
  int valArgIndx = 4;

  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  // We only support register and immediate values
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");

  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
  else
    // Use the move-immediate form when the incoming value is a constant.
    MIB = BuildMI(newMBB, TII->get(X86::MOV32ri), t2);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
  MIB.addReg(t1);
  MIB.addReg(t2);

  // Generate the conditional move
  unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MIB = BuildMI(newMBB, TII->get(cmovOpc), t3);
  MIB.addReg(t2);
  MIB.addReg(t1);

  // Compare and exchange if no one has modified the memory location
  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t3);
  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
  MIB.addReg(X86::EAX);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
6856 F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. 6857 return nextMBB; 6858} 6859 6860 6861MachineBasicBlock * 6862X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 6863 MachineBasicBlock *BB) { 6864 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6865 switch (MI->getOpcode()) { 6866 default: assert(false && "Unexpected instr type to insert"); 6867 case X86::CMOV_FR32: 6868 case X86::CMOV_FR64: 6869 case X86::CMOV_V4F32: 6870 case X86::CMOV_V2F64: 6871 case X86::CMOV_V2I64: { 6872 // To "insert" a SELECT_CC instruction, we actually have to insert the 6873 // diamond control-flow pattern. The incoming instruction knows the 6874 // destination vreg to set, the condition code register to branch on, the 6875 // true/false values to select between, and a branch opcode to use. 6876 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6877 MachineFunction::iterator It = BB; 6878 ++It; 6879 6880 // thisMBB: 6881 // ... 6882 // TrueVal = ... 6883 // cmpTY ccX, r1, r2 6884 // bCC copy1MBB 6885 // fallthrough --> copy0MBB 6886 MachineBasicBlock *thisMBB = BB; 6887 MachineFunction *F = BB->getParent(); 6888 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 6889 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 6890 unsigned Opc = 6891 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 6892 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 6893 F->insert(It, copy0MBB); 6894 F->insert(It, sinkMBB); 6895 // Update machine-CFG edges by transferring all successors of the current 6896 // block to the new block which will contain the Phi node for the select. 6897 sinkMBB->transferSuccessors(BB); 6898 6899 // Add the true and fallthrough blocks as its successors. 6900 BB->addSuccessor(copy0MBB); 6901 BB->addSuccessor(sinkMBB); 6902 6903 // copy0MBB: 6904 // %FalseValue = ... 6905 // # fallthrough to sinkMBB 6906 BB = copy0MBB; 6907 6908 // Update machine-CFG edges 6909 BB->addSuccessor(sinkMBB); 6910 6911 // sinkMBB: 6912 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 6913 // ... 6914 BB = sinkMBB; 6915 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 6916 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 6917 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 6918 6919 F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 6920 return BB; 6921 } 6922 6923 case X86::FP32_TO_INT16_IN_MEM: 6924 case X86::FP32_TO_INT32_IN_MEM: 6925 case X86::FP32_TO_INT64_IN_MEM: 6926 case X86::FP64_TO_INT16_IN_MEM: 6927 case X86::FP64_TO_INT32_IN_MEM: 6928 case X86::FP64_TO_INT64_IN_MEM: 6929 case X86::FP80_TO_INT16_IN_MEM: 6930 case X86::FP80_TO_INT32_IN_MEM: 6931 case X86::FP80_TO_INT64_IN_MEM: { 6932 // Change the floating point control register to use "round towards zero" 6933 // mode when truncating to an integer value. 6934 MachineFunction *F = BB->getParent(); 6935 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 6936 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 6937 6938 // Load the old value of the high byte of the control word... 6939 unsigned OldCW = 6940 F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); 6941 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 6942 6943 // Set the high part to be round to zero... 6944 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 6945 .addImm(0xC7F); 6946 6947 // Reload the modified control word now... 
6948 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 6949 6950 // Restore the memory image of control word to original value 6951 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 6952 .addReg(OldCW); 6953 6954 // Get the X86 opcode to use. 6955 unsigned Opc; 6956 switch (MI->getOpcode()) { 6957 default: assert(0 && "illegal opcode!"); 6958 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; 6959 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; 6960 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; 6961 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; 6962 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; 6963 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; 6964 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; 6965 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; 6966 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; 6967 } 6968 6969 X86AddressMode AM; 6970 MachineOperand &Op = MI->getOperand(0); 6971 if (Op.isReg()) { 6972 AM.BaseType = X86AddressMode::RegBase; 6973 AM.Base.Reg = Op.getReg(); 6974 } else { 6975 AM.BaseType = X86AddressMode::FrameIndexBase; 6976 AM.Base.FrameIndex = Op.getIndex(); 6977 } 6978 Op = MI->getOperand(1); 6979 if (Op.isImm()) 6980 AM.Scale = Op.getImm(); 6981 Op = MI->getOperand(2); 6982 if (Op.isImm()) 6983 AM.IndexReg = Op.getImm(); 6984 Op = MI->getOperand(3); 6985 if (Op.isGlobal()) { 6986 AM.GV = Op.getGlobal(); 6987 } else { 6988 AM.Disp = Op.getImm(); 6989 } 6990 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 6991 .addReg(MI->getOperand(4).getReg()); 6992 6993 // Reload the original control word now. 6994 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 6995 6996 F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 
6997 return BB; 6998 } 6999 case X86::ATOMAND32: 7000 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, 7001 X86::AND32ri, X86::MOV32rm, 7002 X86::LCMPXCHG32, X86::MOV32rr, 7003 X86::NOT32r, X86::EAX, 7004 X86::GR32RegisterClass); 7005 case X86::ATOMOR32: 7006 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, 7007 X86::OR32ri, X86::MOV32rm, 7008 X86::LCMPXCHG32, X86::MOV32rr, 7009 X86::NOT32r, X86::EAX, 7010 X86::GR32RegisterClass); 7011 case X86::ATOMXOR32: 7012 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, 7013 X86::XOR32ri, X86::MOV32rm, 7014 X86::LCMPXCHG32, X86::MOV32rr, 7015 X86::NOT32r, X86::EAX, 7016 X86::GR32RegisterClass); 7017 case X86::ATOMNAND32: 7018 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, 7019 X86::AND32ri, X86::MOV32rm, 7020 X86::LCMPXCHG32, X86::MOV32rr, 7021 X86::NOT32r, X86::EAX, 7022 X86::GR32RegisterClass, true); 7023 case X86::ATOMMIN32: 7024 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); 7025 case X86::ATOMMAX32: 7026 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); 7027 case X86::ATOMUMIN32: 7028 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); 7029 case X86::ATOMUMAX32: 7030 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); 7031 7032 case X86::ATOMAND16: 7033 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, 7034 X86::AND16ri, X86::MOV16rm, 7035 X86::LCMPXCHG16, X86::MOV16rr, 7036 X86::NOT16r, X86::AX, 7037 X86::GR16RegisterClass); 7038 case X86::ATOMOR16: 7039 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, 7040 X86::OR16ri, X86::MOV16rm, 7041 X86::LCMPXCHG16, X86::MOV16rr, 7042 X86::NOT16r, X86::AX, 7043 X86::GR16RegisterClass); 7044 case X86::ATOMXOR16: 7045 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, 7046 X86::XOR16ri, X86::MOV16rm, 7047 X86::LCMPXCHG16, X86::MOV16rr, 7048 X86::NOT16r, X86::AX, 7049 X86::GR16RegisterClass); 7050 case X86::ATOMNAND16: 7051 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, 7052 X86::AND16ri, X86::MOV16rm, 7053 X86::LCMPXCHG16, X86::MOV16rr, 7054 X86::NOT16r, X86::AX, 7055 X86::GR16RegisterClass, true); 7056 case X86::ATOMMIN16: 7057 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); 7058 case X86::ATOMMAX16: 7059 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr); 7060 case X86::ATOMUMIN16: 7061 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr); 7062 case X86::ATOMUMAX16: 7063 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr); 7064 7065 case X86::ATOMAND8: 7066 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, 7067 X86::AND8ri, X86::MOV8rm, 7068 X86::LCMPXCHG8, X86::MOV8rr, 7069 X86::NOT8r, X86::AL, 7070 X86::GR8RegisterClass); 7071 case X86::ATOMOR8: 7072 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, 7073 X86::OR8ri, X86::MOV8rm, 7074 X86::LCMPXCHG8, X86::MOV8rr, 7075 X86::NOT8r, X86::AL, 7076 X86::GR8RegisterClass); 7077 case X86::ATOMXOR8: 7078 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, 7079 X86::XOR8ri, X86::MOV8rm, 7080 X86::LCMPXCHG8, X86::MOV8rr, 7081 X86::NOT8r, X86::AL, 7082 X86::GR8RegisterClass); 7083 case X86::ATOMNAND8: 7084 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, 7085 X86::AND8ri, X86::MOV8rm, 7086 X86::LCMPXCHG8, X86::MOV8rr, 7087 X86::NOT8r, X86::AL, 7088 X86::GR8RegisterClass, true); 7089 // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. 
7090 // This group is for 64-bit host. 7091 case X86::ATOMAND64: 7092 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, 7093 X86::AND64ri32, X86::MOV64rm, 7094 X86::LCMPXCHG64, X86::MOV64rr, 7095 X86::NOT64r, X86::RAX, 7096 X86::GR64RegisterClass); 7097 case X86::ATOMOR64: 7098 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, 7099 X86::OR64ri32, X86::MOV64rm, 7100 X86::LCMPXCHG64, X86::MOV64rr, 7101 X86::NOT64r, X86::RAX, 7102 X86::GR64RegisterClass); 7103 case X86::ATOMXOR64: 7104 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, 7105 X86::XOR64ri32, X86::MOV64rm, 7106 X86::LCMPXCHG64, X86::MOV64rr, 7107 X86::NOT64r, X86::RAX, 7108 X86::GR64RegisterClass); 7109 case X86::ATOMNAND64: 7110 return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, 7111 X86::AND64ri32, X86::MOV64rm, 7112 X86::LCMPXCHG64, X86::MOV64rr, 7113 X86::NOT64r, X86::RAX, 7114 X86::GR64RegisterClass, true); 7115 case X86::ATOMMIN64: 7116 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr); 7117 case X86::ATOMMAX64: 7118 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr); 7119 case X86::ATOMUMIN64: 7120 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); 7121 case X86::ATOMUMAX64: 7122 return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); 7123 7124 // This group does 64-bit operations on a 32-bit host. 7125 case X86::ATOMAND6432: 7126 return EmitAtomicBit6432WithCustomInserter(MI, BB, 7127 X86::AND32rr, X86::AND32rr, 7128 X86::AND32ri, X86::AND32ri, 7129 false); 7130 case X86::ATOMOR6432: 7131 return EmitAtomicBit6432WithCustomInserter(MI, BB, 7132 X86::OR32rr, X86::OR32rr, 7133 X86::OR32ri, X86::OR32ri, 7134 false); 7135 case X86::ATOMXOR6432: 7136 return EmitAtomicBit6432WithCustomInserter(MI, BB, 7137 X86::XOR32rr, X86::XOR32rr, 7138 X86::XOR32ri, X86::XOR32ri, 7139 false); 7140 case X86::ATOMNAND6432: 7141 return EmitAtomicBit6432WithCustomInserter(MI, BB, 7142 X86::AND32rr, X86::AND32rr, 7143 X86::AND32ri, X86::AND32ri, 7144 true); 7145 case X86::ATOMADD6432: 7146 return EmitAtomicBit6432WithCustomInserter(MI, BB, 7147 X86::ADD32rr, X86::ADC32rr, 7148 X86::ADD32ri, X86::ADC32ri, 7149 false); 7150 case X86::ATOMSUB6432: 7151 return EmitAtomicBit6432WithCustomInserter(MI, BB, 7152 X86::SUB32rr, X86::SBB32rr, 7153 X86::SUB32ri, X86::SBB32ri, 7154 false); 7155 case X86::ATOMSWAP6432: 7156 return EmitAtomicBit6432WithCustomInserter(MI, BB, 7157 X86::MOV32rr, X86::MOV32rr, 7158 X86::MOV32ri, X86::MOV32ri, 7159 false); 7160 } 7161} 7162 7163//===----------------------------------------------------------------------===// 7164// X86 Optimization Hooks 7165//===----------------------------------------------------------------------===// 7166 7167void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 7168 const APInt &Mask, 7169 APInt &KnownZero, 7170 APInt &KnownOne, 7171 const SelectionDAG &DAG, 7172 unsigned Depth) const { 7173 unsigned Opc = Op.getOpcode(); 7174 assert((Opc >= ISD::BUILTIN_OP_END || 7175 Opc == ISD::INTRINSIC_WO_CHAIN || 7176 Opc == ISD::INTRINSIC_W_CHAIN || 7177 Opc == ISD::INTRINSIC_VOID) && 7178 "Should use MaskedValueIsZero if you don't know whether Op" 7179 " is a target node!"); 7180 7181 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. 
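  // X86ISD::SETCC produces only 0 or 1, so every bit above bit 0 is known to
  // be zero; that is what the case below records.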
7182 switch (Opc) { 7183 default: break; 7184 case X86ISD::SETCC: 7185 KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), 7186 Mask.getBitWidth() - 1); 7187 break; 7188 } 7189} 7190 7191/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 7192/// node is a GlobalAddress + offset. 7193bool X86TargetLowering::isGAPlusOffset(SDNode *N, 7194 GlobalValue* &GA, int64_t &Offset) const{ 7195 if (N->getOpcode() == X86ISD::Wrapper) { 7196 if (isa<GlobalAddressSDNode>(N->getOperand(0))) { 7197 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 7198 Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset(); 7199 return true; 7200 } 7201 } 7202 return TargetLowering::isGAPlusOffset(N, GA, Offset); 7203} 7204 7205static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, 7206 const TargetLowering &TLI) { 7207 GlobalValue *GV; 7208 int64_t Offset = 0; 7209 if (TLI.isGAPlusOffset(Base, GV, Offset)) 7210 return (GV->getAlignment() >= N && (Offset % N) == 0); 7211 // DAG combine handles the stack object case. 7212 return false; 7213} 7214 7215static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask, 7216 unsigned NumElems, MVT EVT, 7217 SDNode *&Base, 7218 SelectionDAG &DAG, MachineFrameInfo *MFI, 7219 const TargetLowering &TLI) { 7220 Base = NULL; 7221 for (unsigned i = 0; i < NumElems; ++i) { 7222 SDValue Idx = PermMask.getOperand(i); 7223 if (Idx.getOpcode() == ISD::UNDEF) { 7224 if (!Base) 7225 return false; 7226 continue; 7227 } 7228 7229 SDValue Elt = DAG.getShuffleScalarElt(N, i); 7230 if (!Elt.getNode() || 7231 (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) 7232 return false; 7233 if (!Base) { 7234 Base = Elt.getNode(); 7235 if (Base->getOpcode() == ISD::UNDEF) 7236 return false; 7237 continue; 7238 } 7239 if (Elt.getOpcode() == ISD::UNDEF) 7240 continue; 7241 7242 if (!TLI.isConsecutiveLoad(Elt.getNode(), Base, 7243 EVT.getSizeInBits()/8, i, MFI)) 7244 return false; 7245 } 7246 return true; 7247} 7248 7249/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 7250/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 7251/// if the load addresses are consecutive, non-overlapping, and in the right 7252/// order. 7253static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 7254 const TargetLowering &TLI) { 7255 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 7256 MVT VT = N->getValueType(0); 7257 MVT EVT = VT.getVectorElementType(); 7258 SDValue PermMask = N->getOperand(2); 7259 unsigned NumElems = PermMask.getNumOperands(); 7260 SDNode *Base = NULL; 7261 if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base, 7262 DAG, MFI, TLI)) 7263 return SDValue(); 7264 7265 LoadSDNode *LD = cast<LoadSDNode>(Base); 7266 if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI)) 7267 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 7268 LD->getSrcValueOffset(), LD->isVolatile()); 7269 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 7270 LD->getSrcValueOffset(), LD->isVolatile(), 7271 LD->getAlignment()); 7272} 7273 7274/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. 7275static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, 7276 const X86Subtarget *Subtarget, 7277 const TargetLowering &TLI) { 7278 unsigned NumOps = N->getNumOperands(); 7279 7280 // Ignore single operand BUILD_VECTOR. 
7281 if (NumOps == 1) 7282 return SDValue(); 7283 7284 MVT VT = N->getValueType(0); 7285 MVT EVT = VT.getVectorElementType(); 7286 if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) 7287 // We are looking for load i64 and zero extend. We want to transform 7288 // it before legalizer has a chance to expand it. Also look for i64 7289 // BUILD_PAIR bit casted to f64. 7290 return SDValue(); 7291 // This must be an insertion into a zero vector. 7292 SDValue HighElt = N->getOperand(1); 7293 if (!isZeroNode(HighElt)) 7294 return SDValue(); 7295 7296 // Value must be a load. 7297 SDNode *Base = N->getOperand(0).getNode(); 7298 if (!isa<LoadSDNode>(Base)) { 7299 if (Base->getOpcode() != ISD::BIT_CONVERT) 7300 return SDValue(); 7301 Base = Base->getOperand(0).getNode(); 7302 if (!isa<LoadSDNode>(Base)) 7303 return SDValue(); 7304 } 7305 7306 // Transform it into VZEXT_LOAD addr. 7307 LoadSDNode *LD = cast<LoadSDNode>(Base); 7308 7309 // Load must not be an extload. 7310 if (LD->getExtensionType() != ISD::NON_EXTLOAD) 7311 return SDValue(); 7312 7313 SDVTList Tys = DAG.getVTList(VT, MVT::Other); 7314 SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; 7315 SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, Tys, Ops, 2); 7316 DAG.ReplaceAllUsesOfValueWith(SDValue(Base, 1), ResNode.getValue(1)); 7317 return ResNode; 7318} 7319 7320/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 7321static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 7322 const X86Subtarget *Subtarget) { 7323 SDValue Cond = N->getOperand(0); 7324 7325 // If we have SSE[12] support, try to form min/max nodes. 7326 if (Subtarget->hasSSE2() && 7327 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 7328 if (Cond.getOpcode() == ISD::SETCC) { 7329 // Get the LHS/RHS of the select. 7330 SDValue LHS = N->getOperand(1); 7331 SDValue RHS = N->getOperand(2); 7332 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 7333 7334 unsigned Opcode = 0; 7335 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 7336 switch (CC) { 7337 default: break; 7338 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 7339 case ISD::SETULE: 7340 case ISD::SETLE: 7341 if (!UnsafeFPMath) break; 7342 // FALL THROUGH. 7343 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 7344 case ISD::SETLT: 7345 Opcode = X86ISD::FMIN; 7346 break; 7347 7348 case ISD::SETOGT: // (X > Y) ? X : Y -> max 7349 case ISD::SETUGT: 7350 case ISD::SETGT: 7351 if (!UnsafeFPMath) break; 7352 // FALL THROUGH. 7353 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 7354 case ISD::SETGE: 7355 Opcode = X86ISD::FMAX; 7356 break; 7357 } 7358 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 7359 switch (CC) { 7360 default: break; 7361 case ISD::SETOGT: // (X > Y) ? Y : X -> min 7362 case ISD::SETUGT: 7363 case ISD::SETGT: 7364 if (!UnsafeFPMath) break; 7365 // FALL THROUGH. 7366 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 7367 case ISD::SETGE: 7368 Opcode = X86ISD::FMIN; 7369 break; 7370 7371 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 7372 case ISD::SETULE: 7373 case ISD::SETLE: 7374 if (!UnsafeFPMath) break; 7375 // FALL THROUGH. 7376 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 7377 case ISD::SETLT: 7378 Opcode = X86ISD::FMAX; 7379 break; 7380 } 7381 } 7382 7383 if (Opcode) 7384 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 7385 } 7386 7387 } 7388 7389 return SDValue(); 7390} 7391 7392/// PerformSTORECombine - Do target-specific dag combines on STORE nodes. 
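/// The transform implemented below rewrites a 64-bit vector (MMX-typed)
/// load/store pair as integer memory operations: a single i64 copy when the
/// subtarget is 64-bit, otherwise two i32 copies (low word, then high word at
/// offset +4).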
7393static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, 7394 const X86Subtarget *Subtarget) { 7395 // Turn load->store of MMX types into GPR load/stores. This avoids clobbering 7396 // the FP state in cases where an emms may be missing. 7397 // A preferable solution to the general problem is to figure out the right 7398 // places to insert EMMS. This qualifies as a quick hack. 7399 StoreSDNode *St = cast<StoreSDNode>(N); 7400 if (St->getValue().getValueType().isVector() && 7401 St->getValue().getValueType().getSizeInBits() == 64 && 7402 isa<LoadSDNode>(St->getValue()) && 7403 !cast<LoadSDNode>(St->getValue())->isVolatile() && 7404 St->getChain().hasOneUse() && !St->isVolatile()) { 7405 SDNode* LdVal = St->getValue().getNode(); 7406 LoadSDNode *Ld = 0; 7407 int TokenFactorIndex = -1; 7408 SmallVector<SDValue, 8> Ops; 7409 SDNode* ChainVal = St->getChain().getNode(); 7410 // Must be a store of a load. We currently handle two cases: the load 7411 // is a direct child, and it's under an intervening TokenFactor. It is 7412 // possible to dig deeper under nested TokenFactors. 7413 if (ChainVal == LdVal) 7414 Ld = cast<LoadSDNode>(St->getChain()); 7415 else if (St->getValue().hasOneUse() && 7416 ChainVal->getOpcode() == ISD::TokenFactor) { 7417 for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) { 7418 if (ChainVal->getOperand(i).getNode() == LdVal) { 7419 TokenFactorIndex = i; 7420 Ld = cast<LoadSDNode>(St->getValue()); 7421 } else 7422 Ops.push_back(ChainVal->getOperand(i)); 7423 } 7424 } 7425 if (Ld) { 7426 // If we are a 64-bit capable x86, lower to a single movq load/store pair. 7427 if (Subtarget->is64Bit()) { 7428 SDValue NewLd = DAG.getLoad(MVT::i64, Ld->getChain(), 7429 Ld->getBasePtr(), Ld->getSrcValue(), 7430 Ld->getSrcValueOffset(), Ld->isVolatile(), 7431 Ld->getAlignment()); 7432 SDValue NewChain = NewLd.getValue(1); 7433 if (TokenFactorIndex != -1) { 7434 Ops.push_back(NewChain); 7435 NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0], 7436 Ops.size()); 7437 } 7438 return DAG.getStore(NewChain, NewLd, St->getBasePtr(), 7439 St->getSrcValue(), St->getSrcValueOffset(), 7440 St->isVolatile(), St->getAlignment()); 7441 } 7442 7443 // Otherwise, lower to two 32-bit copies. 
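      // The low word reuses the original address and the high word uses
      // address+4; the new load chains are folded back into the surrounding
      // TokenFactor (if any) so chained users stay ordered.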
7444 SDValue LoAddr = Ld->getBasePtr(); 7445 SDValue HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr, 7446 DAG.getConstant(4, MVT::i32)); 7447 7448 SDValue LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr, 7449 Ld->getSrcValue(), Ld->getSrcValueOffset(), 7450 Ld->isVolatile(), Ld->getAlignment()); 7451 SDValue HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr, 7452 Ld->getSrcValue(), Ld->getSrcValueOffset()+4, 7453 Ld->isVolatile(), 7454 MinAlign(Ld->getAlignment(), 4)); 7455 7456 SDValue NewChain = LoLd.getValue(1); 7457 if (TokenFactorIndex != -1) { 7458 Ops.push_back(LoLd); 7459 Ops.push_back(HiLd); 7460 NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0], 7461 Ops.size()); 7462 } 7463 7464 LoAddr = St->getBasePtr(); 7465 HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr, 7466 DAG.getConstant(4, MVT::i32)); 7467 7468 SDValue LoSt = DAG.getStore(NewChain, LoLd, LoAddr, 7469 St->getSrcValue(), St->getSrcValueOffset(), 7470 St->isVolatile(), St->getAlignment()); 7471 SDValue HiSt = DAG.getStore(NewChain, HiLd, HiAddr, 7472 St->getSrcValue(), 7473 St->getSrcValueOffset() + 4, 7474 St->isVolatile(), 7475 MinAlign(St->getAlignment(), 4)); 7476 return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt); 7477 } 7478 } 7479 return SDValue(); 7480} 7481 7482/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and 7483/// X86ISD::FXOR nodes. 7484static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { 7485 assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); 7486 // F[X]OR(0.0, x) -> x 7487 // F[X]OR(x, 0.0) -> x 7488 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 7489 if (C->getValueAPF().isPosZero()) 7490 return N->getOperand(1); 7491 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1))) 7492 if (C->getValueAPF().isPosZero()) 7493 return N->getOperand(0); 7494 return SDValue(); 7495} 7496 7497/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. 7498static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { 7499 // FAND(0.0, x) -> 0.0 7500 // FAND(x, 0.0) -> 0.0 7501 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 7502 if (C->getValueAPF().isPosZero()) 7503 return N->getOperand(0); 7504 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1))) 7505 if (C->getValueAPF().isPosZero()) 7506 return N->getOperand(1); 7507 return SDValue(); 7508} 7509 7510 7511SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, 7512 DAGCombinerInfo &DCI) const { 7513 SelectionDAG &DAG = DCI.DAG; 7514 switch (N->getOpcode()) { 7515 default: break; 7516 case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); 7517 case ISD::BUILD_VECTOR: 7518 return PerformBuildVectorCombine(N, DAG, Subtarget, *this); 7519 case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); 7520 case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); 7521 case X86ISD::FXOR: 7522 case X86ISD::FOR: return PerformFORCombine(N, DAG); 7523 case X86ISD::FAND: return PerformFANDCombine(N, DAG); 7524 } 7525 7526 return SDValue(); 7527} 7528 7529//===----------------------------------------------------------------------===// 7530// X86 Inline Assembly Support 7531//===----------------------------------------------------------------------===// 7532 7533/// getConstraintType - Given a constraint letter, return the type of 7534/// constraint it is for this target. 
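/// The single-letter constraints handled here ('A', 'f', 'r', 'R', 'l', 'q',
/// 'Q', 'x', 'y', 'Y') all name register classes; anything else is deferred
/// to the generic TargetLowering implementation.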
7535X86TargetLowering::ConstraintType 7536X86TargetLowering::getConstraintType(const std::string &Constraint) const { 7537 if (Constraint.size() == 1) { 7538 switch (Constraint[0]) { 7539 case 'A': 7540 case 'f': 7541 case 'r': 7542 case 'R': 7543 case 'l': 7544 case 'q': 7545 case 'Q': 7546 case 'x': 7547 case 'y': 7548 case 'Y': 7549 return C_RegisterClass; 7550 default: 7551 break; 7552 } 7553 } 7554 return TargetLowering::getConstraintType(Constraint); 7555} 7556 7557/// LowerXConstraint - try to replace an X constraint, which matches anything, 7558/// with another that has more specific requirements based on the type of the 7559/// corresponding operand. 7560const char *X86TargetLowering:: 7561LowerXConstraint(MVT ConstraintVT) const { 7562 // FP X constraints get lowered to SSE1/2 registers if available, otherwise 7563 // 'f' like normal targets. 7564 if (ConstraintVT.isFloatingPoint()) { 7565 if (Subtarget->hasSSE2()) 7566 return "Y"; 7567 if (Subtarget->hasSSE1()) 7568 return "x"; 7569 } 7570 7571 return TargetLowering::LowerXConstraint(ConstraintVT); 7572} 7573 7574/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 7575/// vector. If it is invalid, don't add anything to Ops. 7576void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, 7577 char Constraint, 7578 bool hasMemory, 7579 std::vector<SDValue>&Ops, 7580 SelectionDAG &DAG) const { 7581 SDValue Result(0, 0); 7582 7583 switch (Constraint) { 7584 default: break; 7585 case 'I': 7586 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 7587 if (C->getZExtValue() <= 31) { 7588 Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); 7589 break; 7590 } 7591 } 7592 return; 7593 case 'J': 7594 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 7595 if (C->getZExtValue() <= 63) { 7596 Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); 7597 break; 7598 } 7599 } 7600 return; 7601 case 'N': 7602 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 7603 if (C->getZExtValue() <= 255) { 7604 Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); 7605 break; 7606 } 7607 } 7608 return; 7609 case 'i': { 7610 // Literal immediates are always ok. 7611 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { 7612 Result = DAG.getTargetConstant(CST->getZExtValue(), Op.getValueType()); 7613 break; 7614 } 7615 7616 // If we are in non-pic codegen mode, we allow the address of a global (with 7617 // an optional displacement) to be used with 'i'. 7618 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); 7619 int64_t Offset = 0; 7620 7621 // Match either (GA) or (GA+C) 7622 if (GA) { 7623 Offset = GA->getOffset(); 7624 } else if (Op.getOpcode() == ISD::ADD) { 7625 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 7626 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 7627 if (C && GA) { 7628 Offset = GA->getOffset()+C->getZExtValue(); 7629 } else { 7630 C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 7631 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 7632 if (C && GA) 7633 Offset = GA->getOffset()+C->getZExtValue(); 7634 else 7635 C = 0, GA = 0; 7636 } 7637 } 7638 7639 if (GA) { 7640 if (hasMemory) 7641 Op = LowerGlobalAddress(GA->getGlobal(), Offset, DAG); 7642 else 7643 Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), 7644 Offset); 7645 Result = Op; 7646 break; 7647 } 7648 7649 // Otherwise, not valid for this mode. 
    return;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
                                                      Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      if (VT == MVT::i32 || !Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR32RegisterClass);
      return std::make_pair(0U, X86::GR64RegisterClass);
    case 'f':   // FP Stack registers.
      // If SSE is enabled for this VT, use f80 to ensure the isel moves the
      // value to the correct fpstack register class.
      if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP32RegisterClass);
      if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP64RegisterClass);
      return std::make_pair(0U, X86::RFP80RegisterClass);
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT.getSimpleVT()) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
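  // The default lookup also resolves explicit register references such as
  // "{eax}" or "{st}" by register name, without regard for the operand type,
  // so the code below may still have to fix up the class it picked (e.g.
  // "{ax}" with an i32 operand should end up as EAX/GR32, not AX/GR16).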
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
  // turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second == X86::GR16RegisterClass) {
    if (VT == MVT::i8) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::AL; break;
      case X86::DX: DestReg = X86::DL; break;
      case X86::CX: DestReg = X86::CL; break;
      case X86::BX: DestReg = X86::BL; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR8RegisterClass;
      }
    } else if (VT == MVT::i32) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::EAX; break;
      case X86::DX: DestReg = X86::EDX; break;
      case X86::CX: DestReg = X86::ECX; break;
      case X86::BX: DestReg = X86::EBX; break;
      case X86::SI: DestReg = X86::ESI; break;
      case X86::DI: DestReg = X86::EDI; break;
      case X86::BP: DestReg = X86::EBP; break;
      case X86::SP: DestReg = X86::ESP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR32RegisterClass;
      }
    } else if (VT == MVT::i64) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::RAX; break;
      case X86::DX: DestReg = X86::RDX; break;
      case X86::CX: DestReg = X86::RCX; break;
      case X86::BX: DestReg = X86::RBX; break;
      case X86::SI: DestReg = X86::RSI; break;
      case X86::DI: DestReg = X86::RDI; break;
      case X86::BP: DestReg = X86::RBP; break;
      case X86::SP: DestReg = X86::RSP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR64RegisterClass;
      }
    }
  } else if (Res.second == X86::FR32RegisterClass ||
             Res.second == X86::FR64RegisterClass ||
             Res.second == X86::VR128RegisterClass) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class.  This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it can
    // find, ignoring the required type.
    if (VT == MVT::f32)
      Res.second = X86::FR32RegisterClass;
    else if (VT == MVT::f64)
      Res.second = X86::FR64RegisterClass;
    else if (X86::VR128RegisterClass->hasType(VT))
      Res.second = X86::VR128RegisterClass;
  }

  return Res;
}
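
// Worked example for the remapping above (a sketch, not part of the original
// file): an i32 operand constrained as "{ax}", e.g.
//   %v = call i32 asm "", "={ax}"()
// first resolves to (X86::AX, GR16RegisterClass); the i32 branch above then
// rewrites it to (X86::EAX, GR32RegisterClass), which is what "{ax},i32" is
// expected to mean.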