X86ISelLowering.cpp revision 4c2454623841f05c6c665659b34c214950d12d7e
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool>
DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));

// Forward declarations.
static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG);

X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  bool Fast = false;

  RegInfo = TM.getRegisterInfo();
  TD = getTargetData();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setBooleanContents(ZeroOrOneBooleanContent);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
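  // With the Expand action, e.g. a truncating i32->i16 store is legalized
  // into an explicit ISD::TRUNCATE of the value followed by a normal store
  // of the narrower type.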
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8, Expand);
  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf64) {
      // We have an impenetrably clever algorithm for ui64->double only.
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

      // We have a faster algorithm for ui32->single only.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    } else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  }

  // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }

  // TODO: when we have SSE, these could be more efficient by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
    setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f80, Expand);
  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  setOperationAction(ISD::CTPOP, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8, Custom);
  setOperationAction(ISD::CTLZ, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Custom);
  setOperationAction(ISD::CTLZ, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTLZ, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Custom);
    setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT, MVT::f80, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::f80, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

  // Darwin ABI issue.
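  // These address nodes are custom lowered so that they can be wrapped in
  // X86ISD::Wrapper nodes and combined with a PIC base register or a
  // Darwin-style stub reference where the subtarget requires one.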
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  }

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  if (!Subtarget->hasSSE2())
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // Expand certain atomics.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);

  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);

  if (!Subtarget->is64Bit()) {
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
  }

  // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing()) {
    setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  } else {
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Floating truncations from f80 and extensions to f80 go through memory.
    // If optimizing, we lie about this though and handle it in
    // InstructionSelectPreprocess so that dagcombine2 can hack on these.
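    // Note: 'Fast' is hard-coded to false at the top of this constructor, so
    // the setConvertAction calls below are currently never reached;
    // presumably the flag was intended to be driven by the fast-codegen path.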
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f64, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE <-> X87 conversions go through memory. If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f64, Expand);
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      // And x87->x87 truncations also.
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations go through memory. If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  {
    bool ignored;
    APFloat TmpFlt(+0.0);
    TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                   &ignored);
    addLegalFPImmediate(TmpFlt);  // FLD0
    TmpFlt.changeSign();
    addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
    APFloat TmpFlt2(+1.0);
    TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                    &ignored);
    addLegalFPImmediate(TmpFlt2); // FLD1
    TmpFlt2.changeSign();
    addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
  }

  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN, MVT::f80, Expand);
    setOperationAction(ISD::FCOS, MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f80, Expand);

  setOperationAction(ISD::FLOG, MVT::f80, Expand);
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SUB, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::MUL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
  }

  if (!DisableMMX && Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType(ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType(ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType(ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);

    setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
    setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
    setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
    setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
    setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
    setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
    setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors.
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType(ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType(ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType(ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType(ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType(ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);

  }

  if (Subtarget->hasSSE41()) {
    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);

    // i8 and i16 vectors are custom because the source register and source
    // memory operand types are not the same width. f32 vectors are custom
    // since the immediate controlling the insert encodes additional
    // information.
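    // For example, pinsrb/pinsrw source their scalar from a 32-bit GPR (or a
    // narrower memory operand), and the insertps immediate selects source
    // and destination lanes as well as a zero mask.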
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
    }
  }

  if (Subtarget->hasSSE42()) {
    setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Add/Sub/Mul with overflow operations are custom lowered.
  setOperationAction(ISD::SADDO, MVT::i32, Custom);
  setOperationAction(ISD::SADDO, MVT::i64, Custom);
  setOperationAction(ISD::UADDO, MVT::i32, Custom);
  setOperationAction(ISD::UADDO, MVT::i64, Custom);
  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
  setOperationAction(ISD::SSUBO, MVT::i64, Custom);
  setOperationAction(ISD::USUBO, MVT::i32, Custom);
  setOperationAction(ISD::USUBO, MVT::i64, Custom);
  setOperationAction(ISD::SMULO, MVT::i32, Custom);
  setOperationAction(ISD::SMULO, MVT::i64, Custom);
  setOperationAction(ISD::UMULO, MVT::i32, Custom);
  setOperationAction(ISD::UMULO, MVT::i64, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::STORE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;  // For @llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For @llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 3;  // For @llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
  setPrefLoopAlignment(16);
}


MVT X86TargetLowering::getSetCCResultType(MVT VT) const {
  return MVT::i8;
}


/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getBitWidth() == 128)
      MaxAlign = 16;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
  return;
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
/// For X86, aggregates that contain SSE vectors are placed at 16-byte
/// boundaries while the rest are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  if (Subtarget->is64Bit()) {
    // Max of 8 and alignment of type.
    unsigned TyAlign = TD->getABITypeAlignment(Ty);
    if (TyAlign > 8)
      return TyAlign;
    return 8;
  }

  unsigned Align = 4;
  if (Subtarget->hasSSE1())
    getMaxByValAlign(Ty, Align);
  return Align;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
/// determining it.
MVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
                                       bool isSrcConst, bool isSrcStr) const {
  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
  // linux. This is because the stack realignment code can't handle certain
  // cases like PR2962. This should be removed when PR2962 is fixed.
  if (Subtarget->getStackAlignment() >= 16) {
    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
      return MVT::v4i32;
    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
      return MVT::v4f32;
  }
  if (Subtarget->is64Bit() && Size >= 8)
    return MVT::i64;
  return MVT::i32;
}


/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDValue Chain = Op.getOperand(0);

  // Handle tail call return.
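  // If the chain reaches back to a lowered tail call, this return is folded
  // into an X86ISD::TC_RETURN node that carries the target address and the
  // stack adjustment, and no separate return sequence is emitted for it.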
  Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDValue TailCall = Chain;
    SDValue TargetAddress = TailCall.getOperand(1);
    SDValue StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDValue, 8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i = 3; i < TailCall.getNumOperands() - 1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDValue Flag;

  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue ValToCopy = Op.getOperand(i*2+1);

    // Returns in ST0/ST1 are handled specially: these are pushed as operands
    // to the RET instruction and handled by the FP Stackifier.
    if (RVLocs[i].getLocReg() == X86::ST0 ||
        RVLocs[i].getLocReg() == X86::ST1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy);
      RetOps.push_back(ValToCopy);
      // Don't emit a copytoreg.
      continue;
    }

    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
    Flag = Chain.getValue(1);
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax.
  if (Subtarget->is64Bit() &&
      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy());

    Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag);
    Flag = Chain.getValue(1);
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
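  // The flag (glue) ties the CopyToReg nodes above to the RET_FLAG node so
  // that the result-register copies cannot be scheduled away from the return.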
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, &RetOps[0], RetOps.size());
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes
/// that Chain/InFlag are the input chain/flag to use, and that TheCall is
/// the call being lowered. This returns an SDNode with the same number of
/// values as the ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = TheCall->isVarArg();
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDValue, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    MVT CopyVT = RVLocs[i].getValVT();

    // If this is a call to a function that returns an fp value on the floating
    // point stack, but where we prefer to use the value in xmm registers, copy
    // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[i].getLocReg() == X86::ST0 ||
         RVLocs[i].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
      CopyVT = MVT::f80;
    }

    Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                               CopyVT, InFlag).getValue(1);
    SDValue Val = Chain.getValue(0);
    InFlag = Chain.getValue(2);

    if (CopyVT != RVLocs[i].getValVT()) {
      // Round the f80 to the right size, which also moves it to the
      // appropriate xmm register.
      Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1));
    }

    ResultVals.push_back(Val);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(), &ResultVals[0],
                     ResultVals.size()).getNode();
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall calling convention seems to be standard for many Windows' API
//  routines and around. It differs from C calling convention just a little:
//  callee should clean up the stack, not caller. Symbols should be also
//  decorated in some fancy way :) It doesn't support any vector arguments.
//  For info on fast calling convention see Fast Calling Convention (tail call)
//  implementation LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
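/// For example, AddLiveIn(MF, X86::EAX, X86::GR32RegisterClass) yields a
/// fresh GR32 virtual register that receives the incoming value of EAX.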
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  MF.getRegInfo().addLiveIn(PReg, VReg);
  return VReg;
}

/// CallIsStructReturn - Determines whether a CALL node uses struct return
/// semantics.
static bool CallIsStructReturn(CallSDNode *TheCall) {
  unsigned NumOps = TheCall->getNumArgs();
  if (!NumOps)
    return false;

  return TheCall->getArgFlags(0).isSRet();
}

/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses
/// struct return semantics.
static bool ArgsAreStructReturn(SDValue Op) {
  unsigned NumArgs = Op.getNode()->getNumValues() - 1;
  if (!NumArgs)
    return false;

  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
}

/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
/// the callee to pop its own arguments. Callee pop is necessary to support
/// tail calls.
bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
  if (IsVarArg)
    return false;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else if (CC == CallingConv::Fast && PerformTailCallOpt)
      return CC_X86_64_TailCall;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else
    return CC_X86_32_C;
}

/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  if (CC == CallingConv::X86_FastCall)
    return FastCall;
  else if (CC == CallingConv::X86_StdCall)
    return StdCall;
  return None;
}


/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
/// to be loaded in a register before calling.
bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
  return !IsTailCall && !Is64Bit &&
         getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
         Subtarget->isPICStyleGOT();
}

/// CallRequiresFnAddressInReg - Check whether the call requires the function
/// address to be loaded in a register.
bool
X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
  return !Is64Bit && IsTailCall &&
         getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
         Subtarget->isPICStyleGOT();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to address "Dst" with size and alignment information
/// specified by the specific parameter attribute. The copy will be passed
/// as a byval function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
}

SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
                                            const CCValAssign &VA,
                                            MachineFrameInfo *MFI,
                                            unsigned CC,
                                            SDValue Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags =
    cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
  bool AlwaysUseMutable = (CC == CallingConv::Fast) && PerformTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
                                  VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), Root, FIN,
                     PseudoSourceValue::getFixedStack(FI), 0);
}

SDValue
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));

  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();
  bool IsWin64 = Subtarget->isTargetWin64();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));

  SmallVector<SDValue, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT RegVT = VA.getLocVT();
      TargetRegisterClass *RC = NULL;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
        RC = X86::VR128RegisterClass;
      else if (RegVT.isVector()) {
        assert(RegVT.getSizeInBits() == 64);
        if (!Is64Bit)
          RC = X86::VR64RegisterClass;     // MMX values are passed in MMXs.
        else {
          // Darwin calling convention passes MMX values in either GPRs or
          // XMMs in x86-64. Other targets pass them in memory.
          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
            RC = X86::VR128RegisterClass;  // MMX values are passed in XMMs.
            RegVT = MVT::v2i64;
          } else {
            RC = X86::GR64RegisterClass;   // v1i64 values are passed in GPRs.
            RegVT = MVT::i64;
          }
        }
      } else {
        assert(0 && "Unknown argument type!");
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
      if (Is64Bit && RegVT != VA.getLocVT()) {
        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
        else if (RC == X86::VR128RegisterClass) {
          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i64, ArgValue,
                                 DAG.getConstant(0, MVT::i64));
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
        }
      }

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
    }
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. Save the argument into
  // a virtual register so that we can access it from the return points.
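  // (LowerRET above performs the matching copy of this virtual register
  // back out into %rax.)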
1326 if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { 1327 MachineFunction &MF = DAG.getMachineFunction(); 1328 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1329 unsigned Reg = FuncInfo->getSRetReturnReg(); 1330 if (!Reg) { 1331 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); 1332 FuncInfo->setSRetReturnReg(Reg); 1333 } 1334 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]); 1335 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root); 1336 } 1337 1338 unsigned StackSize = CCInfo.getNextStackOffset(); 1339 // Align the stack specially for tail calls. 1340 if (PerformTailCallOpt && CC == CallingConv::Fast) 1341 StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 1342 1343 // If the function takes a variable number of arguments, make a frame index 1344 // for the start of the first vararg value... for expansion of llvm.va_start. 1345 if (isVarArg) { 1346 if (Is64Bit || CC != CallingConv::X86_FastCall) { 1347 VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); 1348 } 1349 if (Is64Bit) { 1350 unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; 1351 1352 // FIXME: We should really autogenerate these arrays 1353 static const unsigned GPR64ArgRegsWin64[] = { 1354 X86::RCX, X86::RDX, X86::R8, X86::R9 1355 }; 1356 static const unsigned XMMArgRegsWin64[] = { 1357 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 1358 }; 1359 static const unsigned GPR64ArgRegs64Bit[] = { 1360 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 1361 }; 1362 static const unsigned XMMArgRegs64Bit[] = { 1363 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1364 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1365 }; 1366 const unsigned *GPR64ArgRegs, *XMMArgRegs; 1367 1368 if (IsWin64) { 1369 TotalNumIntRegs = 4; TotalNumXMMRegs = 4; 1370 GPR64ArgRegs = GPR64ArgRegsWin64; 1371 XMMArgRegs = XMMArgRegsWin64; 1372 } else { 1373 TotalNumIntRegs = 6; TotalNumXMMRegs = 8; 1374 GPR64ArgRegs = GPR64ArgRegs64Bit; 1375 XMMArgRegs = XMMArgRegs64Bit; 1376 } 1377 unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 1378 TotalNumIntRegs); 1379 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 1380 TotalNumXMMRegs); 1381 1382 // For X86-64, if there are vararg parameters that are passed via 1383 // registers, then we must store them to their spots on the stack so they 1384 // may be loaded by dereferencing the result of va_next. 1385 VarArgsGPOffset = NumIntRegs * 8; 1386 VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; 1387 RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + 1388 TotalNumXMMRegs * 16, 16); 1389 1390 // Store the integer parameter registers. 1391 SmallVector<SDValue, 8> MemOps; 1392 SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 1393 SDValue FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1394 DAG.getIntPtrConstant(VarArgsGPOffset)); 1395 for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { 1396 unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], 1397 X86::GR64RegisterClass); 1398 SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); 1399 SDValue Store = 1400 DAG.getStore(Val.getValue(1), Val, FIN, 1401 PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0); 1402 MemOps.push_back(Store); 1403 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1404 DAG.getIntPtrConstant(8)); 1405 } 1406 1407 // Now store the XMM (fp + vector) parameter registers.
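      // For reference (illustrative figures for the SysV x86-64 case): the
      // register save area created above is 176 bytes,
      //   bytes   0..47  : RDI, RSI, RDX, RCX, R8, R9   (6 GPRs * 8 bytes)
      //   bytes  48..175 : XMM0 .. XMM7                 (8 XMMs * 16 bytes)
      // and VarArgsGPOffset/VarArgsFPOffset are byte offsets into this block.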
1408 FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1409 DAG.getIntPtrConstant(VarArgsFPOffset)); 1410 for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { 1411 unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 1412 X86::VR128RegisterClass); 1413 SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); 1414 SDValue Store = 1415 DAG.getStore(Val.getValue(1), Val, FIN, 1416 PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0); 1417 MemOps.push_back(Store); 1418 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1419 DAG.getIntPtrConstant(16)); 1420 } 1421 if (!MemOps.empty()) 1422 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, 1423 &MemOps[0], MemOps.size()); 1424 } 1425 } 1426 1427 ArgValues.push_back(Root); 1428 1429 // Some calling conventions require the callee to pop arguments. 1430 if (IsCalleePop(isVarArg, CC)) { 1431 BytesToPopOnReturn = StackSize; // Callee pops everything. 1432 BytesCallerReserves = 0; 1433 } else { 1434 BytesToPopOnReturn = 0; // Callee pops nothing. 1435 // If this is an sret function, the return should pop the hidden pointer. 1436 if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op)) 1437 BytesToPopOnReturn = 4; 1438 BytesCallerReserves = StackSize; 1439 } 1440 1441 if (!Is64Bit) { 1442 RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only. 1443 if (CC == CallingConv::X86_FastCall) 1444 VarArgsFrameIndex = 0xAAAAAAA; // fastcall functions can't have varargs. 1445 } 1446 1447 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1448 1449 // Return the new list of results. 1450 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(), 1451 &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); 1452} 1453 1454SDValue 1455X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, 1456 const SDValue &StackPtr, 1457 const CCValAssign &VA, 1458 SDValue Chain, 1459 SDValue Arg, ISD::ArgFlagsTy Flags) { 1460 unsigned LocMemOffset = VA.getLocMemOffset(); 1461 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1462 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1463 if (Flags.isByVal()) { 1464 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG); 1465 } 1466 return DAG.getStore(Chain, Arg, PtrOff, 1467 PseudoSourceValue::getStack(), LocMemOffset); 1468} 1469 1470/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call 1471/// optimization is performed and it is required. 1472SDValue 1473X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, 1474 SDValue &OutRetAddr, 1475 SDValue Chain, 1476 bool IsTailCall, 1477 bool Is64Bit, 1478 int FPDiff) { 1479 if (!IsTailCall || FPDiff==0) return Chain; 1480 1481 // Adjust the Return address stack slot. 1482 MVT VT = getPointerTy(); 1483 OutRetAddr = getReturnAddressFrameIndex(DAG); 1484 1485 // Load the "old" Return address. 1486 OutRetAddr = DAG.getLoad(VT, Chain, OutRetAddr, NULL, 0); 1487 return SDValue(OutRetAddr.getNode(), 1); 1488} 1489 1490/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call 1491/// optimization is performed and it is required (FPDiff!=0). 1492static SDValue 1493EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, 1494 SDValue Chain, SDValue RetAddrFrIdx, 1495 bool Is64Bit, int FPDiff) { 1496 // Store the return address to the appropriate stack slot. 1497 if (!FPDiff) return Chain; 1498 // Calculate the new stack slot for the return address. 1499 int SlotSize = Is64Bit ?
8 : 4; 1500 int NewReturnAddrFI = 1501 MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); 1502 MVT VT = Is64Bit ? MVT::i64 : MVT::i32; 1503 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); 1504 Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, 1505 PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); 1506 return Chain; 1507} 1508 1509SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { 1510 MachineFunction &MF = DAG.getMachineFunction(); 1511 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); 1512 SDValue Chain = TheCall->getChain(); 1513 unsigned CC = TheCall->getCallingConv(); 1514 bool isVarArg = TheCall->isVarArg(); 1515 bool IsTailCall = TheCall->isTailCall() && 1516 CC == CallingConv::Fast && PerformTailCallOpt; 1517 SDValue Callee = TheCall->getCallee(); 1518 bool Is64Bit = Subtarget->is64Bit(); 1519 bool IsStructRet = CallIsStructReturn(TheCall); 1520 1521 assert(!(isVarArg && CC == CallingConv::Fast) && 1522 "Var args not supported with calling convention fastcc"); 1523 1524 // Analyze operands of the call, assigning locations to each operand. 1525 SmallVector<CCValAssign, 16> ArgLocs; 1526 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1527 CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC)); 1528 1529 // Get a count of how many bytes are to be pushed on the stack. 1530 unsigned NumBytes = CCInfo.getNextStackOffset(); 1531 if (PerformTailCallOpt && CC == CallingConv::Fast) 1532 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); 1533 1534 int FPDiff = 0; 1535 if (IsTailCall) { 1536 // Lower arguments at fp - stackoffset + fpdiff. 1537 unsigned NumBytesCallerPushed = 1538 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); 1539 FPDiff = NumBytesCallerPushed - NumBytes; 1540 1541 // Update the delta of movement of the returnaddr stackslot, but only if 1542 // this call needs to move it further than any previous call did. 1543 if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta())) 1544 MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); 1545 } 1546 1547 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1548 1549 SDValue RetAddrFrIdx; 1550 // Load the return address for tail calls. 1551 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit, 1552 FPDiff); 1553 1554 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 1555 SmallVector<SDValue, 8> MemOpChains; 1556 SDValue StackPtr; 1557 1558 // Walk the register/memloc assignments, inserting copies/loads. In the case 1559 // of tail call optimization, arguments are handled later. 1560 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1561 CCValAssign &VA = ArgLocs[i]; 1562 SDValue Arg = TheCall->getArg(i); 1563 ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); 1564 bool isByVal = Flags.isByVal(); 1565 1566 // Promote the value if needed.
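    // For example (illustrative): an i8 argument assigned to a 32-bit
    // location arrives here with LocInfo SExt/ZExt/AExt and is widened,
    //   i8 %x  --(ISD::ZERO_EXTEND)-->  i32  -->  copied into a GR32 register.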
1567 switch (VA.getLocInfo()) { 1568 default: assert(0 && "Unknown loc info!"); 1569 case CCValAssign::Full: break; 1570 case CCValAssign::SExt: 1571 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1572 break; 1573 case CCValAssign::ZExt: 1574 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1575 break; 1576 case CCValAssign::AExt: 1577 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1578 break; 1579 } 1580 1581 if (VA.isRegLoc()) { 1582 if (Is64Bit) { 1583 MVT RegVT = VA.getLocVT(); 1584 if (RegVT.isVector() && RegVT.getSizeInBits() == 64) 1585 switch (VA.getLocReg()) { 1586 default: 1587 break; 1588 case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX: 1589 case X86::R8: { 1590 // Special case: passing MMX values in GPR registers. 1591 Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg); 1592 break; 1593 } 1594 case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3: 1595 case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: { 1596 // Special case: passing MMX values in XMM registers. 1597 Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg); 1598 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Arg); 1599 Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 1600 DAG.getNode(ISD::UNDEF, MVT::v2i64), Arg, 1601 getMOVLMask(2, DAG)); 1602 break; 1603 } 1604 } 1605 } 1606 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1607 } else { 1608 if (!IsTailCall || (IsTailCall && isByVal)) { 1609 assert(VA.isMemLoc()); 1610 if (StackPtr.getNode() == 0) 1611 StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy()); 1612 1613 MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, 1614 Chain, Arg, Flags)); 1615 } 1616 } 1617 } 1618 1619 if (!MemOpChains.empty()) 1620 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1621 &MemOpChains[0], MemOpChains.size()); 1622 1623 // Build a sequence of copy-to-reg nodes chained together with token chain 1624 // and flag operands which copy the outgoing args into registers. 1625 SDValue InFlag; 1626 // Tail call byval lowering might overwrite argument registers, so in case of 1627 // tail call optimization the copies to registers are lowered later. 1628 if (!IsTailCall) 1629 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1630 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1631 InFlag); 1632 InFlag = Chain.getValue(1); 1633 } 1634 1635 // ELF/PIC requires the GOT pointer to be in the EBX register before 1636 // function calls via the PLT. 1637 if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) { 1638 Chain = DAG.getCopyToReg(Chain, X86::EBX, 1639 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 1640 InFlag); 1641 InFlag = Chain.getValue(1); 1642 } 1643 // If we are tail calling and generating PIC/GOT style code, load the address 1644 // of the callee into ecx. The value in ecx is used as target of the tail 1645 // jump. This is done to circumvent the ebx/callee-saved problem for tail 1646 // calls on PIC/GOT architectures. Normally we would just put the address of 1647 // GOT into ebx and then call target@PLT. But for tail calls ebx would be 1648 // restored (since ebx is callee saved) before jumping to the target@PLT. 1649 if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) { 1650 // Note: The actual moving to ecx is done further down.
1651 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); 1652 if (G && !G->getGlobal()->hasHiddenVisibility() && 1653 !G->getGlobal()->hasProtectedVisibility()) 1654 Callee = LowerGlobalAddress(Callee, DAG); 1655 else if (isa<ExternalSymbolSDNode>(Callee)) 1656 Callee = LowerExternalSymbol(Callee, DAG); 1657 } 1658 1659 if (Is64Bit && isVarArg) { 1660 // From the AMD64 ABI document: 1661 // For calls that may call functions that use varargs or stdargs 1662 // (prototype-less calls or calls to functions containing ellipsis (...) in 1663 // the declaration) %al is used as a hidden argument to specify the number 1664 // of SSE registers used. The contents of %al do not need to match exactly 1665 // the number of registers, but must be an upper bound on the number of SSE 1666 // registers used and is in the range 0 - 8 inclusive. 1667 1668 // FIXME: Verify this on Win64 1669 // Count the number of XMM registers allocated. 1670 static const unsigned XMMArgRegs[] = { 1671 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1672 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1673 }; 1674 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1675 1676 Chain = DAG.getCopyToReg(Chain, X86::AL, 1677 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1678 InFlag = Chain.getValue(1); 1679 } 1680 1681 1682 // For tail calls lower the arguments to the 'real' stack slot. 1683 if (IsTailCall) { 1684 SmallVector<SDValue, 8> MemOpChains2; 1685 SDValue FIN; 1686 int FI = 0; 1687 // Do not flag preceding copytoreg stuff together with the following stuff. 1688 InFlag = SDValue(); 1689 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1690 CCValAssign &VA = ArgLocs[i]; 1691 if (!VA.isRegLoc()) { 1692 assert(VA.isMemLoc()); 1693 SDValue Arg = TheCall->getArg(i); 1694 ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); 1695 // Create the frame index. 1696 int32_t Offset = VA.getLocMemOffset()+FPDiff; 1697 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; 1698 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); 1699 FIN = DAG.getFrameIndex(FI, getPointerTy()); 1700 1701 if (Flags.isByVal()) { 1702 // Copy relative to the frame pointer. 1703 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); 1704 if (StackPtr.getNode() == 0) 1705 StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy()); 1706 Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source); 1707 1708 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain, 1709 Flags, DAG)); 1710 } else { 1711 // Store relative to the frame pointer. 1712 MemOpChains2.push_back( 1713 DAG.getStore(Chain, Arg, FIN, 1714 PseudoSourceValue::getFixedStack(FI), 0)); 1715 } 1716 } 1717 } 1718 1719 if (!MemOpChains2.empty()) 1720 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1721 &MemOpChains2[0], MemOpChains2.size()); 1722 1723 // Copy arguments to their registers. 1724 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1725 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1726 InFlag); 1727 InFlag = Chain.getValue(1); 1728 } 1729 InFlag = SDValue(); 1730 1731 // Store the return address to the appropriate stack slot. 1732 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, 1733 FPDiff); 1734 } 1735 1736 // If the callee is a GlobalAddress node (quite common, every direct call is), 1737 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
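  // For example (illustrative): a direct "call void @foo()" carries a
  // GlobalAddressSDNode callee, which becomes a TargetGlobalAddress operand
  // of the X86ISD::CALL node below; an indirect call through a function
  // pointer keeps its callee value unchanged.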
1738 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1739 // We should use an extra load for direct calls to dllimported functions in 1740 // non-JIT mode. 1741 if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1742 getTargetMachine(), true)) 1743 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(), 1744 G->getOffset()); 1745 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1746 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1747 } else if (IsTailCall) { 1748 unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; 1749 1750 Chain = DAG.getCopyToReg(Chain, 1751 DAG.getRegister(Opc, getPointerTy()), 1752 Callee, InFlag); 1753 Callee = DAG.getRegister(Opc, getPointerTy()); 1754 // Add the register as a live out. 1755 DAG.getMachineFunction().getRegInfo().addLiveOut(Opc); 1756 } 1757 1758 // Returns a chain & a flag for retval copy to use. 1759 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1760 SmallVector<SDValue, 8> Ops; 1761 1762 if (IsTailCall) { 1763 Ops.push_back(Chain); 1764 Ops.push_back(DAG.getIntPtrConstant(NumBytes, true)); 1765 Ops.push_back(DAG.getIntPtrConstant(0, true)); 1766 if (InFlag.getNode()) 1767 Ops.push_back(InFlag); 1768 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1769 InFlag = Chain.getValue(1); 1770 1771 // Returns a chain & a flag for retval copy to use. 1772 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1773 Ops.clear(); 1774 } 1775 1776 Ops.push_back(Chain); 1777 Ops.push_back(Callee); 1778 1779 if (IsTailCall) 1780 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); 1781 1782 // Add argument registers to the end of the list so that they are known live 1783 // into the call. 1784 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1785 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1786 RegsToPass[i].second.getValueType())); 1787 1788 // Add an implicit use of the GOT pointer in EBX. 1789 if (!IsTailCall && !Is64Bit && 1790 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1791 Subtarget->isPICStyleGOT()) 1792 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 1793 1794 // Add an implicit use of AL for x86 vararg functions. 1795 if (Is64Bit && isVarArg) 1796 Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); 1797 1798 if (InFlag.getNode()) 1799 Ops.push_back(InFlag); 1800 1801 if (IsTailCall) { 1802 assert(InFlag.getNode() && 1803 "Flag must be set. Depend on flag being set in LowerRET"); 1804 Chain = DAG.getNode(X86ISD::TAILCALL, 1805 TheCall->getVTList(), &Ops[0], Ops.size()); 1806 1807 return SDValue(Chain.getNode(), Op.getResNo()); 1808 } 1809 1810 Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size()); 1811 InFlag = Chain.getValue(1); 1812 1813 // Create the CALLSEQ_END node. 1814 unsigned NumBytesForCalleeToPush; 1815 if (IsCalleePop(isVarArg, CC)) 1816 NumBytesForCalleeToPush = NumBytes; // Callee pops everything 1817 else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet) 1818 // If this is a call to a struct-return function, the callee 1819 // pops the hidden struct pointer, so we have to push it back. 1820 // This is common for Darwin/X86, Linux & Mingw32 targets. 1821 NumBytesForCalleeToPush = 4; 1822 else 1823 NumBytesForCalleeToPush = 0; // Callee pops nothing. 1824 1825 // Returns a flag for retval copy to use.
1826 Chain = DAG.getCALLSEQ_END(Chain, 1827 DAG.getIntPtrConstant(NumBytes, true), 1828 DAG.getIntPtrConstant(NumBytesForCalleeToPush, 1829 true), 1830 InFlag); 1831 InFlag = Chain.getValue(1); 1832 1833 // Handle result values, copying them out of physregs into vregs that we 1834 // return. 1835 return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), 1836 Op.getResNo()); 1837} 1838 1839 1840//===----------------------------------------------------------------------===// 1841// Fast Calling Convention (tail call) implementation 1842//===----------------------------------------------------------------------===// 1843 1844// Like StdCall, this is a callee-cleans-arguments convention, except that ECX 1845// is reserved for storing the address of the tail-called function. Only 2 1846// registers are free for argument passing (inreg). Tail call optimization is 1847// performed provided: 1848// * tailcallopt is enabled 1849// * caller/callee are fastcc 1850// On the X86_64 architecture with GOT-style position independent code only 1851// local (within module) calls are supported at the moment. 1852// To keep the stack aligned according to the platform ABI, the function 1853// GetAlignedArgumentStackSize ensures that the argument delta is always a 1854// multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld 1855// for example.) 1856// If the callee of a tail call has more arguments than the caller, the caller 1857// needs to make sure that there is room to move the RETADDR to. This is 1858// achieved by reserving an area the size of the argument delta right after the 1859// original RETADDR, but before the saved framepointer or the spilled registers 1860// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4) 1861// stack layout: 1862// arg1 1863// arg2 1864// RETADDR 1865// [ new RETADDR 1866// move area ] 1867// (possible EBP) 1868// ESI 1869// EDI 1870// local1 .. 1871 1872/// GetAlignedArgumentStackSize - Align the stack size so that, together with 1873/// the return address slot, it meets the alignment requirement; e.g. 16n + 12 1874/// for a 16-byte alignment requirement with 4-byte slots. 1875unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, 1876 SelectionDAG& DAG) { 1877 MachineFunction &MF = DAG.getMachineFunction(); 1878 const TargetMachine &TM = MF.getTarget(); 1879 const TargetFrameInfo &TFI = *TM.getFrameInfo(); 1880 unsigned StackAlignment = TFI.getStackAlignment(); 1881 uint64_t AlignMask = StackAlignment - 1; 1882 int64_t Offset = StackSize; 1883 uint64_t SlotSize = TD->getPointerSize(); 1884 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { 1885 // The misalignment is at most StackAlignment - SlotSize (e.g. 12), so just 1886 // add the difference. 1887 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); 1888 } else { 1889 // Mask out the lower bits, then add the stack alignment once plus the 1890 // StackAlignment - SlotSize (e.g. 12) bytes. 1891 Offset = ((~AlignMask) & Offset) + StackAlignment + 1892 (StackAlignment-SlotSize); 1893 } 1894 return Offset; 1895} 1896 1897/// IsEligibleForTailCallOptimization - Check whether the call is eligible for 1898/// tail call optimization. A call is eligible if the caller/callee calling 1899/// conventions match (currently only fastcc supports tail calls) and 1900/// the CALL is immediately followed by a RET.
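/// An eligible caller/callee pair looks like this (illustrative IR, assuming
/// tail call optimization is enabled and both functions are fastcc):
///   define fastcc i32 @caller(i32 %x) {
///     %r = tail call fastcc i32 @callee(i32 %x)
///     ret i32 %r
///   }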
1897bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall, 1898 SDValue Ret, 1899 SelectionDAG& DAG) const { 1900 if (!PerformTailCallOpt) 1901 return false; 1902 1903 if (CheckTailCallReturnConstraints(TheCall, Ret)) { 1904 MachineFunction &MF = DAG.getMachineFunction(); 1905 unsigned CallerCC = MF.getFunction()->getCallingConv(); 1906 unsigned CalleeCC = TheCall->getCallingConv(); 1907 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 1908 SDValue Callee = TheCall->getCallee(); 1909 // On x86/32-bit, PIC/GOT tail calls are supported. 1910 if (getTargetMachine().getRelocationModel() != Reloc::PIC_ || 1911 !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit()) 1912 return true; 1913 1914 // Can only do local tail calls (in same module, hidden or protected) on 1915 // x86_64 PIC/GOT at the moment. 1916 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1917 return G->getGlobal()->hasHiddenVisibility() 1918 || G->getGlobal()->hasProtectedVisibility(); 1919 } 1920 } 1921 1922 return false; 1923} 1924 1925FastISel * 1926X86TargetLowering::createFastISel(MachineFunction &mf, 1927 MachineModuleInfo *mmo, 1928 DwarfWriter *dw, 1929 DenseMap<const Value *, unsigned> &vm, 1930 DenseMap<const BasicBlock *, 1931 MachineBasicBlock *> &bm, 1932 DenseMap<const AllocaInst *, int> &am 1933#ifndef NDEBUG 1934 , SmallSet<Instruction*, 8> &cil 1935#endif 1936 ) { 1937 return X86::createFastISel(mf, mmo, dw, vm, bm, am 1938#ifndef NDEBUG 1939 , cil 1940#endif 1941 ); 1942} 1943 1944 1945//===----------------------------------------------------------------------===// 1946// Other Lowering Hooks 1947//===----------------------------------------------------------------------===// 1948 1949 1950SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1951 MachineFunction &MF = DAG.getMachineFunction(); 1952 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1953 int ReturnAddrIndex = FuncInfo->getRAIndex(); 1954 1955 if (ReturnAddrIndex == 0) { 1956 // Set up a frame object for the return address. 1957 uint64_t SlotSize = TD->getPointerSize(); 1958 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize); 1959 FuncInfo->setRAIndex(ReturnAddrIndex); 1960 } 1961 1962 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1963} 1964 1965 1966/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the 1967/// X86-specific condition code, returning the condition code and the LHS/RHS 1968/// of the comparison to make. 1969static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1970 SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) { 1971 if (!isFP) { 1972 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1973 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 1974 // X > -1 -> X == 0, jump !sign. 1975 RHS = DAG.getConstant(0, RHS.getValueType()); 1976 return X86::COND_NS; 1977 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 1978 // X < 0 -> X == 0, jump on sign.
1979 return X86::COND_S; 1980 } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { 1981 // X < 1 -> X <= 0 1982 RHS = DAG.getConstant(0, RHS.getValueType()); 1983 return X86::COND_LE; 1984 } 1985 } 1986 1987 switch (SetCCOpcode) { 1988 default: assert(0 && "Invalid integer condition!"); 1989 case ISD::SETEQ: return X86::COND_E; 1990 case ISD::SETGT: return X86::COND_G; 1991 case ISD::SETGE: return X86::COND_GE; 1992 case ISD::SETLT: return X86::COND_L; 1993 case ISD::SETLE: return X86::COND_LE; 1994 case ISD::SETNE: return X86::COND_NE; 1995 case ISD::SETULT: return X86::COND_B; 1996 case ISD::SETUGT: return X86::COND_A; 1997 case ISD::SETULE: return X86::COND_BE; 1998 case ISD::SETUGE: return X86::COND_AE; 1999 } 2000 } 2001 2002 // First determine if it is required or is profitable to flip the operands. 2003 2004 // If LHS is a foldable load, but RHS is not, flip the condition. 2005 if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) && 2006 !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) { 2007 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); 2008 std::swap(LHS, RHS); 2009 } 2010 2011 switch (SetCCOpcode) { 2012 default: break; 2013 case ISD::SETOLT: 2014 case ISD::SETOLE: 2015 case ISD::SETUGT: 2016 case ISD::SETUGE: 2017 std::swap(LHS, RHS); 2018 break; 2019 } 2020 2021 // On a floating point condition, the flags are set as follows: 2022 // ZF PF CF op 2023 // 0 | 0 | 0 | X > Y 2024 // 0 | 0 | 1 | X < Y 2025 // 1 | 0 | 0 | X == Y 2026 // 1 | 1 | 1 | unordered 2027 switch (SetCCOpcode) { 2028 default: assert(0 && "Condcode should be pre-legalized away"); 2029 case ISD::SETUEQ: 2030 case ISD::SETEQ: return X86::COND_E; 2031 case ISD::SETOLT: // flipped 2032 case ISD::SETOGT: 2033 case ISD::SETGT: return X86::COND_A; 2034 case ISD::SETOLE: // flipped 2035 case ISD::SETOGE: 2036 case ISD::SETGE: return X86::COND_AE; 2037 case ISD::SETUGT: // flipped 2038 case ISD::SETULT: 2039 case ISD::SETLT: return X86::COND_B; 2040 case ISD::SETUGE: // flipped 2041 case ISD::SETULE: 2042 case ISD::SETLE: return X86::COND_BE; 2043 case ISD::SETONE: 2044 case ISD::SETNE: return X86::COND_NE; 2045 case ISD::SETUO: return X86::COND_P; 2046 case ISD::SETO: return X86::COND_NP; 2047 } 2048} 2049 2050/// hasFPCMov - Is there a floating point cmov for the specific X86 condition 2051/// code? The current x86 ISA includes the following FP cmov instructions: 2052/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 2053static bool hasFPCMov(unsigned X86CC) { 2054 switch (X86CC) { 2055 default: 2056 return false; 2057 case X86::COND_B: 2058 case X86::COND_BE: 2059 case X86::COND_E: 2060 case X86::COND_P: 2061 case X86::COND_A: 2062 case X86::COND_AE: 2063 case X86::COND_NE: 2064 case X86::COND_NP: 2065 return true; 2066 } 2067} 2068 2069/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 2070/// true if Op is undef or if its value falls within the specified range 2071/// [Low, Hi). 2072static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) { 2073 if (Op.getOpcode() == ISD::UNDEF) 2074 return true; 2075 unsigned Val = cast<ConstantSDNode>(Op)->getZExtValue(); 2076 return (Val >= Low && Val < Hi); 2077} 2078 2079/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 2080/// true if Op is undef or if its value is equal to the specified value.
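/// For example, with Val == 3 this accepts an UNDEF mask element or a
/// ConstantSDNode holding 3, and rejects any other constant element.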
2081static bool isUndefOrEqual(SDValue Op, unsigned Val) { 2082 if (Op.getOpcode() == ISD::UNDEF) 2083 return true; 2084 return cast<ConstantSDNode>(Op)->getZExtValue() == Val; 2085} 2086 2087/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 2088/// specifies a shuffle of elements that is suitable for input to PSHUFD. 2089bool X86::isPSHUFDMask(SDNode *N) { 2090 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2091 2092 if (N->getNumOperands() != 2 && N->getNumOperands() != 4) 2093 return false; 2094 2095 // Check if the value doesn't reference the second vector. 2096 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 2097 SDValue Arg = N->getOperand(i); 2098 if (Arg.getOpcode() == ISD::UNDEF) continue; 2099 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2100 if (cast<ConstantSDNode>(Arg)->getZExtValue() >= e) 2101 return false; 2102 } 2103 2104 return true; 2105} 2106 2107/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 2108/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 2109bool X86::isPSHUFHWMask(SDNode *N) { 2110 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2111 2112 if (N->getNumOperands() != 8) 2113 return false; 2114 2115 // Lower quadword copied in order. 2116 for (unsigned i = 0; i != 4; ++i) { 2117 SDValue Arg = N->getOperand(i); 2118 if (Arg.getOpcode() == ISD::UNDEF) continue; 2119 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2120 if (cast<ConstantSDNode>(Arg)->getZExtValue() != i) 2121 return false; 2122 } 2123 2124 // Upper quadword shuffled. 2125 for (unsigned i = 4; i != 8; ++i) { 2126 SDValue Arg = N->getOperand(i); 2127 if (Arg.getOpcode() == ISD::UNDEF) continue; 2128 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2129 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2130 if (Val < 4 || Val > 7) 2131 return false; 2132 } 2133 2134 return true; 2135} 2136 2137/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 2138/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 2139bool X86::isPSHUFLWMask(SDNode *N) { 2140 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2141 2142 if (N->getNumOperands() != 8) 2143 return false; 2144 2145 // Upper quadword copied in order. 2146 for (unsigned i = 4; i != 8; ++i) 2147 if (!isUndefOrEqual(N->getOperand(i), i)) 2148 return false; 2149 2150 // Lower quadword shuffled. 2151 for (unsigned i = 0; i != 4; ++i) 2152 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2153 return false; 2154 2155 return true; 2156} 2157 2158/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2159/// specifies a shuffle of elements that is suitable for input to SHUFP*. 2160static bool isSHUFPMask(SDOperandPtr Elems, unsigned NumElems) { 2161 if (NumElems != 2 && NumElems != 4) return false; 2162 2163 unsigned Half = NumElems / 2; 2164 for (unsigned i = 0; i < Half; ++i) 2165 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 2166 return false; 2167 for (unsigned i = Half; i < NumElems; ++i) 2168 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 2169 return false; 2170 2171 return true; 2172} 2173 2174bool X86::isSHUFPMask(SDNode *N) { 2175 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2176 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 2177} 2178 2179/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 2180/// the reverse of what x86 shuffles want. 
x86 shuffles require the lower 2181/// half elements to come from vector 1 (which would equal the destination) and 2182/// the upper half to come from vector 2. 2183static bool isCommutedSHUFP(SDOperandPtr Ops, unsigned NumOps) { 2184 if (NumOps != 2 && NumOps != 4) return false; 2185 2186 unsigned Half = NumOps / 2; 2187 for (unsigned i = 0; i < Half; ++i) 2188 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 2189 return false; 2190 for (unsigned i = Half; i < NumOps; ++i) 2191 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 2192 return false; 2193 return true; 2194} 2195 2196static bool isCommutedSHUFP(SDNode *N) { 2197 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2198 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 2199} 2200 2201/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2202/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2203bool X86::isMOVHLPSMask(SDNode *N) { 2204 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2205 2206 if (N->getNumOperands() != 4) 2207 return false; 2208 2209 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2210 return isUndefOrEqual(N->getOperand(0), 6) && 2211 isUndefOrEqual(N->getOperand(1), 7) && 2212 isUndefOrEqual(N->getOperand(2), 2) && 2213 isUndefOrEqual(N->getOperand(3), 3); 2214} 2215 2216/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 2217/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 2218/// <2, 3, 2, 3> 2219bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 2220 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2221 2222 if (N->getNumOperands() != 4) 2223 return false; 2224 2225 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 2226 return isUndefOrEqual(N->getOperand(0), 2) && 2227 isUndefOrEqual(N->getOperand(1), 3) && 2228 isUndefOrEqual(N->getOperand(2), 2) && 2229 isUndefOrEqual(N->getOperand(3), 3); 2230} 2231 2232/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2233/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 2234bool X86::isMOVLPMask(SDNode *N) { 2235 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2236 2237 unsigned NumElems = N->getNumOperands(); 2238 if (NumElems != 2 && NumElems != 4) 2239 return false; 2240 2241 for (unsigned i = 0; i < NumElems/2; ++i) 2242 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 2243 return false; 2244 2245 for (unsigned i = NumElems/2; i < NumElems; ++i) 2246 if (!isUndefOrEqual(N->getOperand(i), i)) 2247 return false; 2248 2249 return true; 2250} 2251 2252/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 2253/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 2254/// and MOVLHPS. 2255bool X86::isMOVHPMask(SDNode *N) { 2256 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2257 2258 unsigned NumElems = N->getNumOperands(); 2259 if (NumElems != 2 && NumElems != 4) 2260 return false; 2261 2262 for (unsigned i = 0; i < NumElems/2; ++i) 2263 if (!isUndefOrEqual(N->getOperand(i), i)) 2264 return false; 2265 2266 for (unsigned i = 0; i < NumElems/2; ++i) { 2267 SDValue Arg = N->getOperand(i + NumElems/2); 2268 if (!isUndefOrEqual(Arg, i + NumElems)) 2269 return false; 2270 } 2271 2272 return true; 2273} 2274 2275/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 2276/// specifies a shuffle of elements that is suitable for input to UNPCKL.
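/// For example (illustrative), unpcklps on v4f32 interleaves the low halves
/// of its two inputs, so the expected mask is <0, 4, 1, 5> (any element may
/// also be undef).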
2277bool static isUNPCKLMask(SDOperandPtr Elts, unsigned NumElts, 2278 bool V2IsSplat = false) { 2279 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 2280 return false; 2281 2282 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 2283 SDValue BitI = Elts[i]; 2284 SDValue BitI1 = Elts[i+1]; 2285 if (!isUndefOrEqual(BitI, j)) 2286 return false; 2287 if (V2IsSplat) { 2288 if (isUndefOrEqual(BitI1, NumElts)) 2289 return false; 2290 } else { 2291 if (!isUndefOrEqual(BitI1, j + NumElts)) 2292 return false; 2293 } 2294 } 2295 2296 return true; 2297} 2298 2299bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 2300 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2301 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 2302} 2303 2304/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 2305/// specifies a shuffle of elements that is suitable for input to UNPCKH. 2306bool static isUNPCKHMask(SDOperandPtr Elts, unsigned NumElts, 2307 bool V2IsSplat = false) { 2308 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 2309 return false; 2310 2311 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 2312 SDValue BitI = Elts[i]; 2313 SDValue BitI1 = Elts[i+1]; 2314 if (!isUndefOrEqual(BitI, j + NumElts/2)) 2315 return false; 2316 if (V2IsSplat) { 2317 if (isUndefOrEqual(BitI1, NumElts)) 2318 return false; 2319 } else { 2320 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 2321 return false; 2322 } 2323 } 2324 2325 return true; 2326} 2327 2328bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 2329 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2330 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 2331} 2332 2333/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 2334/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 2335/// <0, 0, 1, 1> 2336bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 2337 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2338 2339 unsigned NumElems = N->getNumOperands(); 2340 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2341 return false; 2342 2343 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2344 SDValue BitI = N->getOperand(i); 2345 SDValue BitI1 = N->getOperand(i+1); 2346 2347 if (!isUndefOrEqual(BitI, j)) 2348 return false; 2349 if (!isUndefOrEqual(BitI1, j)) 2350 return false; 2351 } 2352 2353 return true; 2354} 2355 2356/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 2357/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 2358/// <2, 2, 3, 3> 2359bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 2360 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2361 2362 unsigned NumElems = N->getNumOperands(); 2363 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2364 return false; 2365 2366 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 2367 SDValue BitI = N->getOperand(i); 2368 SDValue BitI1 = N->getOperand(i + 1); 2369 2370 if (!isUndefOrEqual(BitI, j)) 2371 return false; 2372 if (!isUndefOrEqual(BitI1, j)) 2373 return false; 2374 } 2375 2376 return true; 2377} 2378 2379/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 2380/// specifies a shuffle of elements that is suitable for input to MOVSS, 2381/// MOVSD, and MOVD, i.e. setting the lowest element. 
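/// For example (illustrative), MOVSS on v4f32 expects the mask <4, 1, 2, 3>:
/// the lowest element comes from V2 and the remaining elements pass through
/// from V1 in order.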
2382static bool isMOVLMask(SDOperandPtr Elts, unsigned NumElts) { 2383 if (NumElts != 2 && NumElts != 4) 2384 return false; 2385 2386 if (!isUndefOrEqual(Elts[0], NumElts)) 2387 return false; 2388 2389 for (unsigned i = 1; i < NumElts; ++i) { 2390 if (!isUndefOrEqual(Elts[i], i)) 2391 return false; 2392 } 2393 2394 return true; 2395} 2396 2397bool X86::isMOVLMask(SDNode *N) { 2398 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2399 return ::isMOVLMask(N->op_begin(), N->getNumOperands()); 2400} 2401 2402/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what 2403/// x86 movss wants. X86 movss requires the lowest element to be the lowest 2404/// element of vector 2 and the other elements to come from vector 1 in order. 2405static bool isCommutedMOVL(SDOperandPtr Ops, unsigned NumOps, 2406 bool V2IsSplat = false, 2407 bool V2IsUndef = false) { 2408 if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) 2409 return false; 2410 2411 if (!isUndefOrEqual(Ops[0], 0)) 2412 return false; 2413 2414 for (unsigned i = 1; i < NumOps; ++i) { 2415 SDValue Arg = Ops[i]; 2416 if (!(isUndefOrEqual(Arg, i+NumOps) || 2417 (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || 2418 (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) 2419 return false; 2420 } 2421 2422 return true; 2423} 2424 2425static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 2426 bool V2IsUndef = false) { 2427 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2428 return isCommutedMOVL(N->op_begin(), N->getNumOperands(), 2429 V2IsSplat, V2IsUndef); 2430} 2431 2432/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2433/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 2434bool X86::isMOVSHDUPMask(SDNode *N) { 2435 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2436 2437 if (N->getNumOperands() != 4) 2438 return false; 2439 2440 // Expect 1, 1, 3, 3 2441 for (unsigned i = 0; i < 2; ++i) { 2442 SDValue Arg = N->getOperand(i); 2443 if (Arg.getOpcode() == ISD::UNDEF) continue; 2444 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2445 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2446 if (Val != 1) return false; 2447 } 2448 2449 bool HasHi = false; 2450 for (unsigned i = 2; i < 4; ++i) { 2451 SDValue Arg = N->getOperand(i); 2452 if (Arg.getOpcode() == ISD::UNDEF) continue; 2453 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2454 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2455 if (Val != 3) return false; 2456 HasHi = true; 2457 } 2458 2459 // Don't use movshdup if it can be done with a shufps. 2460 return HasHi; 2461} 2462 2463/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2464/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
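/// For example (illustrative), MOVSLDUP duplicates the even elements of its
/// source, so the expected v4f32 mask is <0, 0, 2, 2>.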
2465bool X86::isMOVSLDUPMask(SDNode *N) { 2466 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2467 2468 if (N->getNumOperands() != 4) 2469 return false; 2470 2471 // Expect 0, 0, 2, 2 2472 for (unsigned i = 0; i < 2; ++i) { 2473 SDValue Arg = N->getOperand(i); 2474 if (Arg.getOpcode() == ISD::UNDEF) continue; 2475 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2476 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2477 if (Val != 0) return false; 2478 } 2479 2480 bool HasHi = false; 2481 for (unsigned i = 2; i < 4; ++i) { 2482 SDValue Arg = N->getOperand(i); 2483 if (Arg.getOpcode() == ISD::UNDEF) continue; 2484 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2485 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2486 if (Val != 2) return false; 2487 HasHi = true; 2488 } 2489 2490 // Don't use movsldup if it can be done with a shufps. 2491 return HasHi; 2492} 2493 2494/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand 2495/// specifies an identity operation on the LHS or RHS. 2496static bool isIdentityMask(SDNode *N, bool RHS = false) { 2497 unsigned NumElems = N->getNumOperands(); 2498 for (unsigned i = 0; i < NumElems; ++i) 2499 if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) 2500 return false; 2501 return true; 2502} 2503 2504/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2505/// a splat of a single element. 2506static bool isSplatMask(SDNode *N) { 2507 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2508 2509 // This is a splat operation if each element of the permute is the same, and 2510 // if the value doesn't reference the second vector. 2511 unsigned NumElems = N->getNumOperands(); 2512 SDValue ElementBase; 2513 unsigned i = 0; 2514 for (; i != NumElems; ++i) { 2515 SDValue Elt = N->getOperand(i); 2516 if (isa<ConstantSDNode>(Elt)) { 2517 ElementBase = Elt; 2518 break; 2519 } 2520 } 2521 2522 if (!ElementBase.getNode()) 2523 return false; 2524 2525 for (; i != NumElems; ++i) { 2526 SDValue Arg = N->getOperand(i); 2527 if (Arg.getOpcode() == ISD::UNDEF) continue; 2528 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2529 if (Arg != ElementBase) return false; 2530 } 2531 2532 // Make sure it is a splat of the first vector operand. 2533 return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems; 2534} 2535 2536/// getSplatMaskEltNo - Given a splat mask, return the mask element that holds 2537/// the index of the element we want to splat. 2538static SDValue getSplatMaskEltNo(SDNode *N) { 2539 assert(isSplatMask(N) && "Not a splat mask"); 2540 unsigned NumElems = N->getNumOperands(); 2541 SDValue ElementBase; 2542 unsigned i = 0; 2543 for (; i != NumElems; ++i) { 2544 SDValue Elt = N->getOperand(i); 2545 if (isa<ConstantSDNode>(Elt)) 2546 return Elt; 2547 } 2548 assert(0 && "No splat value found!"); 2549 return SDValue(); 2550} 2551 2552 2553/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2554/// a splat of a single element and it's a 2 or 4 element mask. 2555bool X86::isSplatMask(SDNode *N) { 2556 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2557 2558 // We can only splat 64-bit and 32-bit quantities with a single instruction. 2559 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2560 return false; 2561 return ::isSplatMask(N); 2562} 2563 2564/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand 2565/// specifies a splat of element #0.
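/// For example, <0, 0, 0, 0> qualifies, as does <0, undef, 0, undef>.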
2566bool X86::isSplatLoMask(SDNode *N) { 2567 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2568 2569 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 2570 if (!isUndefOrEqual(N->getOperand(i), 0)) 2571 return false; 2572 return true; 2573} 2574 2575/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2576/// specifies a shuffle of elements that is suitable for input to MOVDDUP. 2577bool X86::isMOVDDUPMask(SDNode *N) { 2578 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2579 2580 unsigned e = N->getNumOperands() / 2; 2581 for (unsigned i = 0; i < e; ++i) 2582 if (!isUndefOrEqual(N->getOperand(i), i)) 2583 return false; 2584 for (unsigned i = 0; i < e; ++i) 2585 if (!isUndefOrEqual(N->getOperand(e+i), i)) 2586 return false; 2587 return true; 2588} 2589 2590/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2591/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2592/// instructions. 2593unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2594 unsigned NumOperands = N->getNumOperands(); 2595 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2596 unsigned Mask = 0; 2597 for (unsigned i = 0; i < NumOperands; ++i) { 2598 unsigned Val = 0; 2599 SDValue Arg = N->getOperand(NumOperands-i-1); 2600 if (Arg.getOpcode() != ISD::UNDEF) 2601 Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2602 if (Val >= NumOperands) Val -= NumOperands; 2603 Mask |= Val; 2604 if (i != NumOperands - 1) 2605 Mask <<= Shift; 2606 } 2607 2608 return Mask; 2609} 2610 2611/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2612/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2613/// instructions. 2614unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2615 unsigned Mask = 0; 2616 // 8 nodes, but we only care about the last 4. 2617 for (unsigned i = 7; i >= 4; --i) { 2618 unsigned Val = 0; 2619 SDValue Arg = N->getOperand(i); 2620 if (Arg.getOpcode() != ISD::UNDEF) 2621 Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2622 Mask |= (Val - 4); 2623 if (i != 4) 2624 Mask <<= 2; 2625 } 2626 2627 return Mask; 2628} 2629 2630/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2631/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2632/// instructions. 2633unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2634 unsigned Mask = 0; 2635 // 8 nodes, but we only care about the first 4. 2636 for (int i = 3; i >= 0; --i) { 2637 unsigned Val = 0; 2638 SDValue Arg = N->getOperand(i); 2639 if (Arg.getOpcode() != ISD::UNDEF) 2640 Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2641 Mask |= Val; 2642 if (i != 0) 2643 Mask <<= 2; 2644 } 2645 2646 return Mask; 2647} 2648 2649/// isPSHUFHW_PSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 2650/// specifies an 8-element shuffle that can be broken into a pair of 2651/// PSHUFHW and PSHUFLW. 2652static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2653 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2654 2655 if (N->getNumOperands() != 8) 2656 return false; 2657 2658 // Lower quadword shuffled. 2659 for (unsigned i = 0; i != 4; ++i) { 2660 SDValue Arg = N->getOperand(i); 2661 if (Arg.getOpcode() == ISD::UNDEF) continue; 2662 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2663 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2664 if (Val >= 4) 2665 return false; 2666 } 2667 2668 // Upper quadword shuffled.
2669 for (unsigned i = 4; i != 8; ++i) { 2670 SDValue Arg = N->getOperand(i); 2671 if (Arg.getOpcode() == ISD::UNDEF) continue; 2672 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2673 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2674 if (Val < 4 || Val > 7) 2675 return false; 2676 } 2677 2678 return true; 2679} 2680 2681/// CommuteVectorShuffle - Swap vector_shuffle operands as well as 2682/// values in their permute mask. 2683static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1, 2684 SDValue &V2, SDValue &Mask, 2685 SelectionDAG &DAG) { 2686 MVT VT = Op.getValueType(); 2687 MVT MaskVT = Mask.getValueType(); 2688 MVT EltVT = MaskVT.getVectorElementType(); 2689 unsigned NumElems = Mask.getNumOperands(); 2690 SmallVector<SDValue, 8> MaskVec; 2691 2692 for (unsigned i = 0; i != NumElems; ++i) { 2693 SDValue Arg = Mask.getOperand(i); 2694 if (Arg.getOpcode() == ISD::UNDEF) { 2695 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2696 continue; 2697 } 2698 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2699 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2700 if (Val < NumElems) 2701 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2702 else 2703 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2704 } 2705 2706 std::swap(V1, V2); 2707 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); 2708 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2709} 2710 2711/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming 2712/// the two vector operands have swapped position. 2713static 2714SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG) { 2715 MVT MaskVT = Mask.getValueType(); 2716 MVT EltVT = MaskVT.getVectorElementType(); 2717 unsigned NumElems = Mask.getNumOperands(); 2718 SmallVector<SDValue, 8> MaskVec; 2719 for (unsigned i = 0; i != NumElems; ++i) { 2720 SDValue Arg = Mask.getOperand(i); 2721 if (Arg.getOpcode() == ISD::UNDEF) { 2722 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2723 continue; 2724 } 2725 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2726 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2727 if (Val < NumElems) 2728 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2729 else 2730 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2731 } 2732 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); 2733} 2734 2735 2736/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2737/// match movhlps. The lower half elements should come from the upper half of 2738/// V1 (and in order), and the upper half elements should come from the upper 2739/// half of V2 (and in order). 2740static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2741 unsigned NumElems = Mask->getNumOperands(); 2742 if (NumElems != 4) 2743 return false; 2744 for (unsigned i = 0, e = 2; i != e; ++i) 2745 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2746 return false; 2747 for (unsigned i = 2; i != 4; ++i) 2748 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2749 return false; 2750 return true; 2751} 2752 2753/// isScalarLoadToVector - Returns true if the node is a scalar load that 2754/// is promoted to a vector. It also returns the LoadSDNode by reference if 2755/// required.
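/// For example (illustrative), (scalar_to_vector (load x)) is recognized
/// here; a plain vector load is handled separately via ISD::isNON_EXTLoad.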
2756static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { 2757 if (N->getOpcode() != ISD::SCALAR_TO_VECTOR) 2758 return false; 2759 N = N->getOperand(0).getNode(); 2760 if (!ISD::isNON_EXTLoad(N)) 2761 return false; 2762 if (LD) 2763 *LD = cast<LoadSDNode>(N); 2764 return true; 2765} 2766 2767/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2768/// match movlp{s|d}. The lower half elements should come from the lower half 2769/// of V1 (and in order), and the upper half elements should come from the 2770/// upper half of V2 (and in order). And since V1 will become the source of the 2771/// MOVLP, it must be either a vector load or a scalar load to vector. 2772static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 2773 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 2774 return false; 2775 // If V2 is a vector load, don't do this transformation. We will try to use 2776 // a load-folding shufps instead. 2777 if (ISD::isNON_EXTLoad(V2)) 2778 return false; 2779 2780 unsigned NumElems = Mask->getNumOperands(); 2781 if (NumElems != 2 && NumElems != 4) 2782 return false; 2783 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2784 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2785 return false; 2786 for (unsigned i = NumElems/2; i != NumElems; ++i) 2787 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2788 return false; 2789 return true; 2790} 2791 2792/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2793/// all the same. 2794static bool isSplatVector(SDNode *N) { 2795 if (N->getOpcode() != ISD::BUILD_VECTOR) 2796 return false; 2797 2798 SDValue SplatValue = N->getOperand(0); 2799 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2800 if (N->getOperand(i) != SplatValue) 2801 return false; 2802 return true; 2803} 2804 2805/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2806/// to an undef. 2807static bool isUndefShuffle(SDNode *N) { 2808 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2809 return false; 2810 2811 SDValue V1 = N->getOperand(0); 2812 SDValue V2 = N->getOperand(1); 2813 SDValue Mask = N->getOperand(2); 2814 unsigned NumElems = Mask.getNumOperands(); 2815 for (unsigned i = 0; i != NumElems; ++i) { 2816 SDValue Arg = Mask.getOperand(i); 2817 if (Arg.getOpcode() != ISD::UNDEF) { 2818 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2819 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2820 return false; 2821 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2822 return false; 2823 } 2824 } 2825 return true; 2826} 2827 2828/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2829/// constant +0.0. 2830static inline bool isZeroNode(SDValue Elt) { 2831 return ((isa<ConstantSDNode>(Elt) && 2832 cast<ConstantSDNode>(Elt)->getZExtValue() == 0) || 2833 (isa<ConstantFPSDNode>(Elt) && 2834 cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); 2835} 2836 2837/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2838/// to a zero vector.
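/// For example (illustrative), shuffling a BUILD_VECTOR of all-zero constants
/// with undef resolves to zero for any mask.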
2839static bool isZeroShuffle(SDNode *N) { 2840 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2841 return false; 2842 2843 SDValue V1 = N->getOperand(0); 2844 SDValue V2 = N->getOperand(1); 2845 SDValue Mask = N->getOperand(2); 2846 unsigned NumElems = Mask.getNumOperands(); 2847 for (unsigned i = 0; i != NumElems; ++i) { 2848 SDValue Arg = Mask.getOperand(i); 2849 if (Arg.getOpcode() == ISD::UNDEF) 2850 continue; 2851 2852 unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue(); 2853 if (Idx < NumElems) { 2854 unsigned Opc = V1.getNode()->getOpcode(); 2855 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) 2856 continue; 2857 if (Opc != ISD::BUILD_VECTOR || 2858 !isZeroNode(V1.getNode()->getOperand(Idx))) 2859 return false; 2860 } else if (Idx >= NumElems) { 2861 unsigned Opc = V2.getNode()->getOpcode(); 2862 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) 2863 continue; 2864 if (Opc != ISD::BUILD_VECTOR || 2865 !isZeroNode(V2.getNode()->getOperand(Idx - NumElems))) 2866 return false; 2867 } 2868 } 2869 return true; 2870} 2871 2872/// getZeroVector - Returns a vector of specified type with all zero elements. 2873/// 2874static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG) { 2875 assert(VT.isVector() && "Expected a vector type"); 2876 2877 // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2878 // type. This ensures they get CSE'd. 2879 SDValue Vec; 2880 if (VT.getSizeInBits() == 64) { // MMX 2881 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); 2882 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2883 } else if (HasSSE2) { // SSE2 2884 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); 2885 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2886 } else { // SSE1 2887 SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); 2888 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4f32, Cst, Cst, Cst, Cst); 2889 } 2890 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2891} 2892 2893/// getOnesVector - Returns a vector of specified type with all bits set. 2894/// 2895static SDValue getOnesVector(MVT VT, SelectionDAG &DAG) { 2896 assert(VT.isVector() && "Expected a vector type"); 2897 2898 // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2899 // type. This ensures they get CSE'd. 2900 SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); 2901 SDValue Vec; 2902 if (VT.getSizeInBits() == 64) // MMX 2903 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2904 else // SSE 2905 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2906 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2907} 2908 2909 2910/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2911/// that point to V2 point to its first element.
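/// For example (illustrative), with NumElems == 4 and V2 a splat, the mask
/// <0, 5, 1, 7> becomes <0, 4, 1, 4>: every reference into V2 can use V2's
/// first element.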
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  SmallVector<SDValue, 8> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
                       &MaskVec[0], MaskVec.size());
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();

  SmallVector<SDValue, 8> MaskVec;
  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();
  unsigned Half = NumElems/2;
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps
/// element #0 of a vector with the specified index, leaving the rest of the
/// elements in place.
static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt,
                                  SelectionDAG &DAG) {
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT BaseVT = MaskVT.getVectorElementType();
  SmallVector<SDValue, 8> MaskVec;
  // Element #0 of the result gets the elt we are replacing.
  MaskVec.push_back(DAG.getConstant(DestElt, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
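// Worked examples for the mask builders above, assuming NumElems == 4
// (indices 0-3 select V1 elements, 4-7 select V2 elements):
//   getMOVLMask(4)           -> <4, 1, 2, 3>
//   getUnpacklMask(4)        -> <0, 4, 1, 5>
//   getUnpackhMask(4)        -> <2, 6, 3, 7>
//   getSwapEltZeroMask(4, 2) -> <2, 1, 0, 3>
// So with V1 = <a0,a1,a2,a3> and V2 = <b0,b1,b2,b3>, the unpackl mask yields
// <a0, b0, a1, b1>, exactly what unpcklps produces.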
/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) {
  MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
  MVT VT = Op.getValueType();
  if (PVT == VT)
    return Op;
  SDValue V1 = Op.getOperand(0);
  SDValue Mask = Op.getOperand(2);
  unsigned MaskNumElems = Mask.getNumOperands();
  unsigned NumElems = MaskNumElems;
  // Special handling of v4f32 -> v4i32.
  if (VT != MVT::v4f32) {
    // Find which element we want to splat.
    SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode();
    unsigned EltNo = cast<ConstantSDNode>(EltNoNode)->getZExtValue();
    // Unpack the elements to the correct location.
    while (NumElems > 4) {
      if (EltNo < NumElems/2) {
        Mask = getUnpacklMask(MaskNumElems, DAG);
      } else {
        Mask = getUnpackhMask(MaskNumElems, DAG);
        EltNo -= NumElems/2;
      }
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
      NumElems >>= 1;
    }
    SDValue Cst = DAG.getConstant(EltNo, MVT::i32);
    Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  }

  V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
  SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
                                DAG.getNode(ISD::UNDEF, PVT), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// isVectorLoad - Returns true if the node is a vector load, a scalar
/// load promoted to a vector, or a bitcast of a load.
static bool isVectorLoad(SDValue Op) {
  assert(Op.getValueType().isVector() && "Expected a vector type");
  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR ||
      Op.getOpcode() == ISD::BIT_CONVERT) {
    return isa<LoadSDNode>(Op.getOperand(0));
  }
  return isa<LoadSDNode>(Op);
}


/// CanonicalizeMovddup - Canonicalize a movddup shuffle to v2f64.
///
static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask,
                                   SelectionDAG &DAG, bool HasSSE3) {
  // If we have SSE3 and the shuffle has more than one use or the input is a
  // load, then use movddup. Otherwise, use movlhps.
  bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1));
  MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32;
  MVT VT = Op.getValueType();
  if (VT == PVT)
    return Op;
  unsigned NumElems = PVT.getVectorNumElements();
  if (NumElems == 2) {
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  } else {
    assert(NumElems == 4);
    SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32);
    SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32);
    Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1);
  }

  V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
  SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
                                DAG.getNode(ISD::UNDEF, PVT), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
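// For illustration: a v2f64 splat of element 0 canonicalizes to the mask
// <0, 0>, which SSE3 matches as a single movddup; without SSE3 the same
// splat is expressed on v4f32 with the mask <0, 1, 0, 1>, which matches
// movlhps (duplicate the low 64 bits into the high half).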
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and a zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
                                           bool isZero, bool HasSSE2,
                                           SelectionDAG &DAG) {
  MVT VT = V2.getValueType();
  SDValue V1 = isZero
    ? getZeroVector(VT, HasSSE2, DAG) : DAG.getNode(ISD::UNDEF, VT);
  unsigned NumElems = V2.getValueType().getVectorNumElements();
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT EVT = MaskVT.getVectorElementType();
  SmallVector<SDValue, 16> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i)
    if (i == Idx)  // If this is the insertion idx, put the low elt of V2 here.
      MaskVec.push_back(DAG.getConstant(NumElems, EVT));
    else
      MaskVec.push_back(DAG.getConstant(i, EVT));
  SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                             &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// getNumOfConsecutiveZeros - Return the number of consecutive zero elements,
/// counting from either the low or the high end, in the result of a shuffle.
static
unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask,
                                  unsigned NumElems, bool Low,
                                  SelectionDAG &DAG) {
  unsigned NumZeros = 0;
  for (unsigned i = 0; i < NumElems; ++i) {
    unsigned Index = Low ? i : NumElems-i-1;
    SDValue Idx = Mask.getOperand(Index);
    if (Idx.getOpcode() == ISD::UNDEF) {
      ++NumZeros;
      continue;
    }
    SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index);
    if (Elt.getNode() && isZeroNode(Elt))
      ++NumZeros;
    else
      break;
  }
  return NumZeros;
}

/// isVectorShift - Returns true if the shuffle can be implemented as a
/// logical left or right shift of a vector.
static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG,
                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
  unsigned NumElems = Mask.getNumOperands();

  isLeft = true;
  unsigned NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG);
  if (!NumZeros) {
    isLeft = false;
    NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG);
    if (!NumZeros)
      return false;
  }

  bool SeenV1 = false;
  bool SeenV2 = false;
  for (unsigned i = NumZeros; i < NumElems; ++i) {
    unsigned Val = isLeft ? (i - NumZeros) : i;
    SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros));
    if (Idx.getOpcode() == ISD::UNDEF)
      continue;
    unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue();
    if (Index < NumElems)
      SeenV1 = true;
    else {
      Index -= NumElems;
      SeenV2 = true;
    }
    if (Index != Val)
      return false;
  }
  if (SeenV1 && SeenV2)
    return false;

  ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1);
  ShAmt = NumZeros;
  return true;
}
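// Worked example (hypothetical v4i32 operands): with V2 a zero vector, the
// shuffle mask <4, 4, 0, 1> has two zeros in the low elements followed by
// V1 elements 0 and 1 in sequence, so isVectorShift reports a left shift
// with ShVal = V1 and ShAmt = 2 elements; the lowering below turns that
// into a 64-bit VSHL, i.e. a single pslldq $8.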
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDValue();

  SDValue V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, true, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDValue ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.getNode())
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getIntPtrConstant(i/2));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDValue();

  SDValue V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, true, DAG);
        else
          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
                      DAG.getIntPtrConstant(i));
    }
  }

  return V;
}

/// getVShift - Return a vector logical shift node.
///
static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
                         unsigned NumBits, SelectionDAG &DAG,
                         const TargetLowering &TLI) {
  bool isMMX = VT.getSizeInBits() == 64;
  MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
  unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
  SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp);
  return DAG.getNode(ISD::BIT_CONVERT, VT,
                     DAG.getNode(Opc, ShVT, SrcOp,
                                 DAG.getConstant(NumBits,
                                                 TLI.getShiftAmountTy())));
}
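// For illustration: LowerBuildVectorv16i8 above handles bytes pairwise,
// since SSE2 has pinsrw but no pinsrb. For adjacent bytes b0 (even index)
// and b1 (odd index) it forms the 16-bit lane value
//   (zext(b1) << 8) | zext(b0)
// and inserts it into lane i/2 of a v8i16, which is little-endian-correct;
// the final v8i16 is then bitcast back to v16i8.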
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor, all ones are handled with pcmpeqd.
  if (ISD::isBuildVectorAllZeros(Op.getNode())
      || ISD::isBuildVectorAllOnes(Op.getNode())) {
    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
    // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars
    // are eliminated on x86-32 hosts.
    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
      return Op;

    if (ISD::isBuildVectorAllOnes(Op.getNode()))
      return getOnesVector(Op.getValueType(), DAG);
    return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG);
  }

  MVT VT = Op.getValueType();
  MVT EVT = VT.getVectorElementType();
  unsigned EVTBits = EVT.getSizeInBits();

  unsigned NumElems = Op.getNumOperands();
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  bool IsAllConstants = true;
  SmallSet<SDValue, 8> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = Op.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    Values.insert(Elt);
    if (Elt.getOpcode() != ISD::Constant &&
        Elt.getOpcode() != ISD::ConstantFP)
      IsAllConstants = false;
    if (isZeroNode(Elt))
      NumZero++;
    else {
      NonZeros |= (1 << i);
      NumNonZero++;
    }
  }

  if (NumNonZero == 0) {
    // All-undef vector; return UNDEF. All-zero vectors were handled above.
    return DAG.getNode(ISD::UNDEF, VT);
  }

  // Special case for a single non-zero, non-undef element.
  if (NumNonZero == 1 && NumElems <= 4) {
    unsigned Idx = CountTrailingZeros_32(NonZeros);
    SDValue Item = Op.getOperand(Idx);

    // If this is an insertion of an i64 value on x86-32, and if the top bits
    // of the value are obviously zero, truncate the value to i32 and do the
    // insertion that way. Only do this if the value is non-constant or if the
    // value is a constant being inserted into element 0. It is cheaper to do
    // a constant pool load than it is to do a movd + shuffle.
    if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
        (!IsAllConstants || Idx == 0)) {
      if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
        // Handle MMX and SSE both.
        MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;

        // Truncate the value (which may itself be a constant) to i32, and
        // convert it to a vector with movd (S2V+shuffle to zero extend).
        Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
                                           Subtarget->hasSSE2(), DAG);

        // Now we have our 32-bit value zero extended in the low element of
        // a vector. If Idx != 0, swizzle it into place.
        if (Idx != 0) {
          SDValue Ops[] = {
            Item, DAG.getNode(ISD::UNDEF, Item.getValueType()),
            getSwapEltZeroMask(VecElts, Idx, DAG)
          };
          Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3);
        }
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item);
      }
    }

    // If we have a constant or non-constant insertion into the low element of
    // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
    // the rest of the elements. This will be matched as movd/movq/movss/movsd
    // depending on what the source datatype is. Because we can only get here
    // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
    if (Idx == 0 &&
        // Don't do this for i64 values on x86-32.
        (EVT != MVT::i64 || Subtarget->is64Bit())) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
      return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
                                         Subtarget->hasSSE2(), DAG);
    }

    // Is it a vector logical left shift?
    if (NumElems == 2 && Idx == 1 &&
        isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
      unsigned NumBits = VT.getSizeInBits();
      return getVShift(true, VT,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
                                   Op.getOperand(1)),
                       NumBits/2, DAG, *this);
    }

    if (IsAllConstants) // Otherwise, it's better to do a constant pool load.
      return SDValue();

    // Otherwise, if this is a vector with i32 or f32 elements, and the element
    // is a non-constant being inserted into an element other than the low one,
    // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
    // movd/movss) to move this into the low element, then shuffle it into
    // place.
    if (EVTBits == 32) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);

      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
                                         Subtarget->hasSSE2(), DAG);
      MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT MaskEVT = MaskVT.getVectorElementType();
      SmallVector<SDValue, 8> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
      SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskVec[0], MaskVec.size());
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
                         DAG.getNode(ISD::UNDEF, VT), Mask);
    }
  }

  // Splat is obviously ok. Let the legalizer expand it to a shuffle.
  if (Values.size() == 1)
    return SDValue();

  // A vector full of immediates; various special cases are already
  // handled, so this is best done with a single constant-pool load.
  if (IsAllConstants)
    return SDValue();

  // Let the legalizer expand 2-wide build_vectors.
  if (EVTBits == 64) {
    if (NumNonZero == 1) {
      // One half is zero or undef.
      unsigned Idx = CountTrailingZeros_32(NonZeros);
      SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
                               Op.getOperand(Idx));
      return getShuffleVectorZeroOrUndef(V2, Idx, true,
                                         Subtarget->hasSSE2(), DAG);
    }
    return SDValue();
  }

  // If the element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8 && NumElems == 16) {
    SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
                                      *this);
    if (V.getNode()) return V;
  }

  if (EVTBits == 16 && NumElems == 8) {
    SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
                                      *this);
    if (V.getNode()) return V;
  }

  // If the element VT is == 32 bits, turn it into a number of shuffles.
  SmallVector<SDValue, 8> V;
  V.resize(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    }

    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
        default: break;
        case 0:
          V[i] = V[i*2];  // Must be a zero vector.
          break;
        case 1:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
                             getMOVLMask(NumElems, DAG));
          break;
        case 2:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                             getMOVLMask(NumElems, DAG));
          break;
        case 3:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                             getUnpacklMask(NumElems, DAG));
          break;
      }
    }

    MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
    MVT EVT = MaskVT.getVectorElementType();
    SmallVector<SDValue, 8> MaskVec;
    bool Reverse = (NonZeros & 0x3) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i, EVT));
    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
    SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskVec[0], MaskVec.size());
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
  }

  if (Values.size() > 2) {
    // Expand into a number of unpckl*.
    // e.g. for v4f32
    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
    SDValue UnpckMask = getUnpacklMask(NumElems, DAG);
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    NumElems >>= 1;
    while (NumElems != 0) {
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                           UnpckMask);
      NumElems >>= 1;
    }
    return V[0];
  }

  return SDValue();
}

static
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2,
                                 SDValue PermMask, SelectionDAG &DAG,
                                 TargetLowering &TLI) {
  SDValue NewV;
  MVT MaskVT = MVT::getIntVectorWithNumElements(8);
  MVT MaskEVT = MaskVT.getVectorElementType();
  MVT PtrVT = TLI.getPointerTy();
  SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(),
                                   PermMask.getNode()->op_end());

  // First record which half of which vector the low elements come from.
  SmallVector<unsigned, 4> LowQuad(4);
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
    int QuadIdx = EltIdx / 4;
    ++LowQuad[QuadIdx];
  }

  int BestLowQuad = -1;
  unsigned MaxQuad = 1;
  for (unsigned i = 0; i < 4; ++i) {
    if (LowQuad[i] > MaxQuad) {
      BestLowQuad = i;
      MaxQuad = LowQuad[i];
    }
  }

  // Record which half of which vector the high elements come from.
  SmallVector<unsigned, 4> HighQuad(4);
  for (unsigned i = 4; i < 8; ++i) {
    SDValue Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
    int QuadIdx = EltIdx / 4;
    ++HighQuad[QuadIdx];
  }

  int BestHighQuad = -1;
  MaxQuad = 1;
  for (unsigned i = 0; i < 4; ++i) {
    if (HighQuad[i] > MaxQuad) {
      BestHighQuad = i;
      MaxQuad = HighQuad[i];
    }
  }
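  // Worked example (hypothetical mask): for <2, 1, 0, 3, 12, 13, 14, 15> the
  // low four indices all fall in quad 0 (the low half of V1) and the high
  // four all fall in quad 3 (the high half of V2), so BestLowQuad = 0 and
  // BestHighQuad = 3. A single v2i64 shuffle then gathers those two quads,
  // and a pshuflw with <2, 1, 0, 3> fixes up the low half in place.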
  // If it's possible to sort parts of either half with PSHUF{H|L}W, then do
  // it.
  if (BestLowQuad != -1 || BestHighQuad != -1) {
    // First sort the 4 chunks in order using shufpd.
    SmallVector<SDValue, 8> MaskVec;

    if (BestLowQuad != -1)
      MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
    else
      MaskVec.push_back(DAG.getConstant(0, MVT::i32));

    if (BestHighQuad != -1)
      MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
    else
      MaskVec.push_back(DAG.getConstant(1, MVT::i32));

    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0], 2);
    NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
    NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);

    // Now sort high and low parts separately.
    BitVector InOrder(8);
    if (BestLowQuad != -1) {
      // Sort lower half in order using PSHUFLW.
      MaskVec.clear();
      bool AnyOutOrder = false;

      for (unsigned i = 0; i != 4; ++i) {
        SDValue Elt = MaskElts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(Elt);
          InOrder.set(i);
        } else {
          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
          if (EltIdx != i)
            AnyOutOrder = true;

          MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));

          // If this element is in the right place after this shuffle, then
          // remember it.
          if ((int)(EltIdx / 4) == BestLowQuad)
            InOrder.set(i);
        }
      }
      if (AnyOutOrder) {
        for (unsigned i = 4; i != 8; ++i)
          MaskVec.push_back(DAG.getConstant(i, MaskEVT));
        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
      }
    }

    if (BestHighQuad != -1) {
      // Sort high half in order using PSHUFHW if possible.
      MaskVec.clear();

      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, MaskEVT));

      bool AnyOutOrder = false;
      for (unsigned i = 4; i != 8; ++i) {
        SDValue Elt = MaskElts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(Elt);
          InOrder.set(i);
        } else {
          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
          if (EltIdx != i)
            AnyOutOrder = true;

          MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));

          // If this element is in the right place after this shuffle, then
          // remember it.
          if ((int)(EltIdx / 4) == BestHighQuad)
            InOrder.set(i);
        }
      }

      if (AnyOutOrder) {
        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
      }
    }

    // The other elements are put in the right place using pextrw and pinsrw.
    for (unsigned i = 0; i != 8; ++i) {
      if (InOrder[i])
        continue;
      SDValue Elt = MaskElts[i];
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      SDValue ExtOp = (EltIdx < 8)
        ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
                      DAG.getConstant(EltIdx, PtrVT))
        : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
                      DAG.getConstant(EltIdx - 8, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }

    return NewV;
  }
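  // For illustration: each such extract/insert fixup costs two instructions,
  // e.g. pextrw $5, %xmm1, %eax followed by pinsrw $2, %eax, %xmm0 to move
  // word 5 of one source into word 2 of the result (register names are
  // hypothetical).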
  // PSHUF{H|L}W are not used. Lower into extracts and inserts, but try to use
  // as few as possible. First, let's find out how many elements are already
  // in the right order.
  unsigned V1InOrder = 0;
  unsigned V1FromV1 = 0;
  unsigned V2InOrder = 0;
  unsigned V2FromV2 = 0;
  SmallVector<SDValue, 8> V1Elts;
  SmallVector<SDValue, 8> V2Elts;
  for (unsigned i = 0; i < 8; ++i) {
    SDValue Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(Elt);
      ++V1InOrder;
      ++V2InOrder;
      continue;
    }
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
    if (EltIdx == i) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
      ++V1InOrder;
    } else if (EltIdx == i+8) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i, MaskEVT));
      ++V2InOrder;
    } else if (EltIdx < 8) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
      ++V1FromV1;
    } else {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
      ++V2FromV2;
    }
  }

  if (V2InOrder > V1InOrder) {
    PermMask = CommuteVectorShuffleMask(PermMask, DAG);
    std::swap(V1, V2);
    std::swap(V1Elts, V2Elts);
    std::swap(V1FromV1, V2FromV2);
  }

  if ((V1FromV1 + V1InOrder) != 8) {
    // Some elements are from V2.
    if (V1FromV1) {
      // If there are elements that are from V1 but out of place, then first
      // sort them in place.
      SmallVector<SDValue, 8> MaskVec;
      for (unsigned i = 0; i < 8; ++i) {
        SDValue Elt = V1Elts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
          continue;
        }
        unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
        if (EltIdx >= 8)
          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
        else
          MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
      }
      SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
    }

    NewV = V1;
    for (unsigned i = 0; i < 8; ++i) {
      SDValue Elt = V1Elts[i];
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      if (EltIdx < 8)
        continue;
      SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
                                  DAG.getConstant(EltIdx - 8, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }
    return NewV;
  } else {
    // All elements are from V1.
    NewV = V1;
    for (unsigned i = 0; i < 8; ++i) {
      SDValue Elt = V1Elts[i];
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
                                  DAG.getConstant(EltIdx, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }
    return NewV;
  }
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 2, 3, | 10, 11, | 0, 1, | 14, 15>
/// becomes a v4i32 shuffle with the mask < 1, 5, 0, 7>.
static
SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2,
                                 MVT VT,
                                 SDValue PermMask, SelectionDAG &DAG,
                                 TargetLowering &TLI) {
  unsigned NumElems = PermMask.getNumOperands();
  unsigned NewWidth = (NumElems == 4) ? 2 : 4;
  MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
  MVT MaskEltVT = MaskVT.getVectorElementType();
  MVT NewVT = MaskVT;
  switch (VT.getSimpleVT()) {
  default: assert(false && "Unexpected!");
  case MVT::v4f32: NewVT = MVT::v2f64; break;
  case MVT::v4i32: NewVT = MVT::v2i64; break;
  case MVT::v8i16: NewVT = MVT::v4i32; break;
  case MVT::v16i8: NewVT = MVT::v4i32; break;
  }

  if (NewWidth == 2) {
    if (VT.isInteger())
      NewVT = MVT::v2i64;
    else
      NewVT = MVT::v2f64;
  }
  unsigned Scale = NumElems / NewWidth;
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0; i < NumElems; i += Scale) {
    unsigned StartIdx = ~0U;
    for (unsigned j = 0; j < Scale; ++j) {
      SDValue Elt = PermMask.getOperand(i+j);
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      if (StartIdx == ~0U)
        StartIdx = EltIdx - (EltIdx % Scale);
      if (EltIdx != StartIdx + j)
        return SDValue();
    }
    if (StartIdx == ~0U)
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEltVT));
    else
      MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT));
  }

  V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
  V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskVec[0], MaskVec.size()));
}

/// getVZextMovL - Return a zero-extending vector move low node.
///
static SDValue getVZextMovL(MVT VT, MVT OpVT,
                            SDValue SrcOp, SelectionDAG &DAG,
                            const X86Subtarget *Subtarget) {
  if (VT == MVT::v2f64 || VT == MVT::v4f32) {
    LoadSDNode *LD = NULL;
    if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
      LD = dyn_cast<LoadSDNode>(SrcOp);
    if (!LD) {
      // movssrr and movsdrr do not clear the top bits. Try to use movd or
      // movq instead.
      MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
      if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
          SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
          SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
        // PR2108
        OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
        return DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
                                                   SrcOp.getOperand(0)
                                                     .getOperand(0))));
      }
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, VT,
                     DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                 DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
}
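// For illustration: (X86ISD::VZEXT_MOVL v2i64 V) keeps element 0 of V and
// zeros element 1, which is exactly the register form of movq
// (movq %xmm1, %xmm0 copies the low quadword and clears the high one);
// the movd form does the same for a 32-bit low element.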
/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
/// shuffles.
static SDValue
LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2,
                          SDValue PermMask, MVT VT, SelectionDAG &DAG) {
  MVT MaskVT = PermMask.getValueType();
  MVT MaskEVT = MaskVT.getVectorElementType();
  SmallVector<std::pair<int, int>, 8> Locs;
  Locs.resize(4);
  SmallVector<SDValue, 8> Mask1(4, DAG.getNode(ISD::UNDEF, MaskEVT));
  unsigned NumHi = 0;
  unsigned NumLo = 0;
  for (unsigned i = 0; i != 4; ++i) {
    SDValue Elt = PermMask.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF) {
      Locs[i] = std::make_pair(-1, -1);
    } else {
      unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
      assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!");
      if (Val < 4) {
        Locs[i] = std::make_pair(0, NumLo);
        Mask1[NumLo] = Elt;
        NumLo++;
      } else {
        Locs[i] = std::make_pair(1, NumHi);
        if (2+NumHi < 4)
          Mask1[2+NumHi] = Elt;
        NumHi++;
      }
    }
  }

  if (NumLo <= 2 && NumHi <= 2) {
    // If no more than two elements come from either vector, this can be
    // implemented with two shuffles. The first shuffle gathers the elements;
    // the second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &Mask1[0], Mask1.size()));

    SmallVector<SDValue, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT));
    for (unsigned i = 0; i != 4; ++i) {
      if (Locs[i].first == -1)
        continue;
      else {
        unsigned Idx = (i < 2) ? 0 : 4;
        Idx += Locs[i].first * 2 + Locs[i].second;
        Mask2[i] = DAG.getConstant(Idx, MaskEVT);
      }
    }

    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &Mask2[0], Mask2.size()));
  } else if (NumLo == 3 || NumHi == 3) {
    // Otherwise, we must have three elements from one vector, call it X, and
    // one element from the other, call it Y. First, use a shufps to build an
    // intermediate vector with the one element from Y and the element from X
    // that will be in the same half in the final destination (the indexes
    // don't matter). Then, use a shufps to build the final vector, taking the
    // half containing the element from Y from the intermediate, and the other
    // half from X.
    if (NumHi == 3) {
      // Normalize it so the 3 elements come from V1.
      PermMask = CommuteVectorShuffleMask(PermMask, DAG);
      std::swap(V1, V2);
    }

    // Find the element from V2.
    unsigned HiIndex;
    for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
      SDValue Elt = PermMask.getOperand(HiIndex);
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
      if (Val >= 4)
        break;
    }

    Mask1[0] = PermMask.getOperand(HiIndex);
    Mask1[1] = DAG.getNode(ISD::UNDEF, MaskEVT);
    Mask1[2] = PermMask.getOperand(HiIndex^1);
    Mask1[3] = DAG.getNode(ISD::UNDEF, MaskEVT);
    V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));

    if (HiIndex >= 2) {
      Mask1[0] = PermMask.getOperand(0);
      Mask1[1] = PermMask.getOperand(1);
      Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT);
      Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
    } else {
      Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT);
      Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT);
      Mask1[2] = PermMask.getOperand(2);
      Mask1[3] = PermMask.getOperand(3);
      if (Mask1[2].getOpcode() != ISD::UNDEF)
        Mask1[2] =
          DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4,
                          MaskEVT);
      if (Mask1[3].getOpcode() != ISD::UNDEF)
        Mask1[3] =
          DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4,
                          MaskEVT);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
    }
  }

  // Break it into (shuffle shuffle_hi, shuffle_lo).
  Locs.clear();
  Locs.resize(4);
  SmallVector<SDValue,8> LoMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
  SmallVector<SDValue,8> HiMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
  SmallVector<SDValue,8> *MaskPtr = &LoMask;
  unsigned MaskIdx = 0;
  unsigned LoIdx = 0;
  unsigned HiIdx = 2;
  for (unsigned i = 0; i != 4; ++i) {
    if (i == 2) {
      MaskPtr = &HiMask;
      MaskIdx = 1;
      LoIdx = 0;
      HiIdx = 2;
    }
    SDValue Elt = PermMask.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF) {
      Locs[i] = std::make_pair(-1, -1);
    } else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) {
      Locs[i] = std::make_pair(MaskIdx, LoIdx);
      (*MaskPtr)[LoIdx] = Elt;
      LoIdx++;
    } else {
      Locs[i] = std::make_pair(MaskIdx, HiIdx);
      (*MaskPtr)[HiIdx] = Elt;
      HiIdx++;
    }
  }

  SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                              &LoMask[0], LoMask.size()));
  SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                              &HiMask[0], HiMask.size()));
  SmallVector<SDValue, 8> MaskOps;
  for (unsigned i = 0; i != 4; ++i) {
    if (Locs[i].first == -1) {
      MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
    } else {
      unsigned Idx = Locs[i].first * 4 + Locs[i].second;
      MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
    }
  }
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskOps[0], MaskOps.size()));
}

SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);
  MVT VT = Op.getValueType();
  unsigned NumElems = PermMask.getNumOperands();
  bool isMMX = VT.getSizeInBits() == 64;
  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
  bool V1IsSplat = false;
  bool V2IsSplat = false;

  if (isUndefShuffle(Op.getNode()))
    return DAG.getNode(ISD::UNDEF, VT);

  if (isZeroShuffle(Op.getNode()))
    return getZeroVector(VT, Subtarget->hasSSE2(), DAG);

  if (isIdentityMask(PermMask.getNode()))
    return V1;
  else if (isIdentityMask(PermMask.getNode(), true))
    return V2;
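  // For illustration: with 4 elements, the identity masks are <0, 1, 2, 3>
  // (the shuffle just returns V1) and <4, 5, 6, 7> (it returns V2), so no
  // instruction needs to be emitted for either.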
  // Canonicalize movddup shuffles.
  if (V2IsUndef && Subtarget->hasSSE2() &&
      VT.getSizeInBits() == 128 &&
      X86::isMOVDDUPMask(PermMask.getNode()))
    return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());

  if (isSplatMask(PermMask.getNode())) {
    if (isMMX || NumElems < 4) return Op;
    // Promote it to a v4{if}32 splat.
    return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
  }

  // If the shuffle can be profitably rewritten as a narrower shuffle, then
  // do it!
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
    if (NewOp.getNode())
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         LowerVECTOR_SHUFFLE(NewOp, DAG));
  } else if ((VT == MVT::v4i32 ||
              (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
    // FIXME: Figure out a cleaner way to do this.
    // Try to make use of movq to zero out the top part.
    if (ISD::isBuildVectorAllZeros(V2.getNode())) {
      SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
                                               DAG, *this);
      if (NewOp.getNode()) {
        SDValue NewV1 = NewOp.getOperand(0);
        SDValue NewV2 = NewOp.getOperand(1);
        SDValue NewMask = NewOp.getOperand(2);
        if (isCommutedMOVL(NewMask.getNode(), true, false)) {
          NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
          return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
        }
      }
    } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
      SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
                                               DAG, *this);
      if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode()))
        return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                            DAG, Subtarget);
    }
  }

  // Check if this can be converted into a logical shift.
  bool isLeft = false;
  unsigned ShAmt = 0;
  SDValue ShVal;
  bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
  if (isShift && ShVal.hasOneUse()) {
    // If the shifted value has multiple uses, it may be cheaper to use
    // v_set0 + movlhps or movhlps, etc.
    MVT EVT = VT.getVectorElementType();
    ShAmt *= EVT.getSizeInBits();
    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
  }

  if (X86::isMOVLMask(PermMask.getNode())) {
    if (V1IsUndef)
      return V2;
    if (ISD::isBuildVectorAllZeros(V1.getNode()))
      return getVZextMovL(VT, VT, V2, DAG, Subtarget);
    if (!isMMX)
      return Op;
  }

  if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) ||
                 X86::isMOVSLDUPMask(PermMask.getNode()) ||
                 X86::isMOVHLPSMask(PermMask.getNode()) ||
                 X86::isMOVHPMask(PermMask.getNode()) ||
                 X86::isMOVLPMask(PermMask.getNode())))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.getNode()) ||
      ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode()))
    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (isShift) {
    // No better options. Use a vshl / vsrl.
    MVT EVT = VT.getVectorElementType();
    ShAmt *= EVT.getSizeInBits();
    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
  }

  bool Commuted = false;
  // FIXME: This should also accept a bitcast of a splat? Be careful, not
  // 1,1,1,1 -> v8i16 though.
  V1IsSplat = isSplatVector(V1.getNode());
  V2IsSplat = isSplatVector(V2.getNode());
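  // For illustration: commuting a 4-element shuffle swaps V1 and V2 and
  // remaps each mask index across the NumElems boundary, so the MOVL-style
  // mask <4, 1, 2, 3> becomes <0, 5, 6, 7> after the commute.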
  // Canonicalize the splat or undef, if present, to be on the RHS.
  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    std::swap(V1IsSplat, V2IsSplat);
    std::swap(V1IsUndef, V2IsUndef);
    Commuted = true;
  }

  // FIXME: Figure out a cleaner way to do this.
  if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDValue NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.getNode() != PermMask.getNode())
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
      X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
      X86::isUNPCKLMask(PermMask.getNode()) ||
      X86::isUNPCKHMask(PermMask.getNode()))
    return Op;

  if (V2IsSplat) {
    // Normalize the mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If a match is found,
    // return a new vector_shuffle with the corrected mask.
    SDValue NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.getNode() != PermMask.getNode()) {
      if (X86::isUNPCKLMask(PermMask.getNode(), true)) {
        SDValue NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(PermMask.getNode(), true)) {
        SDValue NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed.
  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode()))
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (Commuted) {
    // Commute it back and try unpck* again.
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
        X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
        X86::isUNPCKLMask(PermMask.getNode()) ||
        X86::isUNPCKHMask(PermMask.getNode()))
      return Op;
  }

  // Try PSHUF* first, then SHUFP*.
  // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
  // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
  if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) {
    if (V2.getOpcode() != ISD::UNDEF)
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                         DAG.getNode(ISD::UNDEF, VT), PermMask);
    return Op;
  }

  if (!isMMX) {
    if (Subtarget->hasSSE2() &&
        (X86::isPSHUFDMask(PermMask.getNode()) ||
         X86::isPSHUFHWMask(PermMask.getNode()) ||
         X86::isPSHUFLWMask(PermMask.getNode()))) {
      MVT RVT = VT;
      if (VT == MVT::v4f32) {
        RVT = MVT::v4i32;
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
                         DAG.getNode(ISD::BIT_CONVERT, RVT, V1),
                         DAG.getNode(ISD::UNDEF, RVT), PermMask);
      } else if (V2.getOpcode() != ISD::UNDEF)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1,
                         DAG.getNode(ISD::UNDEF, RVT), PermMask);
      if (RVT != VT)
        Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op);
      return Op;
    }
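    // For illustration: a PSHUFD mask such as <3, 1, 2, 0> is encoded into
    // the instruction's immediate two bits per destination element, lowest
    // element first: 3 | (1 << 2) | (2 << 4) | (0 << 6) = 0x27, i.e.
    // pshufd $0x27.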
    // Binary or unary shufps.
    if (X86::isSHUFPMask(PermMask.getNode()) ||
        (V2.getOpcode() == ISD::UNDEF &&
         X86::isPSHUFDMask(PermMask.getNode())))
      return Op;
  }

  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
  if (VT == MVT::v8i16) {
    SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
    if (NewOp.getNode())
      return NewOp;
  }

  // Handle all 4 wide cases with a number of shuffles except for MMX.
  if (NumElems == 4 && !isMMX)
    return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG);

  return SDValue();
}

SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
                                                SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  if (VT.getSizeInBits() == 8) {
    SDValue Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (VT.getSizeInBits() == 16) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    // If Idx is 0, it's cheaper to do a move instead of a pextrw.
    if (Idx == 0)
      return DAG.getNode(ISD::TRUNCATE, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                                     DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                                                 Op.getOperand(0)),
                                     Op.getOperand(1)));
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (VT == MVT::f32) {
    // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
    // the result back to an FR32 register. It's only worth matching if the
    // result has a single use which is a store or a bitcast to i32. And in
    // the case of a store, it's not worth it if the index is a constant 0,
    // because a MOVSSmr can be used instead, which is smaller and faster.
    if (!Op.hasOneUse())
      return SDValue();
    SDNode *User = *Op.getNode()->use_begin();
    if ((User->getOpcode() != ISD::STORE ||
         (isa<ConstantSDNode>(Op.getOperand(1)) &&
          cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
        (User->getOpcode() != ISD::BIT_CONVERT ||
         User->getValueType(0) != MVT::i32))
      return SDValue();
    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                                              Op.getOperand(0)),
                                  Op.getOperand(1));
    return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
  } else if (VT == MVT::i32) {
    // ExtractPS works with a constant index.
    if (isa<ConstantSDNode>(Op.getOperand(1)))
      return Op;
  }
  return SDValue();
}


SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDValue();

  if (Subtarget->hasSSE41()) {
    SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
    if (Res.getNode())
      return Res;
  }

  MVT VT = Op.getValueType();
  // TODO: handle v16i8.
  if (VT.getSizeInBits() == 16) {
    SDValue Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return DAG.getNode(ISD::TRUNCATE, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                                     DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                                                 Vec),
                                     Op.getOperand(1)));
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (VT.getSizeInBits() == 32) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return Op;
    // SHUFPS the element to the lowest double word, then movss.
    MVT MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDValue, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &IdxVec[0], IdxVec.size());
    SDValue Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getIntPtrConstant(0));
  } else if (VT.getSizeInBits() == 64) {
    // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b.
    // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
    // to match extract_elt for f64.
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT MaskVT = MVT::getIntVectorWithNumElements(2);
    SmallVector<SDValue, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &IdxVec[0], IdxVec.size());
    SDValue Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getIntPtrConstant(0));
  }

  return SDValue();
}
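// For illustration: extracting element 1 of a v2f64 by the path above is
//   unpckhpd %xmm0, %xmm0   (replicate the high double into both halves)
// after which element 0 of the result is the value wanted; if that value is
// immediately stored, the pair folds into a single movhpd store, as the note
// above describes.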
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
  MVT VT = Op.getValueType();
  MVT EVT = VT.getVectorElementType();

  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);

  if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
      isa<ConstantSDNode>(N2)) {
    unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
                                              : X86ISD::PINSRW;
    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its
    // second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
    return DAG.getNode(Opc, VT, N0, N1, N2);
  } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
    // Bits [7:6] of the constant are the source select. This will always be
    // zero here. The DAG Combiner may combine an extract_elt index into these
    // bits. For example (insert (extract, 3), 2) could be matched by putting
    // the '3' into bits [7:6] of X86ISD::INSERTPS.
    // Bits [5:4] of the constant are the destination select. This is the
    // value of the incoming immediate.
    // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
    // combine either bitwise AND or insert of float 0.0 to set these bits.
    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
    return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
  } else if (EVT == MVT::i32) {
    // InsertPS works with a constant index.
    if (isa<ConstantSDNode>(N2))
      return Op;
  }
  return SDValue();
}

SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EVT = VT.getVectorElementType();

  if (Subtarget->hasSSE41())
    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);

  if (EVT == MVT::i8)
    return SDValue();

  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);

  if (EVT.getSizeInBits() == 16) {
    // Transform it so it matches pinsrw, which expects a 16-bit value in a
    // GR32 as its second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  }
  return SDValue();
}

SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  if (Op.getValueType() == MVT::v2f32)
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f32,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i32,
                                   DAG.getNode(ISD::BIT_CONVERT, MVT::i32,
                                               Op.getOperand(0))));

  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  MVT VT = MVT::v2i32;
  switch (Op.getValueType().getSimpleVT()) {
  default: break;
  case MVT::v16i8:
  case MVT::v8i16:
    VT = MVT::v4i32;
    break;
  }
  return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                     DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                             getPointerTy(),
                                             CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
                                      int64_t Offset,
                                      SelectionDAG &DAG) const {
  bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
  bool ExtraLoadRequired =
    Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);

  // Create the TargetGlobalAddress node, folding in the constant
  // offset if it is legal.
  SDValue Result;
  if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
    Offset = 0;
  } else
    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);

  // With PIC, the address is actually $g + Offset.
  if (IsPic && !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
  // to load the value at address GV, not the value of GV itself. This means
  // that the GlobalAddress must be in the base or index register of the
  // address, not the GV offset field. The platform check is inside the
  // GVRequiresExtraLoad() call. The same applies to external symbols during
  // PIC codegen.
  if (ExtraLoadRequired)
    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
                         PseudoSourceValue::getGOT(), 0);

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, getPointerTy(), Result,
                         DAG.getConstant(Offset, getPointerTy()));

  return Result;
}

SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
  return LowerGlobalAddress(GV, Offset, DAG);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit.
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const MVT PtrVT) {
  SDValue InFlag;
  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
                                   DAG.getNode(X86ISD::GlobalBaseReg,
                                               PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  // emit leal symbol@TLSGD(,%ebx,1), %eax
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                           GA->getValueType(0),
                                           GA->getOffset());
  SDValue Ops[] = { Chain, TGA, InFlag };
  SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
  InFlag = Result.getValue(2);
  Chain = Result.getValue(1);
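  // For illustration, the full sequence this builds corresponds to:
  //   leal  x@TLSGD(,%ebx,1), %eax
  //   call  ___tls_get_addr
  // with the GOT base in %ebx, the argument in %eax, and the variable's
  // address returned in %eax.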
This function receives its argument in 4522 // the register EAX. 4523 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 4524 InFlag = Chain.getValue(1); 4525 4526 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 4527 SDValue Ops1[] = { Chain, 4528 DAG.getTargetExternalSymbol("___tls_get_addr", 4529 PtrVT), 4530 DAG.getRegister(X86::EAX, PtrVT), 4531 DAG.getRegister(X86::EBX, PtrVT), 4532 InFlag }; 4533 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 4534 InFlag = Chain.getValue(1); 4535 4536 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 4537} 4538 4539// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit 4540static SDValue 4541LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4542 const MVT PtrVT) { 4543 SDValue InFlag, Chain; 4544 4545 // emit leaq symbol@TLSGD(%rip), %rdi 4546 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 4547 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4548 GA->getValueType(0), 4549 GA->getOffset()); 4550 SDValue Ops[] = { DAG.getEntryNode(), TGA}; 4551 SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2); 4552 Chain = Result.getValue(1); 4553 InFlag = Result.getValue(2); 4554 4555 // call __tls_get_addr. This function receives its argument in 4556 // the register RDI. 4557 Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag); 4558 InFlag = Chain.getValue(1); 4559 4560 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 4561 SDValue Ops1[] = { Chain, 4562 DAG.getTargetExternalSymbol("__tls_get_addr", 4563 PtrVT), 4564 DAG.getRegister(X86::RDI, PtrVT), 4565 InFlag }; 4566 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4); 4567 InFlag = Chain.getValue(1); 4568 4569 return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag); 4570} 4571 4572// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 4573// "local exec" model. 4574static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 4575 const MVT PtrVT) { 4576 // Get the Thread Pointer 4577 SDValue ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 4578 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 4579 // exec) 4580 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 4581 GA->getValueType(0), 4582 GA->getOffset()); 4583 SDValue Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 4584 4585 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 4586 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, 4587 PseudoSourceValue::getGOT(), 0); 4588 4589 // The address of the thread local variable is the add of the thread 4590 // pointer with the offset of the variable. 
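  // Putting it together, for a variable "x" this lowers to (a sketch):
  //   movl %gs:0, %eax          ; thread pointer
  //   addl x@ntpoff, %eax       ; local exec: offset is a link-time constant
  // or, for initial exec, with the offset first loaded through the GOT:
  //   addl x@indntpoff, %eax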
4591 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); 4592} 4593 4594SDValue 4595X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { 4596 // TODO: implement the "local dynamic" model 4597 // TODO: implement the "initial exec" model for PIC executables 4598 assert(Subtarget->isTargetELF() && 4599 "TLS not implemented for non-ELF targets"); 4600 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 4601 // If the relocation model is PIC, use the "General Dynamic" TLS model; 4602 // otherwise use the "Local Exec" TLS model. 4603 if (Subtarget->is64Bit()) { 4604 return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); 4605 } else { 4606 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 4607 return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); 4608 else 4609 return LowerToTLSExecModel(GA, DAG, getPointerTy()); 4610 } 4611} 4612 4613SDValue 4614X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { 4615 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 4616 SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 4617 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4618 // With PIC, the address is actually $g + Offset. 4619 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4620 !Subtarget->isPICStyleRIPRel()) { 4621 Result = DAG.getNode(ISD::ADD, getPointerTy(), 4622 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4623 Result); 4624 } 4625 4626 return Result; 4627} 4628 4629SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { 4630 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 4631 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); 4632 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 4633 // With PIC, the address is actually $g + Offset. 4634 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 4635 !Subtarget->isPICStyleRIPRel()) { 4636 Result = DAG.getNode(ISD::ADD, getPointerTy(), 4637 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 4638 Result); 4639 } 4640 4641 return Result; 4642} 4643 4644/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and 4645/// take a 2 x i32 value to shift plus a shift amount. 4646SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { 4647 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 4648 MVT VT = Op.getValueType(); 4649 unsigned VTBits = VT.getSizeInBits(); 4650 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 4651 SDValue ShOpLo = Op.getOperand(0); 4652 SDValue ShOpHi = Op.getOperand(1); 4653 SDValue ShAmt = Op.getOperand(2); 4654 SDValue Tmp1 = isSRA ? 4655 DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) : 4656 DAG.getConstant(0, VT); 4657 4658 SDValue Tmp2, Tmp3; 4659 if (Op.getOpcode() == ISD::SHL_PARTS) { 4660 Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt); 4661 Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt); 4662 } else { 4663 Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt); 4664 Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt); 4665 } 4666 4667 SDValue AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 4668 DAG.getConstant(VTBits, MVT::i8)); 4669 SDValue Cond = DAG.getNode(X86ISD::CMP, VT, 4670 AndNode, DAG.getConstant(0, MVT::i8)); 4671 4672 SDValue Hi, Lo; 4673 SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4674 SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond }; 4675 SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; 4676 4677 if (Op.getOpcode() == ISD::SHL_PARTS) { 4678 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4679 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4680 } else { 4681 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4682 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4683 } 4684 4685 SDValue Ops[2] = { Lo, Hi }; 4686 return DAG.getMergeValues(Ops, 2); 4687} 4688 4689SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 4690 MVT SrcVT = Op.getOperand(0).getValueType(); 4691 assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && 4692 "Unknown SINT_TO_FP to lower!"); 4693 4694 // These are really Legal; caller falls through into that case. 4695 if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) 4696 return SDValue(); 4697 if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 && 4698 Subtarget->is64Bit()) 4699 return SDValue(); 4700 4701 unsigned Size = SrcVT.getSizeInBits()/8; 4702 MachineFunction &MF = DAG.getMachineFunction(); 4703 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4704 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4705 SDValue Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 4706 StackSlot, 4707 PseudoSourceValue::getFixedStack(SSFI), 0); 4708 4709 // Build the FILD 4710 SDVTList Tys; 4711 bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); 4712 if (useSSE) 4713 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 4714 else 4715 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 4716 SmallVector<SDValue, 8> Ops; 4717 Ops.push_back(Chain); 4718 Ops.push_back(StackSlot); 4719 Ops.push_back(DAG.getValueType(SrcVT)); 4720 SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, 4721 Tys, &Ops[0], Ops.size()); 4722 4723 if (useSSE) { 4724 Chain = Result.getValue(1); 4725 SDValue InFlag = Result.getValue(2); 4726 4727 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 4728 // shouldn't be necessary except that RFP cannot be live across 4729 // multiple blocks. When stackifier is fixed, they can be uncoupled. 4730 MachineFunction &MF = DAG.getMachineFunction(); 4731 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 4732 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4733 Tys = DAG.getVTList(MVT::Other); 4734 SmallVector<SDValue, 8> Ops; 4735 Ops.push_back(Chain); 4736 Ops.push_back(Result); 4737 Ops.push_back(StackSlot); 4738 Ops.push_back(DAG.getValueType(Op.getValueType())); 4739 Ops.push_back(InFlag); 4740 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 4741 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 4742 PseudoSourceValue::getFixedStack(SSFI), 0); 4743 } 4744 4745 return Result; 4746} 4747 4748// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion. 4749SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { 4750 // This algorithm is not obvious. 
Here it is in C code, more or less: 4751 /* 4752 double uint64_to_double( uint32_t hi, uint32_t lo ) { 4753 static const __m128i exp = { 0x4330000045300000ULL, 0 }; 4754 static const __m128d bias = { 0x1.0p84, 0x1.0p52 }; 4755 4756 // Copy ints to xmm registers. 4757 __m128i xh = _mm_cvtsi32_si128( hi ); 4758 __m128i xl = _mm_cvtsi32_si128( lo ); 4759 4760 // Combine into low half of a single xmm register. 4761 __m128i x = _mm_unpacklo_epi32( xh, xl ); 4762 __m128d d; 4763 double sd; 4764 4765 // Merge in appropriate exponents to give the integer bits the right 4766 // magnitude. 4767 x = _mm_unpacklo_epi32( x, exp ); 4768 4769 // Subtract away the biases to deal with the IEEE-754 double precision 4770 // implicit 1. 4771 d = _mm_sub_pd( (__m128d) x, bias ); 4772 4773 // All conversions up to here are exact. The correctly rounded result is 4774 // calculated using the current rounding mode using the following 4775 // horizontal add. 4776 d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) ); 4777 _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this 4778 // store doesn't really need to be here (except 4779 // maybe to zero the other double) 4780 return sd; 4781 } 4782 */ 4783 4784 // Build some magic constants. 4785 std::vector<Constant*> CV0; 4786 CV0.push_back(ConstantInt::get(APInt(32, 0x45300000))); 4787 CV0.push_back(ConstantInt::get(APInt(32, 0x43300000))); 4788 CV0.push_back(ConstantInt::get(APInt(32, 0))); 4789 CV0.push_back(ConstantInt::get(APInt(32, 0))); 4790 Constant *C0 = ConstantVector::get(CV0); 4791 SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4); 4792 4793 std::vector<Constant*> CV1; 4794 CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL)))); 4795 CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL)))); 4796 Constant *C1 = ConstantVector::get(CV1); 4797 SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4); 4798 4799 SmallVector<SDValue, 4> MaskVec; 4800 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 4801 MaskVec.push_back(DAG.getConstant(4, MVT::i32)); 4802 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 4803 MaskVec.push_back(DAG.getConstant(5, MVT::i32)); 4804 SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0], 4805 MaskVec.size()); 4806 SmallVector<SDValue, 4> MaskVec2; 4807 MaskVec2.push_back(DAG.getConstant(1, MVT::i32)); 4808 MaskVec2.push_back(DAG.getConstant(0, MVT::i32)); 4809 SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec2[0], 4810 MaskVec2.size()); 4811 4812 SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32, 4813 DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 4814 Op.getOperand(0), 4815 DAG.getIntPtrConstant(1))); 4816 SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32, 4817 DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 4818 Op.getOperand(0), 4819 DAG.getIntPtrConstant(0))); 4820 SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, 4821 XR1, XR2, UnpcklMask); 4822 SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0, 4823 PseudoSourceValue::getConstantPool(), 0, 4824 false, 16); 4825 SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, 4826 Unpck1, CLod0, UnpcklMask); 4827 SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2); 4828 SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1, 4829 PseudoSourceValue::getConstantPool(), 0, 4830 false, 16); 4831 SDValue Sub = DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1); 4832 4833 // Add the halves; easiest way is to swap them into 
another reg first. 4834 SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64, 4835 Sub, Sub, ShufMask); 4836 SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub); 4837 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add, 4838 DAG.getIntPtrConstant(0)); 4839} 4840 4841// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion. 4842SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { 4843 // FP constant to bias correct the final result. 4844 SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), 4845 MVT::f64); 4846 4847 // Load the 32-bit value into an XMM register. 4848 SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32, 4849 DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 4850 Op.getOperand(0), 4851 DAG.getIntPtrConstant(0))); 4852 4853 Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, 4854 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Load), 4855 DAG.getIntPtrConstant(0)); 4856 4857 // Or the load with the bias. 4858 SDValue Or = DAG.getNode(ISD::OR, MVT::v2i64, 4859 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, 4860 DAG.getNode(ISD::SCALAR_TO_VECTOR, 4861 MVT::v2f64, Load)), 4862 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, 4863 DAG.getNode(ISD::SCALAR_TO_VECTOR, 4864 MVT::v2f64, Bias))); 4865 Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, 4866 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Or), 4867 DAG.getIntPtrConstant(0)); 4868 4869 // Subtract the bias. 4870 SDValue Sub = DAG.getNode(ISD::FSUB, MVT::f64, Or, Bias); 4871 4872 // Handle final rounding. 4873 MVT DestVT = Op.getValueType(); 4874 4875 if (DestVT.bitsLT(MVT::f64)) { 4876 return DAG.getNode(ISD::FP_ROUND, DestVT, Sub, 4877 DAG.getIntPtrConstant(0)); 4878 } else if (DestVT.bitsGT(MVT::f64)) { 4879 return DAG.getNode(ISD::FP_EXTEND, DestVT, Sub); 4880 } 4881 4882 // The destination is already f64; no rounding is needed. 4883 return Sub; 4884} 4885 4886SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 4887 SDValue N0 = Op.getOperand(0); 4888 4889 // Since UINT_TO_FP is not legal (it's marked custom), the DAG combiner won't 4890 // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform 4891 // the optimization here. 4892 if (DAG.SignBitIsZero(N0)) 4893 return DAG.getNode(ISD::SINT_TO_FP, Op.getValueType(), N0); 4894 4895 MVT SrcVT = N0.getValueType(); 4896 if (SrcVT == MVT::i64) { 4897 // We only handle SSE2 f64 target here; caller can handle the rest. 4898 if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) 4899 return SDValue(); 4900 4901 return LowerUINT_TO_FP_i64(Op, DAG); 4902 } else if (SrcVT == MVT::i32) { 4903 return LowerUINT_TO_FP_i32(Op, DAG); 4904 } 4905 4906 assert(0 && "Unknown UINT_TO_FP to lower!"); 4907 return SDValue(); 4908} 4909 4910std::pair<SDValue,SDValue> X86TargetLowering:: 4911FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { 4912 assert(Op.getValueType().getSimpleVT() <= MVT::i64 && 4913 Op.getValueType().getSimpleVT() >= MVT::i16 && 4914 "Unknown FP_TO_SINT to lower!"); 4915 4916 // These are really Legal. 4917 if (Op.getValueType() == MVT::i32 && 4918 isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) 4919 return std::make_pair(SDValue(), SDValue()); 4920 if (Subtarget->is64Bit() && 4921 Op.getValueType() == MVT::i64 && 4922 Op.getOperand(0).getValueType() != MVT::f80) 4923 return std::make_pair(SDValue(), SDValue()); 4924 4925 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 4926 // stack slot.
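  // Illustrative shape of the f64 -> i64 case (a sketch; the actual
  // FP_TO_INT64_IN_MEM expansion also temporarily switches the x87 rounding
  // mode to round-toward-zero around the store):
  //   fldl    <src slot>
  //   fistpll <dst slot>
  //   movl    <dst slot>, %eax ; movl <dst slot>+4, %edx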
4927 MachineFunction &MF = DAG.getMachineFunction(); 4928 unsigned MemSize = Op.getValueType().getSizeInBits()/8; 4929 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4930 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4931 unsigned Opc; 4932 switch (Op.getValueType().getSimpleVT()) { 4933 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4934 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4935 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4936 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4937 } 4938 4939 SDValue Chain = DAG.getEntryNode(); 4940 SDValue Value = Op.getOperand(0); 4941 if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { 4942 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4943 Chain = DAG.getStore(Chain, Value, StackSlot, 4944 PseudoSourceValue::getFixedStack(SSFI), 0); 4945 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4946 SDValue Ops[] = { 4947 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4948 }; 4949 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4950 Chain = Value.getValue(1); 4951 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4952 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4953 } 4954 4955 // Build the FP_TO_INT*_IN_MEM 4956 SDValue Ops[] = { Chain, Value, StackSlot }; 4957 SDValue FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4958 4959 return std::make_pair(FIST, StackSlot); 4960} 4961 4962SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { 4963 std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(Op, DAG); 4964 SDValue FIST = Vals.first, StackSlot = Vals.second; 4965 if (FIST.getNode() == 0) return SDValue(); 4966 4967 // Load the result. 
4968 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4969} 4970 4971SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { 4972 MVT VT = Op.getValueType(); 4973 MVT EltVT = VT; 4974 if (VT.isVector()) 4975 EltVT = VT.getVectorElementType(); 4976 std::vector<Constant*> CV; 4977 if (EltVT == MVT::f64) { 4978 Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))); 4979 CV.push_back(C); 4980 CV.push_back(C); 4981 } else { 4982 Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))); 4983 CV.push_back(C); 4984 CV.push_back(C); 4985 CV.push_back(C); 4986 CV.push_back(C); 4987 } 4988 Constant *C = ConstantVector::get(CV); 4989 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4990 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4991 PseudoSourceValue::getConstantPool(), 0, 4992 false, 16); 4993 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4994} 4995 4996SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { 4997 MVT VT = Op.getValueType(); 4998 MVT EltVT = VT; 4999 unsigned EltNum = 1; 5000 if (VT.isVector()) { 5001 EltVT = VT.getVectorElementType(); 5002 EltNum = VT.getVectorNumElements(); 5003 } 5004 std::vector<Constant*> CV; 5005 if (EltVT == MVT::f64) { 5006 Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63))); 5007 CV.push_back(C); 5008 CV.push_back(C); 5009 } else { 5010 Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31))); 5011 CV.push_back(C); 5012 CV.push_back(C); 5013 CV.push_back(C); 5014 CV.push_back(C); 5015 } 5016 Constant *C = ConstantVector::get(CV); 5017 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 5018 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 5019 PseudoSourceValue::getConstantPool(), 0, 5020 false, 16); 5021 if (VT.isVector()) { 5022 return DAG.getNode(ISD::BIT_CONVERT, VT, 5023 DAG.getNode(ISD::XOR, MVT::v2i64, 5024 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 5025 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 5026 } else { 5027 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 5028 } 5029} 5030 5031SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { 5032 SDValue Op0 = Op.getOperand(0); 5033 SDValue Op1 = Op.getOperand(1); 5034 MVT VT = Op.getValueType(); 5035 MVT SrcVT = Op1.getValueType(); 5036 5037 // If second operand is smaller, extend it first. 5038 if (SrcVT.bitsLT(VT)) { 5039 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 5040 SrcVT = VT; 5041 } 5042 // And if it is bigger, shrink it first. 5043 if (SrcVT.bitsGT(VT)) { 5044 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1)); 5045 SrcVT = VT; 5046 } 5047 5048 // At this point the operands and the result should have the same 5049 // type, and that won't be f80 since that is not custom lowered. 5050 5051 // First get the sign bit of second operand. 
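  // Overall (a sketch): copysign(x, y) = (x & ~SIGN_MASK) | (y & SIGN_MASK),
  // built below out of constant-pool masks with FAND/FOR.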
5052 std::vector<Constant*> CV; 5053 if (SrcVT == MVT::f64) { 5054 CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63)))); 5055 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 5056 } else { 5057 CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31)))); 5058 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 5059 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 5060 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 5061 } 5062 Constant *C = ConstantVector::get(CV); 5063 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 5064 SDValue Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, 5065 PseudoSourceValue::getConstantPool(), 0, 5066 false, 16); 5067 SDValue SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 5068 5069 // Shift sign bit right or left if the two operands have different types. 5070 if (SrcVT.bitsGT(VT)) { 5071 // Op0 is MVT::f32, Op1 is MVT::f64. 5072 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 5073 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 5074 DAG.getConstant(32, MVT::i32)); 5075 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 5076 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 5077 DAG.getIntPtrConstant(0)); 5078 } 5079 5080 // Clear first operand sign bit. 5081 CV.clear(); 5082 if (VT == MVT::f64) { 5083 CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))))); 5084 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 5085 } else { 5086 CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31))))); 5087 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 5088 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 5089 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 5090 } 5091 C = ConstantVector::get(CV); 5092 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 5093 SDValue Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 5094 PseudoSourceValue::getConstantPool(), 0, 5095 false, 16); 5096 SDValue Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 5097 5098 // Or the value with the sign bit. 5099 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 5100} 5101 5102SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { 5103 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 5104 SDValue Op0 = Op.getOperand(0); 5105 SDValue Op1 = Op.getOperand(1); 5106 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 5107 5108 // Lower (X & (1 << N)) == 0 to BT. 5109 // Lower ((X >>u N) & 1) != 0 to BT. 5110 // Lower ((X >>s N) & 1) != 0 to BT. 5111 if (Op0.getOpcode() == ISD::AND && 5112 Op0.hasOneUse() && 5113 Op1.getOpcode() == ISD::Constant && 5114 Op0.getOperand(1).getOpcode() == ISD::Constant && 5115 (CC == ISD::SETEQ || CC == ISD::SETNE)) { 5116 ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); 5117 ConstantSDNode *CmpRHS = cast<ConstantSDNode>(Op1); 5118 SDValue AndLHS = Op0.getOperand(0); 5119 if (CmpRHS->getZExtValue() == 0 && AndRHS->getZExtValue() == 1 && 5120 AndLHS.getOpcode() == ISD::SRL) { 5121 SDValue LHS = AndLHS.getOperand(0); 5122 SDValue RHS = AndLHS.getOperand(1); 5123 5124 // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT 5125 // instruction. Since the shift amount is in-range-or-undefined, we know 5126 // that doing a bittest on the i16 value is ok. We extend to i32 because 5127 // the encoding for the i16 version is larger than the i32 version. 
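      // Illustrative mapping (not the exact emitted code):
      //   ((X >>u N) & 1) != 0  ->  bt X, N ; setb
      //   ((X >>u N) & 1) == 0  ->  bt X, N ; setae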
5128 if (LHS.getValueType() == MVT::i8) 5129 LHS = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, LHS); 5130 5131 // If the operand types disagree, extend the shift amount to match. Since 5132 // BT ignores high bits (like shifts) we can use anyextend. 5133 if (LHS.getValueType() != RHS.getValueType()) 5134 RHS = DAG.getNode(ISD::ANY_EXTEND, LHS.getValueType(), RHS); 5135 5136 SDValue BT = DAG.getNode(X86ISD::BT, MVT::i32, LHS, RHS); 5137 unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; 5138 return DAG.getNode(X86ISD::SETCC, MVT::i8, 5139 DAG.getConstant(Cond, MVT::i8), BT); 5140 } 5141 } 5142 5143 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 5144 unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); 5145 5146 SDValue Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 5147 return DAG.getNode(X86ISD::SETCC, MVT::i8, 5148 DAG.getConstant(X86CC, MVT::i8), Cond); 5149} 5150 5151SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 5152 SDValue Cond; 5153 SDValue Op0 = Op.getOperand(0); 5154 SDValue Op1 = Op.getOperand(1); 5155 SDValue CC = Op.getOperand(2); 5156 MVT VT = Op.getValueType(); 5157 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 5158 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 5159 5160 if (isFP) { 5161 unsigned SSECC = 8; 5162 MVT VT0 = Op0.getValueType(); 5163 assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64); 5164 unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD; 5165 bool Swap = false; 5166 5167 switch (SetCCOpcode) { 5168 default: break; 5169 case ISD::SETOEQ: 5170 case ISD::SETEQ: SSECC = 0; break; 5171 case ISD::SETOGT: 5172 case ISD::SETGT: Swap = true; // Fallthrough 5173 case ISD::SETLT: 5174 case ISD::SETOLT: SSECC = 1; break; 5175 case ISD::SETOGE: 5176 case ISD::SETGE: Swap = true; // Fallthrough 5177 case ISD::SETLE: 5178 case ISD::SETOLE: SSECC = 2; break; 5179 case ISD::SETUO: SSECC = 3; break; 5180 case ISD::SETUNE: 5181 case ISD::SETNE: SSECC = 4; break; 5182 case ISD::SETULE: Swap = true; 5183 case ISD::SETUGE: SSECC = 5; break; 5184 case ISD::SETULT: Swap = true; 5185 case ISD::SETUGT: SSECC = 6; break; 5186 case ISD::SETO: SSECC = 7; break; 5187 } 5188 if (Swap) 5189 std::swap(Op0, Op1); 5190 5191 // In the two special cases we can't handle, emit two comparisons. 5192 if (SSECC == 8) { 5193 if (SetCCOpcode == ISD::SETUEQ) { 5194 SDValue UNORD, EQ; 5195 UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); 5196 EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); 5197 return DAG.getNode(ISD::OR, VT, UNORD, EQ); 5198 } 5199 else if (SetCCOpcode == ISD::SETONE) { 5200 SDValue ORD, NEQ; 5201 ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); 5202 NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); 5203 return DAG.getNode(ISD::AND, VT, ORD, NEQ); 5204 } 5205 assert(0 && "Illegal FP comparison"); 5206 } 5207 // Handle all other FP comparisons here. 5208 return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); 5209 } 5210 5211 // We are handling one of the integer comparisons here. Since SSE only has 5212 // GT and EQ comparisons for integer, swapping operands and multiple 5213 // operations may be required for some comparisons. 
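  // Worked example (a sketch): v4i32 "a <=u b" has no direct SSE form, so it
  // becomes FlipSigns + GT + Invert:
  //   a' = a ^ 0x80000000 ; b' = b ^ 0x80000000   (unsigned order -> signed)
  //   t  = pcmpgtd a', b'                         (computes a >u b)
  //   result = ~t                                 (a <=u b)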
5214 unsigned Opc = 0, EQOpc = 0, GTOpc = 0; 5215 bool Swap = false, Invert = false, FlipSigns = false; 5216 5217 switch (VT.getSimpleVT()) { 5218 default: break; 5219 case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; 5220 case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; 5221 case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; 5222 case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; 5223 } 5224 5225 switch (SetCCOpcode) { 5226 default: break; 5227 case ISD::SETNE: Invert = true; 5228 case ISD::SETEQ: Opc = EQOpc; break; 5229 case ISD::SETLT: Swap = true; 5230 case ISD::SETGT: Opc = GTOpc; break; 5231 case ISD::SETGE: Swap = true; 5232 case ISD::SETLE: Opc = GTOpc; Invert = true; break; 5233 case ISD::SETULT: Swap = true; 5234 case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break; 5235 case ISD::SETUGE: Swap = true; 5236 case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break; 5237 } 5238 if (Swap) 5239 std::swap(Op0, Op1); 5240 5241 // Since SSE has no unsigned integer comparisons, we need to flip the sign 5242 // bits of the inputs before performing those operations. 5243 if (FlipSigns) { 5244 MVT EltVT = VT.getVectorElementType(); 5245 SDValue SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT); 5246 std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); 5247 SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0], 5248 SignBits.size()); 5249 Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec); 5250 Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec); 5251 } 5252 5253 SDValue Result = DAG.getNode(Opc, VT, Op0, Op1); 5254 5255 // If the logical-not of the result is required, perform that now. 5256 if (Invert) 5257 Result = DAG.getNOT(Result, VT); 5258 5259 return Result; 5260} 5261 5262// isX86LogicalCmp - Return true if opcode is a X86 logical comparison. 5263static bool isX86LogicalCmp(unsigned Opc) { 5264 return Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI; 5265} 5266 5267SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { 5268 bool addTest = true; 5269 SDValue Cond = Op.getOperand(0); 5270 SDValue CC; 5271 5272 if (Cond.getOpcode() == ISD::SETCC) 5273 Cond = LowerSETCC(Cond, DAG); 5274 5275 // If condition flag is set by a X86ISD::CMP, then use it as the condition 5276 // setting operand in place of the X86ISD::SETCC. 5277 if (Cond.getOpcode() == X86ISD::SETCC) { 5278 CC = Cond.getOperand(0); 5279 5280 SDValue Cmp = Cond.getOperand(1); 5281 unsigned Opc = Cmp.getOpcode(); 5282 MVT VT = Op.getValueType(); 5283 5284 bool IllegalFPCMov = false; 5285 if (VT.isFloatingPoint() && !VT.isVector() && 5286 !isScalarFPTypeInSSEReg(VT)) // FPStack? 5287 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue()); 5288 5289 if (isX86LogicalCmp(Opc) && !IllegalFPCMov) { 5290 Cond = Cmp; 5291 addTest = false; 5292 } 5293 } 5294 5295 if (addTest) { 5296 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 5297 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 5298 } 5299 5300 const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), 5301 MVT::Flag); 5302 SmallVector<SDValue, 4> Ops; 5303 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 5304 // condition is true. 
5305 Ops.push_back(Op.getOperand(2)); 5306 Ops.push_back(Op.getOperand(1)); 5307 Ops.push_back(CC); 5308 Ops.push_back(Cond); 5309 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 5310} 5311 5312// isAndOrOfSetCCs - Return true if node is an ISD::AND or ISD::OR of two 5313// X86ISD::SETCC nodes each of which has no other use apart from the 5314// AND / OR. 5315static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { 5316 Opc = Op.getOpcode(); 5317 if (Opc != ISD::OR && Opc != ISD::AND) 5318 return false; 5319 return (Op.getOperand(0).getOpcode() == X86ISD::SETCC && 5320 Op.getOperand(0).hasOneUse() && 5321 Op.getOperand(1).getOpcode() == X86ISD::SETCC && 5322 Op.getOperand(1).hasOneUse()); 5323} 5324 5325SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { 5326 bool addTest = true; 5327 SDValue Chain = Op.getOperand(0); 5328 SDValue Cond = Op.getOperand(1); 5329 SDValue Dest = Op.getOperand(2); 5330 SDValue CC; 5331 5332 if (Cond.getOpcode() == ISD::SETCC) 5333 Cond = LowerSETCC(Cond, DAG); 5334#if 0 5335 // FIXME: LowerXALUO doesn't handle these!! 5336 else if (Cond.getOpcode() == X86ISD::ADD || 5337 Cond.getOpcode() == X86ISD::SUB || 5338 Cond.getOpcode() == X86ISD::SMUL || 5339 Cond.getOpcode() == X86ISD::UMUL) 5340 Cond = LowerXALUO(Cond, DAG); 5341#endif 5342 5343 // If condition flag is set by a X86ISD::CMP, then use it as the condition 5344 // setting operand in place of the X86ISD::SETCC. 5345 if (Cond.getOpcode() == X86ISD::SETCC) { 5346 CC = Cond.getOperand(0); 5347 5348 SDValue Cmp = Cond.getOperand(1); 5349 unsigned Opc = Cmp.getOpcode(); 5350 // FIXME: WHY THE SPECIAL CASING OF LogicalCmp?? 5351 if (isX86LogicalCmp(Opc) || Opc == X86ISD::BT) { 5352 Cond = Cmp; 5353 addTest = false; 5354 } else { 5355 switch (cast<ConstantSDNode>(CC)->getZExtValue()) { 5356 default: break; 5357 case X86::COND_O: 5358 case X86::COND_B: 5359 // These can only come from an arithmetic instruction with overflow, 5360 // e.g. SADDO, UADDO. 5361 Cond = Cond.getNode()->getOperand(1); 5362 addTest = false; 5363 break; 5364 } 5365 } 5366 } else { 5367 unsigned CondOpc; 5368 if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { 5369 SDValue Cmp = Cond.getOperand(0).getOperand(1); 5370 unsigned Opc = Cmp.getOpcode(); 5371 if (CondOpc == ISD::OR) { 5372 // Also, recognize the pattern generated by an FCMP_UNE. We can emit 5373 // two branches instead of an explicit OR instruction with a 5374 // separate test. 5375 if (Cmp == Cond.getOperand(1).getOperand(1) && 5376 isX86LogicalCmp(Opc)) { 5377 CC = Cond.getOperand(0).getOperand(0); 5378 Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 5379 Chain, Dest, CC, Cmp); 5380 CC = Cond.getOperand(1).getOperand(0); 5381 Cond = Cmp; 5382 addTest = false; 5383 } 5384 } else { // ISD::AND 5385 // Also, recognize the pattern generated by an FCMP_OEQ. We can emit 5386 // two branches instead of an explicit AND instruction with a 5387 // separate test. However, we only do this if this block doesn't 5388 // have a fall-through edge, because this requires an explicit 5389 // jmp when the condition is false.
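        // Illustrative shape of the result for br (setoeq x, y), TBB, FBB
        // (a sketch):
        //   ucomisd %xmm1, %xmm0
        //   jne  FBB         ; opposite of the first setcc
        //   jp   FBB         ; opposite of the second setcc
        //   jmp  TBB         ; the explicit jmp this transform requires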
5390 if (Cmp == Cond.getOperand(1).getOperand(1) && 5391 isX86LogicalCmp(Opc) && 5392 Op.getNode()->hasOneUse()) { 5393 X86::CondCode CCode = 5394 (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); 5395 CCode = X86::GetOppositeBranchCondition(CCode); 5396 CC = DAG.getConstant(CCode, MVT::i8); 5397 SDValue User = SDValue(*Op.getNode()->use_begin(), 0); 5398 // Look for an unconditional branch following this conditional branch. 5399 // We need this because we need to reverse the successors in order 5400 // to implement FCMP_OEQ. 5401 if (User.getOpcode() == ISD::BR) { 5402 SDValue FalseBB = User.getOperand(1); 5403 SDValue NewBR = 5404 DAG.UpdateNodeOperands(User, User.getOperand(0), Dest); 5405 assert(NewBR == User); 5406 Dest = FalseBB; 5407 5408 Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 5409 Chain, Dest, CC, Cmp); 5410 X86::CondCode CCode = 5411 (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); 5412 CCode = X86::GetOppositeBranchCondition(CCode); 5413 CC = DAG.getConstant(CCode, MVT::i8); 5414 Cond = Cmp; 5415 addTest = false; 5416 } 5417 } 5418 } 5419 } 5420 } 5421 5422 if (addTest) { 5423 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 5424 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 5425 } 5426 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 5427 Chain, Dest, CC, Cond); 5428} 5429 5430 5431// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 5432// Calls to _alloca are needed to probe the stack when allocating more than 4K 5433// bytes in one go. Touching the stack at 4K increments is necessary to ensure 5434// that the guard pages used by the OS virtual memory manager are allocated in 5435// correct sequence. 5436SDValue 5437X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 5438 SelectionDAG &DAG) { 5439 assert(Subtarget->isTargetCygMing() && 5440 "This should be used only on Cygwin/Mingw targets"); 5441 5442 // Get the inputs. 5443 SDValue Chain = Op.getOperand(0); 5444 SDValue Size = Op.getOperand(1); 5445 // FIXME: Ensure alignment here 5446 5447 SDValue Flag; 5448 5449 MVT IntPtr = getPointerTy(); 5450 MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; 5451 5452 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); 5453 5454 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 5455 Flag = Chain.getValue(1); 5456 5457 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 5458 SDValue Ops[] = { Chain, 5459 DAG.getTargetExternalSymbol("_alloca", IntPtr), 5460 DAG.getRegister(X86::EAX, IntPtr), 5461 DAG.getRegister(X86StackPtr, SPTy), 5462 Flag }; 5463 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 5); 5464 Flag = Chain.getValue(1); 5465 5466 Chain = DAG.getCALLSEQ_END(Chain, 5467 DAG.getIntPtrConstant(0, true), 5468 DAG.getIntPtrConstant(0, true), 5469 Flag); 5470 5471 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 5472 5473 SDValue Ops1[2] = { Chain.getValue(0), Chain }; 5474 return DAG.getMergeValues(Ops1, 2); 5475} 5476 5477SDValue 5478X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, 5479 SDValue Chain, 5480 SDValue Dst, SDValue Src, 5481 SDValue Size, unsigned Align, 5482 const Value *DstSV, 5483 uint64_t DstSVOff) { 5484 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 5485 5486 // If not DWORD aligned or size is more than the threshold, call the library. 5487 // The libc version is likely to be faster for these cases. It can use the 5488 // address value and run time information about the CPU.
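  // Decision summary (illustrative): if the store is misaligned, the size is
  // not constant, or it exceeds the subtarget threshold, fall back to memset
  // (or to a bzero entry point when zeroing and the target provides one);
  // otherwise emit an inline rep stos{b,w,d,q} plus a small tail memset.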
5489 if ((Align & 3) != 0 || 5490 !ConstantSize || 5491 ConstantSize->getZExtValue() > 5492 getSubtarget()->getMaxInlineSizeThreshold()) { 5493 SDValue InFlag(0, 0); 5494 5495 // Check to see if there is a specialized entry-point for memory zeroing. 5496 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); 5497 5498 if (const char *bzeroEntry = V && 5499 V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { 5500 MVT IntPtr = getPointerTy(); 5501 const Type *IntPtrTy = TD->getIntPtrType(); 5502 TargetLowering::ArgListTy Args; 5503 TargetLowering::ArgListEntry Entry; 5504 Entry.Node = Dst; 5505 Entry.Ty = IntPtrTy; 5506 Args.push_back(Entry); 5507 Entry.Node = Size; 5508 Args.push_back(Entry); 5509 std::pair<SDValue,SDValue> CallResult = 5510 LowerCallTo(Chain, Type::VoidTy, false, false, false, false, 5511 CallingConv::C, false, 5512 DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG); 5513 return CallResult.second; 5514 } 5515 5516 // Otherwise have the target-independent code call memset. 5517 return SDValue(); 5518 } 5519 5520 uint64_t SizeVal = ConstantSize->getZExtValue(); 5521 SDValue InFlag(0, 0); 5522 MVT AVT; 5523 SDValue Count; 5524 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); 5525 unsigned BytesLeft = 0; 5526 bool TwoRepStos = false; 5527 if (ValC) { 5528 unsigned ValReg; 5529 uint64_t Val = ValC->getZExtValue() & 255; 5530 5531 // If the value is a constant, then we can potentially use larger sets. 5532 switch (Align & 3) { 5533 case 2: // WORD aligned 5534 AVT = MVT::i16; 5535 ValReg = X86::AX; 5536 Val = (Val << 8) | Val; 5537 break; 5538 case 0: // DWORD aligned 5539 AVT = MVT::i32; 5540 ValReg = X86::EAX; 5541 Val = (Val << 8) | Val; 5542 Val = (Val << 16) | Val; 5543 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned 5544 AVT = MVT::i64; 5545 ValReg = X86::RAX; 5546 Val = (Val << 32) | Val; 5547 } 5548 break; 5549 default: // Byte aligned 5550 AVT = MVT::i8; 5551 ValReg = X86::AL; 5552 Count = DAG.getIntPtrConstant(SizeVal); 5553 break; 5554 } 5555 5556 if (AVT.bitsGT(MVT::i8)) { 5557 unsigned UBytes = AVT.getSizeInBits() / 8; 5558 Count = DAG.getIntPtrConstant(SizeVal / UBytes); 5559 BytesLeft = SizeVal % UBytes; 5560 } 5561 5562 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 5563 InFlag); 5564 InFlag = Chain.getValue(1); 5565 } else { 5566 AVT = MVT::i8; 5567 Count = DAG.getIntPtrConstant(SizeVal); 5568 Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag); 5569 InFlag = Chain.getValue(1); 5570 } 5571 5572 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 5573 Count, InFlag); 5574 InFlag = Chain.getValue(1); 5575 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 5576 Dst, InFlag); 5577 InFlag = Chain.getValue(1); 5578 5579 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5580 SmallVector<SDValue, 8> Ops; 5581 Ops.push_back(Chain); 5582 Ops.push_back(DAG.getValueType(AVT)); 5583 Ops.push_back(InFlag); 5584 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 5585 5586 if (TwoRepStos) { 5587 InFlag = Chain.getValue(1); 5588 Count = Size; 5589 MVT CVT = Count.getValueType(); 5590 SDValue Left = DAG.getNode(ISD::AND, CVT, Count, 5591 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 5592 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? 
X86::RCX : X86::ECX, 5593 Left, InFlag); 5594 InFlag = Chain.getValue(1); 5595 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5596 Ops.clear(); 5597 Ops.push_back(Chain); 5598 Ops.push_back(DAG.getValueType(MVT::i8)); 5599 Ops.push_back(InFlag); 5600 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 5601 } else if (BytesLeft) { 5602 // Handle the last 1 - 7 bytes. 5603 unsigned Offset = SizeVal - BytesLeft; 5604 MVT AddrVT = Dst.getValueType(); 5605 MVT SizeVT = Size.getValueType(); 5606 5607 Chain = DAG.getMemset(Chain, 5608 DAG.getNode(ISD::ADD, AddrVT, Dst, 5609 DAG.getConstant(Offset, AddrVT)), 5610 Src, 5611 DAG.getConstant(BytesLeft, SizeVT), 5612 Align, DstSV, DstSVOff + Offset); 5613 } 5614 5615 // TODO: Use a TokenFactor, as in memcpy, instead of a single chain. 5616 return Chain; 5617} 5618 5619SDValue 5620X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, 5621 SDValue Chain, SDValue Dst, SDValue Src, 5622 SDValue Size, unsigned Align, 5623 bool AlwaysInline, 5624 const Value *DstSV, uint64_t DstSVOff, 5625 const Value *SrcSV, uint64_t SrcSVOff) { 5626 // This requires the copy size to be a constant, preferably 5627 // within a subtarget-specific limit. 5628 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 5629 if (!ConstantSize) 5630 return SDValue(); 5631 uint64_t SizeVal = ConstantSize->getZExtValue(); 5632 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) 5633 return SDValue(); 5634 5635 // If not DWORD aligned, call the library. 5636 if ((Align & 3) != 0) 5637 return SDValue(); 5638 5639 // DWORD aligned 5640 MVT AVT = MVT::i32; 5641 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned 5642 AVT = MVT::i64; 5643 5644 unsigned UBytes = AVT.getSizeInBits() / 8; 5645 unsigned CountVal = SizeVal / UBytes; 5646 SDValue Count = DAG.getIntPtrConstant(CountVal); 5647 unsigned BytesLeft = SizeVal % UBytes; 5648 5649 SDValue InFlag(0, 0); 5650 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 5651 Count, InFlag); 5652 InFlag = Chain.getValue(1); 5653 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 5654 Dst, InFlag); 5655 InFlag = Chain.getValue(1); 5656 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 5657 Src, InFlag); 5658 InFlag = Chain.getValue(1); 5659 5660 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5661 SmallVector<SDValue, 8> Ops; 5662 Ops.push_back(Chain); 5663 Ops.push_back(DAG.getValueType(AVT)); 5664 Ops.push_back(InFlag); 5665 SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 5666 5667 SmallVector<SDValue, 4> Results; 5668 Results.push_back(RepMovs); 5669 if (BytesLeft) { 5670 // Handle the last 1 - 7 bytes.
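    // e.g. (a sketch) a 15-byte, 8-byte-aligned copy on x86-64 does one
    // "rep movsq" iteration (8 bytes) and hands the remaining 7 bytes to the
    // recursive getMemcpy call below.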
5671 unsigned Offset = SizeVal - BytesLeft; 5672 MVT DstVT = Dst.getValueType(); 5673 MVT SrcVT = Src.getValueType(); 5674 MVT SizeVT = Size.getValueType(); 5675 Results.push_back(DAG.getMemcpy(Chain, 5676 DAG.getNode(ISD::ADD, DstVT, Dst, 5677 DAG.getConstant(Offset, DstVT)), 5678 DAG.getNode(ISD::ADD, SrcVT, Src, 5679 DAG.getConstant(Offset, SrcVT)), 5680 DAG.getConstant(BytesLeft, SizeVT), 5681 Align, AlwaysInline, 5682 DstSV, DstSVOff + Offset, 5683 SrcSV, SrcSVOff + Offset)); 5684 } 5685 5686 return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); 5687} 5688 5689SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { 5690 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 5691 5692 if (!Subtarget->is64Bit()) { 5693 // vastart just stores the address of the VarArgsFrameIndex slot into the 5694 // memory location argument. 5695 SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5696 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV, 0); 5697 } 5698 5699 // __va_list_tag: 5700 // gp_offset (0 - 6 * 8) 5701 // fp_offset (48 - 48 + 8 * 16) 5702 // overflow_arg_area (point to parameters coming in memory). 5703 // reg_save_area 5704 SmallVector<SDValue, 8> MemOps; 5705 SDValue FIN = Op.getOperand(1); 5706 // Store gp_offset 5707 SDValue Store = DAG.getStore(Op.getOperand(0), 5708 DAG.getConstant(VarArgsGPOffset, MVT::i32), 5709 FIN, SV, 0); 5710 MemOps.push_back(Store); 5711 5712 // Store fp_offset 5713 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5714 Store = DAG.getStore(Op.getOperand(0), 5715 DAG.getConstant(VarArgsFPOffset, MVT::i32), 5716 FIN, SV, 0); 5717 MemOps.push_back(Store); 5718 5719 // Store ptr to overflow_arg_area 5720 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5721 SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5722 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV, 0); 5723 MemOps.push_back(Store); 5724 5725 // Store ptr to reg_save_area. 5726 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); 5727 SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 5728 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV, 0); 5729 MemOps.push_back(Store); 5730 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 5731} 5732 5733SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { 5734 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5735 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); 5736 SDValue Chain = Op.getOperand(0); 5737 SDValue SrcPtr = Op.getOperand(1); 5738 SDValue SrcSV = Op.getOperand(2); 5739 5740 assert(0 && "VAArgInst is not yet implemented for x86-64!"); 5741 abort(); 5742 return SDValue(); 5743} 5744 5745SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { 5746 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 
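  // For reference (System V AMD64 ABI), the 24-byte va_list handled here is:
  //   struct __va_list_tag {
  //     i32 gp_offset;          // next GP register slot, 0..48
  //     i32 fp_offset;          // next XMM register slot, 48..176
  //     i8 *overflow_arg_area;  // next stack-passed argument
  //     i8 *reg_save_area;
  //   };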
5747 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); 5748 SDValue Chain = Op.getOperand(0); 5749 SDValue DstPtr = Op.getOperand(1); 5750 SDValue SrcPtr = Op.getOperand(2); 5751 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 5752 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5753 5754 return DAG.getMemcpy(Chain, DstPtr, SrcPtr, 5755 DAG.getIntPtrConstant(24), 8, false, 5756 DstSV, 0, SrcSV, 0); 5757} 5758 5759SDValue 5760X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { 5761 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5762 switch (IntNo) { 5763 default: return SDValue(); // Don't custom lower most intrinsics. 5764 // Comparison intrinsics. 5765 case Intrinsic::x86_sse_comieq_ss: 5766 case Intrinsic::x86_sse_comilt_ss: 5767 case Intrinsic::x86_sse_comile_ss: 5768 case Intrinsic::x86_sse_comigt_ss: 5769 case Intrinsic::x86_sse_comige_ss: 5770 case Intrinsic::x86_sse_comineq_ss: 5771 case Intrinsic::x86_sse_ucomieq_ss: 5772 case Intrinsic::x86_sse_ucomilt_ss: 5773 case Intrinsic::x86_sse_ucomile_ss: 5774 case Intrinsic::x86_sse_ucomigt_ss: 5775 case Intrinsic::x86_sse_ucomige_ss: 5776 case Intrinsic::x86_sse_ucomineq_ss: 5777 case Intrinsic::x86_sse2_comieq_sd: 5778 case Intrinsic::x86_sse2_comilt_sd: 5779 case Intrinsic::x86_sse2_comile_sd: 5780 case Intrinsic::x86_sse2_comigt_sd: 5781 case Intrinsic::x86_sse2_comige_sd: 5782 case Intrinsic::x86_sse2_comineq_sd: 5783 case Intrinsic::x86_sse2_ucomieq_sd: 5784 case Intrinsic::x86_sse2_ucomilt_sd: 5785 case Intrinsic::x86_sse2_ucomile_sd: 5786 case Intrinsic::x86_sse2_ucomigt_sd: 5787 case Intrinsic::x86_sse2_ucomige_sd: 5788 case Intrinsic::x86_sse2_ucomineq_sd: { 5789 unsigned Opc = 0; 5790 ISD::CondCode CC = ISD::SETCC_INVALID; 5791 switch (IntNo) { 5792 default: break; 5793 case Intrinsic::x86_sse_comieq_ss: 5794 case Intrinsic::x86_sse2_comieq_sd: 5795 Opc = X86ISD::COMI; 5796 CC = ISD::SETEQ; 5797 break; 5798 case Intrinsic::x86_sse_comilt_ss: 5799 case Intrinsic::x86_sse2_comilt_sd: 5800 Opc = X86ISD::COMI; 5801 CC = ISD::SETLT; 5802 break; 5803 case Intrinsic::x86_sse_comile_ss: 5804 case Intrinsic::x86_sse2_comile_sd: 5805 Opc = X86ISD::COMI; 5806 CC = ISD::SETLE; 5807 break; 5808 case Intrinsic::x86_sse_comigt_ss: 5809 case Intrinsic::x86_sse2_comigt_sd: 5810 Opc = X86ISD::COMI; 5811 CC = ISD::SETGT; 5812 break; 5813 case Intrinsic::x86_sse_comige_ss: 5814 case Intrinsic::x86_sse2_comige_sd: 5815 Opc = X86ISD::COMI; 5816 CC = ISD::SETGE; 5817 break; 5818 case Intrinsic::x86_sse_comineq_ss: 5819 case Intrinsic::x86_sse2_comineq_sd: 5820 Opc = X86ISD::COMI; 5821 CC = ISD::SETNE; 5822 break; 5823 case Intrinsic::x86_sse_ucomieq_ss: 5824 case Intrinsic::x86_sse2_ucomieq_sd: 5825 Opc = X86ISD::UCOMI; 5826 CC = ISD::SETEQ; 5827 break; 5828 case Intrinsic::x86_sse_ucomilt_ss: 5829 case Intrinsic::x86_sse2_ucomilt_sd: 5830 Opc = X86ISD::UCOMI; 5831 CC = ISD::SETLT; 5832 break; 5833 case Intrinsic::x86_sse_ucomile_ss: 5834 case Intrinsic::x86_sse2_ucomile_sd: 5835 Opc = X86ISD::UCOMI; 5836 CC = ISD::SETLE; 5837 break; 5838 case Intrinsic::x86_sse_ucomigt_ss: 5839 case Intrinsic::x86_sse2_ucomigt_sd: 5840 Opc = X86ISD::UCOMI; 5841 CC = ISD::SETGT; 5842 break; 5843 case Intrinsic::x86_sse_ucomige_ss: 5844 case Intrinsic::x86_sse2_ucomige_sd: 5845 Opc = X86ISD::UCOMI; 5846 CC = ISD::SETGE; 5847 break; 5848 case Intrinsic::x86_sse_ucomineq_ss: 5849 case Intrinsic::x86_sse2_ucomineq_sd: 5850 Opc = X86ISD::UCOMI; 5851 
CC = ISD::SETNE; 5852 break; 5853 } 5854 5855 SDValue LHS = Op.getOperand(1); 5856 SDValue RHS = Op.getOperand(2); 5857 unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG); 5858 SDValue Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 5859 SDValue SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 5860 DAG.getConstant(X86CC, MVT::i8), Cond); 5861 return DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, SetCC); 5862 } 5863 5864 // Fix vector shift instructions where the last operand is a non-immediate 5865 // i32 value. 5866 case Intrinsic::x86_sse2_pslli_w: 5867 case Intrinsic::x86_sse2_pslli_d: 5868 case Intrinsic::x86_sse2_pslli_q: 5869 case Intrinsic::x86_sse2_psrli_w: 5870 case Intrinsic::x86_sse2_psrli_d: 5871 case Intrinsic::x86_sse2_psrli_q: 5872 case Intrinsic::x86_sse2_psrai_w: 5873 case Intrinsic::x86_sse2_psrai_d: 5874 case Intrinsic::x86_mmx_pslli_w: 5875 case Intrinsic::x86_mmx_pslli_d: 5876 case Intrinsic::x86_mmx_pslli_q: 5877 case Intrinsic::x86_mmx_psrli_w: 5878 case Intrinsic::x86_mmx_psrli_d: 5879 case Intrinsic::x86_mmx_psrli_q: 5880 case Intrinsic::x86_mmx_psrai_w: 5881 case Intrinsic::x86_mmx_psrai_d: { 5882 SDValue ShAmt = Op.getOperand(2); 5883 if (isa<ConstantSDNode>(ShAmt)) 5884 return SDValue(); 5885 5886 unsigned NewIntNo = 0; 5887 MVT ShAmtVT = MVT::v4i32; 5888 switch (IntNo) { 5889 case Intrinsic::x86_sse2_pslli_w: 5890 NewIntNo = Intrinsic::x86_sse2_psll_w; 5891 break; 5892 case Intrinsic::x86_sse2_pslli_d: 5893 NewIntNo = Intrinsic::x86_sse2_psll_d; 5894 break; 5895 case Intrinsic::x86_sse2_pslli_q: 5896 NewIntNo = Intrinsic::x86_sse2_psll_q; 5897 break; 5898 case Intrinsic::x86_sse2_psrli_w: 5899 NewIntNo = Intrinsic::x86_sse2_psrl_w; 5900 break; 5901 case Intrinsic::x86_sse2_psrli_d: 5902 NewIntNo = Intrinsic::x86_sse2_psrl_d; 5903 break; 5904 case Intrinsic::x86_sse2_psrli_q: 5905 NewIntNo = Intrinsic::x86_sse2_psrl_q; 5906 break; 5907 case Intrinsic::x86_sse2_psrai_w: 5908 NewIntNo = Intrinsic::x86_sse2_psra_w; 5909 break; 5910 case Intrinsic::x86_sse2_psrai_d: 5911 NewIntNo = Intrinsic::x86_sse2_psra_d; 5912 break; 5913 default: { 5914 ShAmtVT = MVT::v2i32; 5915 switch (IntNo) { 5916 case Intrinsic::x86_mmx_pslli_w: 5917 NewIntNo = Intrinsic::x86_mmx_psll_w; 5918 break; 5919 case Intrinsic::x86_mmx_pslli_d: 5920 NewIntNo = Intrinsic::x86_mmx_psll_d; 5921 break; 5922 case Intrinsic::x86_mmx_pslli_q: 5923 NewIntNo = Intrinsic::x86_mmx_psll_q; 5924 break; 5925 case Intrinsic::x86_mmx_psrli_w: 5926 NewIntNo = Intrinsic::x86_mmx_psrl_w; 5927 break; 5928 case Intrinsic::x86_mmx_psrli_d: 5929 NewIntNo = Intrinsic::x86_mmx_psrl_d; 5930 break; 5931 case Intrinsic::x86_mmx_psrli_q: 5932 NewIntNo = Intrinsic::x86_mmx_psrl_q; 5933 break; 5934 case Intrinsic::x86_mmx_psrai_w: 5935 NewIntNo = Intrinsic::x86_mmx_psra_w; 5936 break; 5937 case Intrinsic::x86_mmx_psrai_d: 5938 NewIntNo = Intrinsic::x86_mmx_psra_d; 5939 break; 5940 default: abort(); // Can't reach here. 
5941 } 5942 break; 5943 } 5944 } 5945 MVT VT = Op.getValueType(); 5946 ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT, 5947 DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt)); 5948 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, 5949 DAG.getConstant(NewIntNo, MVT::i32), 5950 Op.getOperand(1), ShAmt); 5951 } 5952 } 5953} 5954 5955SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { 5956 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5957 5958 if (Depth > 0) { 5959 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 5960 SDValue Offset = 5961 DAG.getConstant(TD->getPointerSize(), 5962 Subtarget->is64Bit() ? MVT::i64 : MVT::i32); 5963 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), 5964 DAG.getNode(ISD::ADD, getPointerTy(), FrameAddr, Offset), 5965 NULL, 0); 5966 } 5967 5968 // Just load the return address. 5969 SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); 5970 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 5971} 5972 5973SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 5974 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 5975 MFI->setFrameAddressIsTaken(true); 5976 MVT VT = Op.getValueType(); 5977 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5978 unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; 5979 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), FrameReg, VT); 5980 while (Depth--) 5981 FrameAddr = DAG.getLoad(VT, DAG.getEntryNode(), FrameAddr, NULL, 0); 5982 return FrameAddr; 5983} 5984 5985SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, 5986 SelectionDAG &DAG) { 5987 return DAG.getIntPtrConstant(2*TD->getPointerSize()); 5988} 5989 5990SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) 5991{ 5992 MachineFunction &MF = DAG.getMachineFunction(); 5993 SDValue Chain = Op.getOperand(0); 5994 SDValue Offset = Op.getOperand(1); 5995 SDValue Handler = Op.getOperand(2); 5996 5997 SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP, 5998 getPointerTy()); 5999 unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); 6000 6001 SDValue StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 6002 DAG.getIntPtrConstant(-TD->getPointerSize())); 6003 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 6004 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 6005 Chain = DAG.getCopyToReg(Chain, StoreAddrReg, StoreAddr); 6006 MF.getRegInfo().addLiveOut(StoreAddrReg); 6007 6008 return DAG.getNode(X86ISD::EH_RETURN, 6009 MVT::Other, 6010 Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); 6011} 6012 6013SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, 6014 SelectionDAG &DAG) { 6015 SDValue Root = Op.getOperand(0); 6016 SDValue Trmp = Op.getOperand(1); // trampoline 6017 SDValue FPtr = Op.getOperand(2); // nested function 6018 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 6019 6020 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 6021 6022 const X86InstrInfo *TII = 6023 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 6024 6025 if (Subtarget->is64Bit()) { 6026 SDValue OutChains[6]; 6027 6028 // Large code-model. 
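    // The 64-bit trampoline written below is, in assembly (a sketch):
    //   movabsq $<fptr>, %r11   ; bytes 0..9
    //   movabsq $<nest>, %r10   ; bytes 10..19
    //   jmpq    *%r11           ; bytes 20..22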
6029 6030 const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); 6031 const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); 6032 6033 const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); 6034 const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); 6035 6036 const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix 6037 6038 // Load the pointer to the nested function into R11. 6039 unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 6040 SDValue Addr = Trmp; 6041 OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 6042 TrmpAddr, 0); 6043 6044 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); 6045 OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpAddr, 2, false, 2); 6046 6047 // Load the 'nest' parameter value into R10. 6048 // R10 is specified in X86CallingConv.td 6049 OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 6050 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); 6051 OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 6052 TrmpAddr, 10); 6053 6054 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); 6055 OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 12, false, 2); 6056 6057 // Jump to the nested function. 6058 OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 6059 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); 6060 OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 6061 TrmpAddr, 20); 6062 6063 unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 6064 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); 6065 OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr, 6066 TrmpAddr, 22); 6067 6068 SDValue Ops[] = 6069 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) }; 6070 return DAG.getMergeValues(Ops, 2); 6071 } else { 6072 const Function *Func = 6073 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 6074 unsigned CC = Func->getCallingConv(); 6075 unsigned NestReg; 6076 6077 switch (CC) { 6078 default: 6079 assert(0 && "Unsupported calling convention"); 6080 case CallingConv::C: 6081 case CallingConv::X86_StdCall: { 6082 // Pass 'nest' parameter in ECX. 6083 // Must be kept in sync with X86CallingConv.td 6084 NestReg = X86::ECX; 6085 6086 // Check that ECX wasn't needed by an 'inreg' parameter. 6087 const FunctionType *FTy = Func->getFunctionType(); 6088 const AttrListPtr &Attrs = Func->getAttributes(); 6089 6090 if (!Attrs.isEmpty() && !Func->isVarArg()) { 6091 unsigned InRegCount = 0; 6092 unsigned Idx = 1; 6093 6094 for (FunctionType::param_iterator I = FTy->param_begin(), 6095 E = FTy->param_end(); I != E; ++I, ++Idx) 6096 if (Attrs.paramHasAttr(Idx, Attribute::InReg)) 6097 // FIXME: should only count parameters that are lowered to integers. 6098 InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; 6099 6100 if (InRegCount > 2) { 6101 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 6102 abort(); 6103 } 6104 } 6105 break; 6106 } 6107 case CallingConv::X86_FastCall: 6108 case CallingConv::Fast: 6109 // Pass 'nest' parameter in EAX. 
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::EAX;
      break;
    }

    SDValue OutChains[4];
    SDValue Addr, Disp;

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);

    const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
    const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
    OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                Trmp, TrmpAddr, 0);

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
    OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 1, false, 1);

    const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
    OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
                                TrmpAddr, 5, false, 1);

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
    OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpAddr, 6, false, 1);

    SDValue Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
    return DAG.getMergeValues(Ops, 2);
  }
}

SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 11:10 of the x87 FP control word (FPCW,
   saved with fnstcw), and has the following settings:
     00 Round to nearest
     01 Round to -inf
     10 Round to +inf
     11 Round to 0

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
  */

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  MVT VT = Op.getValueType();

  // Save FP Control Word to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
                              DAG.getEntryNode(), StackSlot);

  // Load FP Control Word from stack slot
  SDValue CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::SRL, MVT::i16,
                DAG.getNode(ISD::AND, MVT::i16,
                            CWD, DAG.getConstant(0x800, MVT::i16)),
                DAG.getConstant(11, MVT::i8));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, MVT::i16,
                DAG.getNode(ISD::AND, MVT::i16,
                            CWD, DAG.getConstant(0x400, MVT::i16)),
                DAG.getConstant(9, MVT::i8));

  SDValue RetVal =
    DAG.getNode(ISD::AND, MVT::i16,
                DAG.getNode(ISD::ADD, MVT::i16,
                            DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
                            DAG.getConstant(1, MVT::i16)),
                DAG.getConstant(3, MVT::i16));

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}

SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT OpVT = VT;
  unsigned NumBits = VT.getSizeInBits();

  Op = Op.getOperand(0);
  if (VT == MVT::i8) {
    // Zero extend to i32 since there is no i8 bsr.
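    // (bsr and bsf exist only in 16-, 32- and 64-bit forms.)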
    OpVT = MVT::i32;
    Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
  }

  // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  Op = DAG.getNode(X86ISD::BSR, VTs, Op);

  // If src is zero (i.e. bsr sets ZF), cmov in 2*NumBits-1 so that the
  // xor with NumBits-1 below yields NumBits.
  SmallVector<SDValue, 4> Ops;
  Ops.push_back(Op);
  Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
  Ops.push_back(Op.getValue(1));
  Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);

  // Finally xor with NumBits-1.
  Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));

  if (VT == MVT::i8)
    Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
  return Op;
}

SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT OpVT = VT;
  unsigned NumBits = VT.getSizeInBits();

  Op = Op.getOperand(0);
  if (VT == MVT::i8) {
    OpVT = MVT::i32;
    Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
  }

  // Issue a bsf (scan bits forward) which also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  Op = DAG.getNode(X86ISD::BSF, VTs, Op);

  // If src is zero (i.e. bsf sets ZF), return NumBits.
  SmallVector<SDValue, 4> Ops;
  Ops.push_back(Op);
  Ops.push_back(DAG.getConstant(NumBits, OpVT));
  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
  Ops.push_back(Op.getValue(1));
  Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);

  if (VT == MVT::i8)
    Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
  return Op;
}

SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");

  //  ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
  //  ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
  //  ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
  //  ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
  //  ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
  //
  //  AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
  //  AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
  //  return AloBlo + AloBhi + AhiBlo;
  //
  // i.e. (A*B) mod 2^64 = AloBlo + ((AloBhi + AhiBlo) << 32); the Ahi*Bhi
  // term is dropped because it lands entirely in bits >= 64.

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                       A, DAG.getConstant(32, MVT::i32));
  SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                       B, DAG.getConstant(32, MVT::i32));
  SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                       A, B);
  SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                       A, Bhi);
  SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                       Ahi, B);
  AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                       AloBhi, DAG.getConstant(32, MVT::i32));
  AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                       AhiBlo, DAG.getConstant(32, MVT::i32));
  SDValue Res = DAG.getNode(ISD::ADD, VT, AloBlo, AloBhi);
  Res = DAG.getNode(ISD::ADD, VT, Res, AhiBlo);
  return Res;
}


SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  // Lower the "add/sub/mul with overflow" instruction into a regular
  // instruction plus a "setcc" instruction that checks the overflow flag.
  // The "brcond" lowering looks for this combo and may remove the "setcc"
  // instruction if the "setcc" has only one use.
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned BaseOp = 0;
  unsigned Cond = 0;

  switch (Op.getOpcode()) {
  default: assert(0 && "Unknown ovf instruction!");
  case ISD::SADDO:
    BaseOp = X86ISD::ADD;
    Cond = X86::COND_O;
    break;
  case ISD::UADDO:
    BaseOp = X86ISD::ADD;
    Cond = X86::COND_B;
    break;
  case ISD::SSUBO:
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_O;
    break;
  case ISD::USUBO:
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_B;
    break;
  case ISD::SMULO:
    BaseOp = X86ISD::SMUL;
    Cond = X86::COND_O;
    break;
  case ISD::UMULO:
    BaseOp = X86ISD::UMUL;
    Cond = X86::COND_B;
    break;
  }

  // Also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Sum = DAG.getNode(BaseOp, VTs, LHS, RHS);

  SDValue SetCC =
    DAG.getNode(X86ISD::SETCC, N->getValueType(1),
                DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));

  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
  return Sum;
}

SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
  MVT T = Op.getValueType();
  unsigned Reg = 0;
  unsigned size = 0;
  switch (T.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8:  Reg = X86::AL;  size = 1; break;
  case MVT::i16: Reg = X86::AX;  size = 2; break;
  case MVT::i32: Reg = X86::EAX; size = 4; break;
  case MVT::i64:
    assert(Subtarget->is64Bit() && "Node not type legal!");
    Reg = X86::RAX; size = 8;
    break;
  }
  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg,
                                  Op.getOperand(2), SDValue());
  SDValue Ops[] = { cpIn.getValue(0),
                    Op.getOperand(1),
                    Op.getOperand(3),
                    DAG.getTargetConstant(size, MVT::i8),
                    cpIn.getValue(1) };
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, Tys, Ops, 5);
  SDValue cpOut =
    DAG.getCopyFromReg(Result.getValue(0), Reg, T, Result.getValue(1));
  return cpOut;
}

SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                 SelectionDAG &DAG) {
  assert(Subtarget->is64Bit() && "Result not type legalized?");
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue TheChain = Op.getOperand(0);
  SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
  SDValue rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
  SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, MVT::i64,
                                   rax.getValue(2));
  SDValue Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
                            DAG.getConstant(32, MVT::i8));
  SDValue Ops[] = {
    DAG.getNode(ISD::OR, MVT::i64, rax, Tmp),
    rdx.getValue(1)
  };
  return DAG.getMergeValues(Ops, 2);
}

SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  MVT T = Node->getValueType(0);
  SDValue negOp = DAG.getNode(ISD::SUB, T,
                              DAG.getConstant(0, T), Node->getOperand(2));
  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD,
                       cast<AtomicSDNode>(Node)->getMemoryVT(),
                       Node->getOperand(0),
                       Node->getOperand(1), negOp,
                       cast<AtomicSDNode>(Node)->getSrcValue(),
                       cast<AtomicSDNode>(Node)->getAlignment());
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
  case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::VSETCC:             return LowerVSETCC(Op, DAG);
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::CALL:               return LowerCALL(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  case ISD::FRAME_TO_ARGS_OFFSET:
                                return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
  case ISD::MUL:                return LowerMUL_V2I64(Op, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:              return LowerXALUO(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
  }
}

void X86TargetLowering::
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
                        SelectionDAG &DAG, unsigned NewOp) {
  MVT T = Node->getValueType(0);
  assert(T == MVT::i64 && "Only know how to expand i64 atomics");
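  // On a 32-bit target the i64 operand cannot live in one register, so split
  // it into two i32 halves here; the matching ATOM*6432 pseudo instruction is
  // later expanded into a cmpxchg8b loop by
  // EmitAtomicBit6432WithCustomInserter below.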
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(0));
  SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(1));
  // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
  // have a MemOperand.  Pass the info through as a normal operand.
  SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
  SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
  SDValue Result = DAG.getNode(NewOp, Tys, Ops, 5);
  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2));
  Results.push_back(Result.getValue(2));
}

/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default:
    assert(false && "Do not know how to custom type legalize this operation!");
    return;
  case ISD::FP_TO_SINT: {
    std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG);
    SDValue FIST = Vals.first, StackSlot = Vals.second;
    if (FIST.getNode() != 0) {
      MVT VT = N->getValueType(0);
      // Return a load from the stack slot.
      Results.push_back(DAG.getLoad(VT, FIST, StackSlot, NULL, 0));
    }
    return;
  }
  case ISD::READCYCLECOUNTER: {
    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue TheChain = N->getOperand(0);
    SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
    SDValue eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
    SDValue edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, MVT::i32,
                                     eax.getValue(2));
    // Use a buildpair to merge the two 32-bit values into a 64-bit one.
    SDValue Ops[] = { eax, edx };
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2));
    Results.push_back(edx.getValue(1));
    return;
  }
  case ISD::ATOMIC_CMP_SWAP: {
    MVT T = N->getValueType(0);
    assert(T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
    SDValue cpInL, cpInH;
    cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(2),
                        DAG.getConstant(0, MVT::i32));
    cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(2),
                        DAG.getConstant(1, MVT::i32));
    cpInL = DAG.getCopyToReg(N->getOperand(0), X86::EAX, cpInL, SDValue());
    cpInH = DAG.getCopyToReg(cpInL.getValue(0), X86::EDX, cpInH,
                             cpInL.getValue(1));
    SDValue swapInL, swapInH;
    swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(3),
                          DAG.getConstant(0, MVT::i32));
    swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(3),
                          DAG.getConstant(1, MVT::i32));
    swapInL = DAG.getCopyToReg(cpInH.getValue(0), X86::EBX, swapInL,
                               cpInH.getValue(1));
    swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX, swapInH,
                               swapInL.getValue(1));
    SDValue Ops[] = { swapInH.getValue(0),
                      N->getOperand(1),
                      swapInH.getValue(1) };
    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3);
    SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32,
                                        Result.getValue(1));
    SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), X86::EDX, MVT::i32,
                                        cpOutL.getValue(2));
    SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2));
    Results.push_back(cpOutH.getValue(1));
    return;
  }
  case ISD::ATOMIC_LOAD_ADD:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
    return;
  case ISD::ATOMIC_LOAD_AND:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
    return;
  case ISD::ATOMIC_LOAD_NAND:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
    return;
  case ISD::ATOMIC_LOAD_OR:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
    return;
  case ISD::ATOMIC_LOAD_SUB:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
    return;
  case ISD::ATOMIC_LOAD_XOR:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
    return;
  case ISD::ATOMIC_SWAP:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
    return;
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::BSF:                return "X86ISD::BSF";
  case X86ISD::BSR:                return "X86ISD::BSR";
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FOR:                return "X86ISD::FOR";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FSRL:               return "X86ISD::FSRL";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::BT:                 return "X86ISD::BT";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::PEXTRB:             return "X86ISD::PEXTRB";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
  case X86ISD::PINSRB:             return "X86ISD::PINSRB";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
  case X86ISD::FRCP:               return "X86ISD::FRCP";
  case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
  case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
  case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
  case X86ISD::TC_RETURN:          return "X86ISD::TC_RETURN";
  case X86ISD::FNSTCW16m:          return "X86ISD::FNSTCW16m";
  case X86ISD::LCMPXCHG_DAG:       return "X86ISD::LCMPXCHG_DAG";
  case X86ISD::LCMPXCHG8_DAG:      return "X86ISD::LCMPXCHG8_DAG";
  case X86ISD::ATOMADD64_DAG:      return "X86ISD::ATOMADD64_DAG";
  case X86ISD::ATOMSUB64_DAG:      return "X86ISD::ATOMSUB64_DAG";
  case X86ISD::ATOMOR64_DAG:       return "X86ISD::ATOMOR64_DAG";
  case X86ISD::ATOMXOR64_DAG:      return "X86ISD::ATOMXOR64_DAG";
  case X86ISD::ATOMAND64_DAG:      return "X86ISD::ATOMAND64_DAG";
  case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
  case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
  case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
  case X86ISD::VSHL:               return "X86ISD::VSHL";
  case X86ISD::VSRL:               return "X86ISD::VSRL";
  case X86ISD::CMPPD:              return "X86ISD::CMPPD";
  case X86ISD::CMPPS:              return "X86ISD::CMPPS";
  case X86ISD::PCMPEQB:            return "X86ISD::PCMPEQB";
  case X86ISD::PCMPEQW:            return "X86ISD::PCMPEQW";
  case X86ISD::PCMPEQD:            return "X86ISD::PCMPEQD";
  case X86ISD::PCMPEQQ:            return "X86ISD::PCMPEQQ";
  case X86ISD::PCMPGTB:            return "X86ISD::PCMPGTB";
  case X86ISD::PCMPGTW:            return "X86ISD::PCMPGTW";
  case X86ISD::PCMPGTD:            return "X86ISD::PCMPGTD";
  case X86ISD::PCMPGTQ:            return "X86ISD::PCMPGTQ";
  case X86ISD::ADD:                return "X86ISD::ADD";
  case X86ISD::SUB:                return "X86ISD::SUB";
  case X86ISD::SMUL:               return "X86ISD::SMUL";
  case X86ISD::UMUL:               return "X86ISD::UMUL";
  }
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement.
  if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
    return false;

  if (AM.BaseGV) {
    // We can only fold this if we don't need an extra load.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;
    // If BaseGV requires a register, we cannot also have a BaseReg.
    if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
        AM.HasBaseReg)
      return false;

    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit()) {
      if (getTargetMachine().getCodeModel() != CodeModel::Small)
        return false;
      // If lower 4G is not available, then we must use rip-relative addressing.
      if (AM.BaseOffs || AM.Scale > 1)
        return false;
    }
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}


bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
  if (!Ty1->isInteger() || !Ty2->isInteger())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (VT.getSizeInBits() == 64) return false;
  return (Mask.getNode()->getNumOperands() <= 4 ||
          isIdentityMask(Mask.getNode()) ||
          isIdentityMask(Mask.getNode(), true) ||
          isSplatMask(Mask.getNode()) ||
          isPSHUFHW_PSHUFLWMask(Mask.getNode()) ||
          X86::isUNPCKLMask(Mask.getNode()) ||
          X86::isUNPCKHMask(Mask.getNode()) ||
          X86::isUNPCKL_v_undef_Mask(Mask.getNode()) ||
          X86::isUNPCKH_v_undef_Mask(Mask.getNode()));
}

bool
X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
                                          MVT EVT, SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (EVT.getSizeInBits() * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4)  ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpc,
                                                       unsigned immOpc,
                                                       unsigned LoadOpc,
                                                       unsigned CXchgOpc,
                                                       unsigned copyOpc,
                                                       unsigned notOpc,
                                                       unsigned EAXreg,
                                                       TargetRegisterClass *RC,
                                                       bool invSrc) {
  // For the atomic bitwise operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld  t1 = [bitinstr.addr]
  //     op  t2 = t1, [bitinstr.val]
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz  newMBB
  //     fallthrough --> nextMBB
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(bInstr->getNumOperands() < 8 && "unexpected number of operands");
  MachineOperand& destOper = bInstr->getOperand(0);
  MachineOperand* argOpers[6];
  int numArgs = bInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &bInstr->getOperand(i+1);

  // x86 address has 4 operands: base, scale, index, and displacement
  int lastAddrIndx = 3; // [0,3]
  int valArgIndx = 4;

  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  unsigned tt = F->getRegInfo().createVirtualRegister(RC);
  if (invSrc) {
    MIB = BuildMI(newMBB, TII->get(notOpc), tt).addReg(t1);
  }
  else
    tt = t1;

  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, TII->get(regOpc), t2);
  else
    MIB = BuildMI(newMBB, TII->get(immOpc), t2);
  MIB.addReg(tt);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, TII->get(copyOpc), EAXreg);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, TII->get(CXchgOpc));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t2);
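  // Attach the pseudo instruction's memory operand to the cmpxchg so the
  // memory reference survives into the generated code.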
  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(copyOpc), destOper.getReg());
  MIB.addReg(EAXreg);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(bInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}

// private utility function: 64-bit atomics on a 32-bit host.
MachineBasicBlock *
X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpcL,
                                                       unsigned regOpcH,
                                                       unsigned immOpcL,
                                                       unsigned immOpcH,
                                                       bool invSrc) {
  // For the atomic bitwise operator, we generate
  //   thisMBB (instructions are in pairs, except cmpxchg8b)
  //     ld t1,t2 = [bitinstr.addr]
  //   newMBB:
  //     out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
  //     op  t5, t6 <- out1, out2, [bitinstr.val]
  //      (for SWAP, substitute:  mov t5, t6 <- [bitinstr.val])
  //     mov ECX, EBX <- t5, t6
  //     mov EAX, EDX <- t1, t2
  //     cmpxchg8b [bitinstr.addr]  [EAX, EDX, EBX, ECX implicit]
  //     mov t3, t4 <- EAX, EDX
  //     bz  newMBB
  //     result in out1, out2
  //     fallthrough --> nextMBB

  const TargetRegisterClass *RC = X86::GR32RegisterClass;
  const unsigned LoadOpc = X86::MOV32rm;
  const unsigned copyOpc = X86::MOV32rr;
  const unsigned NotOpc = X86::NOT32r;
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
  assert(bInstr->getNumOperands() < 18 && "unexpected number of operands");
  MachineOperand& dest1Oper = bInstr->getOperand(0);
  MachineOperand& dest2Oper = bInstr->getOperand(1);
  MachineOperand* argOpers[6];
  for (int i=0; i < 6; ++i)
    argOpers[i] = &bInstr->getOperand(i+2);

  // x86 address has 4 operands: base, scale, index, and displacement
  int lastAddrIndx = 3; // [0,3]

  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(thisMBB, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  MIB = BuildMI(thisMBB, TII->get(LoadOpc), t2);
  // add 4 to the displacement to load the high half of the i64 value.
  for (int i=0; i <= lastAddrIndx-1; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MachineOperand newOp3 = *(argOpers[3]);
  if (newOp3.isImm())
    newOp3.setImm(newOp3.getImm()+4);
  else
    newOp3.setOffset(newOp3.getOffset()+4);
  (*MIB).addOperand(newOp3);

  // t3/4 are defined later, at the bottom of the loop
  unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
  BuildMI(newMBB, TII->get(X86::PHI), dest1Oper.getReg())
    .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
  BuildMI(newMBB, TII->get(X86::PHI), dest2Oper.getReg())
    .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);

  unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
  unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
  if (invSrc) {
    MIB = BuildMI(newMBB, TII->get(NotOpc), tt1).addReg(t1);
    MIB = BuildMI(newMBB, TII->get(NotOpc), tt2).addReg(t2);
  } else {
    tt1 = t1;
    tt2 = t2;
  }

  assert((argOpers[4]->isReg() || argOpers[4]->isImm()) &&
         "invalid operand");
  unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
  if (argOpers[4]->isReg())
    MIB = BuildMI(newMBB, TII->get(regOpcL), t5);
  else
    MIB = BuildMI(newMBB, TII->get(immOpcL), t5);
  if (regOpcL != X86::MOV32rr)
    MIB.addReg(tt1);
  (*MIB).addOperand(*argOpers[4]);
  assert(argOpers[5]->isReg() == argOpers[4]->isReg());
  assert(argOpers[5]->isImm() == argOpers[4]->isImm());
  if (argOpers[5]->isReg())
    MIB = BuildMI(newMBB, TII->get(regOpcH), t6);
  else
    MIB = BuildMI(newMBB, TII->get(immOpcH), t6);
  if (regOpcH != X86::MOV32rr)
    MIB.addReg(tt2);
  (*MIB).addOperand(*argOpers[5]);

  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EAX);
  MIB.addReg(t1);
  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EDX);
  MIB.addReg(t2);

  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EBX);
  MIB.addReg(t5);
  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::ECX);
  MIB.addReg(t6);

  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG8B));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(copyOpc), t3);
  MIB.addReg(X86::EAX);
  MIB = BuildMI(newMBB, TII->get(copyOpc), t4);
  MIB.addReg(X86::EDX);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(bInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
                                                      MachineBasicBlock *MBB,
                                                      unsigned cmovOpc) {
  // For the atomic min/max operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld t1 = [min/max.addr]
  //     mov t2 = [min/max.val]
  //     cmp  t1, t2
  //     cmov[cond] t2 = t1
  //     mov EAX = t1
  //     lcs dest = [min/max.addr], t2  [EAX is implicit]
  //     bz   newMBB
  //     fallthrough --> nextMBB
  //
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
  MachineOperand& destOper = mInstr->getOperand(0);
  MachineOperand* argOpers[6];
  int numArgs = mInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &mInstr->getOperand(i+1);

  // x86 address has 4 operands: base, scale, index, and displacement
  int lastAddrIndx = 3; // [0,3]
  int valArgIndx = 4;

  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  // We only support register and immediate values
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");

  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
  else
    // An immediate operand needs MOV32ri rather than MOV32rr.
    MIB = BuildMI(newMBB, TII->get(X86::MOV32ri), t2);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
  MIB.addReg(t1);
  MIB.addReg(t2);

  // Generate a cmov
  unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MIB = BuildMI(newMBB, TII->get(cmovOpc),t3);
  MIB.addReg(t2);
  MIB.addReg(t1);

  // Compare and exchange if no one has modified the memory location
  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t3);
  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
  MIB.addReg(X86::EAX);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(mInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}


MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_V1I64:
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineFunction *F = BB->getParent();
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);
    // Update machine-CFG edges by transferring all successors of the current
    // block to the new block which will contain the Phi node for the select.
    sinkMBB->transferSuccessors(BB);

    // Add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // Change the floating point control word to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Store a control word with the rounding mode set to round towards zero
    // (RC bits 11:10 set)...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isReg()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImm())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImm())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobal()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }
  case X86::ATOMAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
                                               X86::OR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMXOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
                                               X86::XOR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMNAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass, true);
  case X86::ATOMMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
  case X86::ATOMMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
  case X86::ATOMUMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
  case X86::ATOMUMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);

  case X86::ATOMAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
                                               X86::OR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMXOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
                                               X86::XOR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMNAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass, true);
  case X86::ATOMMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
  case X86::ATOMMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
  case X86::ATOMUMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
  case X86::ATOMUMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);

  case X86::ATOMAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
                                               X86::OR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMXOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
                                               X86::XOR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMNAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass, true);
  // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
  // This group is for 64-bit host.
  case X86::ATOMAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
                                               X86::OR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMXOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
                                               X86::XOR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMNAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass, true);
  case X86::ATOMMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
  case X86::ATOMMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
  case X86::ATOMUMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
  case X86::ATOMUMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);

  // This group does 64-bit operations on a 32-bit host.
  case X86::ATOMAND6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::AND32rr, X86::AND32rr,
                                               X86::AND32ri, X86::AND32ri,
                                               false);
  case X86::ATOMOR6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::OR32rr, X86::OR32rr,
                                               X86::OR32ri, X86::OR32ri,
                                               false);
  case X86::ATOMXOR6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::XOR32rr, X86::XOR32rr,
                                               X86::XOR32ri, X86::XOR32ri,
                                               false);
  case X86::ATOMNAND6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::AND32rr, X86::AND32rr,
                                               X86::AND32ri, X86::AND32ri,
                                               true);
  case X86::ATOMADD6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::ADD32rr, X86::ADC32rr,
                                               X86::ADD32ri, X86::ADC32ri,
                                               false);
  case X86::ATOMSUB6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::SUB32rr, X86::SBB32rr,
                                               X86::SUB32ri, X86::SBB32ri,
                                               false);
  case X86::ATOMSWAP6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::MOV32rr, X86::MOV32rr,
                                               X86::MOV32ri, X86::MOV32ri,
                                               false);
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);   // Don't know anything.
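  // X86ISD::SETCC produces 0 or 1, so every bit except the low bit is known
  // to be zero.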
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
                                       Mask.getBitWidth() - 1);
    break;
  }
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
                                       GlobalValue* &GA, int64_t &Offset) const{
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
      return true;
    }
  }
  return TargetLowering::isGAPlusOffset(N, GA, Offset);
}

static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
                               const TargetLowering &TLI) {
  GlobalValue *GV;
  int64_t Offset = 0;
  if (TLI.isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= N && (Offset % N) == 0);
  // DAG combine handles the stack object case.
  return false;
}

static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask,
                                     unsigned NumElems, MVT EVT,
                                     SDNode *&Base,
                                     SelectionDAG &DAG, MachineFrameInfo *MFI,
                                     const TargetLowering &TLI) {
  Base = NULL;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base)
        return false;
      continue;
    }

    SDValue Elt = DAG.getShuffleScalarElt(N, i);
    if (!Elt.getNode() ||
        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
      return false;
    if (!Base) {
      Base = Elt.getNode();
      if (Base->getOpcode() == ISD::UNDEF)
        return false;
      continue;
    }
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;

    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
                               EVT.getSizeInBits()/8, i, MFI))
      return false;
  }
  return true;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  SDValue PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDNode *Base = NULL;
  if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
                                DAG, MFI, TLI))
    return SDValue();

  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                     LD->getSrcValueOffset(), LD->isVolatile(),
                     LD->getAlignment());
}

/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget *Subtarget,
                                         const TargetLowering &TLI) {
  unsigned NumOps = N->getNumOperands();

  // Ignore single operand BUILD_VECTOR.
  if (NumOps == 1)
    return SDValue();

  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
    // We are looking for load i64 and zero extend. We want to transform
    // it before the legalizer has a chance to expand it. Also look for i64
    // BUILD_PAIR bit casted to f64.
    return SDValue();
  // This must be an insertion into a zero vector.
  SDValue HighElt = N->getOperand(1);
  if (!isZeroNode(HighElt))
    return SDValue();

  // Value must be a load.
  SDNode *Base = N->getOperand(0).getNode();
  if (!isa<LoadSDNode>(Base)) {
    if (Base->getOpcode() != ISD::BIT_CONVERT)
      return SDValue();
    Base = Base->getOperand(0).getNode();
    if (!isa<LoadSDNode>(Base))
      return SDValue();
  }

  // Transform it into VZEXT_LOAD addr.
  LoadSDNode *LD = cast<LoadSDNode>(Base);

  // Load must not be an extload.
  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, Tys, Ops, 2);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Base, 1), ResNode.getValue(1));
  return ResNode;
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
  SDValue Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDValue LHS = N->getOperand(1);
      SDValue RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }

  }

  return SDValue();
}

/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget *Subtarget) {
  // Turn load->store of MMX types into GPR load/stores.  This avoids clobbering
  // the FP state in cases where an emms may be missing.
  // A preferable solution to the general problem is to figure out the right
  // places to insert EMMS.  This qualifies as a quick hack.
  StoreSDNode *St = cast<StoreSDNode>(N);
  if (St->getValue().getValueType().isVector() &&
      St->getValue().getValueType().getSizeInBits() == 64 &&
      isa<LoadSDNode>(St->getValue()) &&
      !cast<LoadSDNode>(St->getValue())->isVolatile() &&
      St->getChain().hasOneUse() && !St->isVolatile()) {
    SDNode* LdVal = St->getValue().getNode();
    LoadSDNode *Ld = 0;
    int TokenFactorIndex = -1;
    SmallVector<SDValue, 8> Ops;
    SDNode* ChainVal = St->getChain().getNode();
    // Must be a store of a load.  We currently handle two cases:  the load
    // is a direct child, and it's under an intervening TokenFactor.  It is
    // possible to dig deeper under nested TokenFactors.
    if (ChainVal == LdVal)
      Ld = cast<LoadSDNode>(St->getChain());
    else if (St->getValue().hasOneUse() &&
             ChainVal->getOpcode() == ISD::TokenFactor) {
      for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
        if (ChainVal->getOperand(i).getNode() == LdVal) {
          TokenFactorIndex = i;
          Ld = cast<LoadSDNode>(St->getValue());
        } else
          Ops.push_back(ChainVal->getOperand(i));
      }
    }
    if (Ld) {
      // If we are a 64-bit capable x86, lower to a single movq load/store pair.
      if (Subtarget->is64Bit()) {
        SDValue NewLd = DAG.getLoad(MVT::i64, Ld->getChain(),
                                    Ld->getBasePtr(), Ld->getSrcValue(),
                                    Ld->getSrcValueOffset(), Ld->isVolatile(),
                                    Ld->getAlignment());
        SDValue NewChain = NewLd.getValue(1);
        if (TokenFactorIndex != -1) {
          Ops.push_back(NewChain);
          NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
                                 Ops.size());
        }
        return DAG.getStore(NewChain, NewLd, St->getBasePtr(),
                            St->getSrcValue(), St->getSrcValueOffset(),
                            St->isVolatile(), St->getAlignment());
      }

      // Otherwise, lower to two 32-bit copies.
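      // Split the value at byte offset 4: the low half reuses the original
      // address and alignment; the high half lives 4 bytes higher and can be
      // no better aligned than MinAlign(original alignment, 4).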
      SDValue LoAddr = Ld->getBasePtr();
      SDValue HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
                                   DAG.getConstant(4, MVT::i32));

      SDValue LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset(),
                                 Ld->isVolatile(), Ld->getAlignment());
      SDValue HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
                                 Ld->isVolatile(),
                                 MinAlign(Ld->getAlignment(), 4));

      SDValue NewChain = LoLd.getValue(1);
      if (TokenFactorIndex != -1) {
        Ops.push_back(LoLd);
        Ops.push_back(HiLd);
        NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
                               Ops.size());
      }

      LoAddr = St->getBasePtr();
      HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
                           DAG.getConstant(4, MVT::i32));

      SDValue LoSt = DAG.getStore(NewChain, LoLd, LoAddr,
                                  St->getSrcValue(), St->getSrcValueOffset(),
                                  St->isVolatile(), St->getAlignment());
      SDValue HiSt = DAG.getStore(NewChain, HiLd, HiAddr,
                                  St->getSrcValue(),
                                  St->getSrcValueOffset() + 4,
                                  St->isVolatile(),
                                  MinAlign(St->getAlignment(), 4));
      return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
    }
  }
  return SDValue();
}

/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
  // F[X]OR(0.0, x) -> x
  // F[X]OR(x, 0.0) -> x
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  return SDValue();
}

/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
  // FAND(0.0, x) -> 0.0
  // FAND(x, 0.0) -> 0.0
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  return SDValue();
}

SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
  case ISD::BUILD_VECTOR:
    return PerformBuildVectorCombine(N, DAG, Subtarget, *this);
  case ISD::SELECT:          return PerformSELECTCombine(N, DAG, Subtarget);
  case ISD::STORE:           return PerformSTORECombine(N, DAG, Subtarget);
  case X86ISD::FXOR:
  case X86ISD::FOR:          return PerformFORCombine(N, DAG);
  case X86ISD::FAND:         return PerformFANDCombine(N, DAG);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
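/// 'A' names the fixed register pair EAX:EDX, so it is C_Register; the other
/// single-letter constraints handled here name register classes; anything
/// else is deferred to the generic TargetLowering implementation.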
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
      return C_Register;
    case 'f':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'y':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
LowerXConstraint(MVT ConstraintVT) const {
  // FP X constraints get lowered to SSE1/2 registers if available, otherwise
  // 'f' like normal targets.
  if (ConstraintVT.isFloatingPoint()) {
    if (Subtarget->hasSSE2())
      return "Y";
    if (Subtarget->hasSSE1())
      return "x";
  }

  return TargetLowering::LowerXConstraint(ConstraintVT);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     char Constraint,
                                                     bool hasMemory,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 31) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'J':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 63) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 255) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getZExtValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global (with
    // an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA), (GA+C), or (C+GA).
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getZExtValue();
      } else {
        // Also try the operands the other way around: (C+GA).
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getZExtValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      if (hasMemory)
        Op = LowerGlobalAddress(GA->getGlobal(), Offset, DAG);
      else
        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                        Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
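    // Returning without adding to Ops tells the caller the operand is invalid.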
    return;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
                                                      Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      if (VT == MVT::i32 || !Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR32RegisterClass);
      return std::make_pair(0U, X86::GR64RegisterClass);
    case 'f':   // FP Stack registers.
      // If SSE is enabled for this VT, use f80 to ensure the isel moves the
      // value to the correct fpstack register class.
      if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP32RegisterClass);
      if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP64RegisterClass);
      return std::make_pair(0U, X86::RFP80RegisterClass);
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT.getSimpleVT()) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
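  // If so, handle two X86 spellings the generic matcher does not know about:
  // GCC's "st" alias for st(0), and "A" for the EAX:EDX register pair.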
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }
    // 'A' means EAX + EDX.
    if (Constraint == "A") {
      Res.first = X86::EAX;
      Res.second = X86::GRADRegisterClass;
    }
    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
  // turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second == X86::GR16RegisterClass) {
    if (VT == MVT::i8) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::AL; break;
      case X86::DX: DestReg = X86::DL; break;
      case X86::CX: DestReg = X86::CL; break;
      case X86::BX: DestReg = X86::BL; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR8RegisterClass;
      }
    } else if (VT == MVT::i32) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::EAX; break;
      case X86::DX: DestReg = X86::EDX; break;
      case X86::CX: DestReg = X86::ECX; break;
      case X86::BX: DestReg = X86::EBX; break;
      case X86::SI: DestReg = X86::ESI; break;
      case X86::DI: DestReg = X86::EDI; break;
      case X86::BP: DestReg = X86::EBP; break;
      case X86::SP: DestReg = X86::ESP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR32RegisterClass;
      }
    } else if (VT == MVT::i64) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::RAX; break;
      case X86::DX: DestReg = X86::RDX; break;
      case X86::CX: DestReg = X86::RCX; break;
      case X86::BX: DestReg = X86::RBX; break;
      case X86::SI: DestReg = X86::RSI; break;
      case X86::DI: DestReg = X86::RDI; break;
      case X86::BP: DestReg = X86::RBP; break;
      case X86::SP: DestReg = X86::RSP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR64RegisterClass;
      }
    }
  } else if (Res.second == X86::FR32RegisterClass ||
             Res.second == X86::FR64RegisterClass ||
             Res.second == X86::VR128RegisterClass) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class.  This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it can
    // find, ignoring the required type.
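    // Re-map based on the operand type: scalar f32 -> FR32, scalar f64 ->
    // FR64, any 128-bit vector type -> VR128.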
    if (VT == MVT::f32)
      Res.second = X86::FR32RegisterClass;
    else if (VT == MVT::f64)
      Res.second = X86::FR64RegisterClass;
    else if (X86::VR128RegisterClass->hasType(VT))
      Res.second = X86::VR128RegisterClass;
  }

  return Res;
}

//===----------------------------------------------------------------------===//
//                           X86 Widen vector type
//===----------------------------------------------------------------------===//

/// getWidenVectorType: given a vector type, returns the type to widen
/// to (e.g., v7i8 to v8i8).  If the vector type is legal, it returns itself.
/// If there is no vector type that we want to widen to, returns MVT::Other.
/// When and where to widen is target dependent, based on the cost of
/// scalarizing vs. using the wider vector type.
MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
  assert(VT.isVector());
  if (isTypeLegal(VT))
    return VT;

  // TODO: In computeRegisterProperties, we can compute the list of legal
  //       vector types based on element type.  This would speed up our search
  //       (though it may not be worth it since the size of the list is
  //       relatively small).
  MVT EltVT = VT.getVectorElementType();
  unsigned NElts = VT.getVectorNumElements();

  // On X86, it makes sense to widen any vector wider than 1 element.
  if (NElts <= 1)
    return MVT::Other;

  // Return the first legal vector type with the same element type and more
  // elements.
  for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
       nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
    MVT SVT = (MVT::SimpleValueType)nVT;

    if (isTypeLegal(SVT) &&
        SVT.getVectorElementType() == EltVT &&
        SVT.getVectorNumElements() > NElts)
      return SVT;
  }
  return MVT::Other;
}