X86ISelLowering.cpp revision 9dd93b36b8b33281774c6257ad07a3e7d0d1c660
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

// Forward declarations.
static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG);

X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  bool Fast = false;

  RegInfo = TM.getRegisterInfo();
  TD = getTargetData();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8,  X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8 , Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
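
  // (Aside, not in the original source: after a ucomiss/ucomisd compare,
  // "oeq" is ZF==1 && PF==0 and "une" is ZF==0 || PF==1, and no single
  // setcc instruction tests both flags, so the legalizer expands each of
  // these into two SETCC nodes joined by an AND or OR.)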

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf64) {
      // We have an impenetrably clever algorithm for ui64->double only.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i64  , Custom);
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    } else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }
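
  // (Illustration of the promotion, not from this file: (uint_to_fp i16 %x)
  // becomes (sint_to_fp (zext %x to i32)); the zero-extended operand is
  // never negative, so the signed conversion computes the same value.)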

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT    , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT    , MVT::i32  , Expand);
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  setOperationAction(ISD::MULHS            , MVT::i8   , Expand);
  setOperationAction(ISD::MULHU            , MVT::i8   , Expand);
  setOperationAction(ISD::SDIV             , MVT::i8   , Expand);
  setOperationAction(ISD::UDIV             , MVT::i8   , Expand);
  setOperationAction(ISD::SREM             , MVT::i8   , Expand);
  setOperationAction(ISD::UREM             , MVT::i8   , Expand);
  setOperationAction(ISD::MULHS            , MVT::i16  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i16  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i16  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i16  , Expand);
  setOperationAction(ISD::SREM             , MVT::i16  , Expand);
  setOperationAction(ISD::UREM             , MVT::i16  , Expand);
  setOperationAction(ISD::MULHS            , MVT::i32  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i32  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i32  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i32  , Expand);
  setOperationAction(ISD::SREM             , MVT::i32  , Expand);
  setOperationAction(ISD::UREM             , MVT::i32  , Expand);
  setOperationAction(ISD::MULHS            , MVT::i64  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i64  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i64  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i64  , Expand);
  setOperationAction(ISD::SREM             , MVT::i64  , Expand);
  setOperationAction(ISD::UREM             , MVT::i64  , Expand);
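
  // (Illustration, not in the original: with SDIV and SREM both Expanded,
  // IR such as
  //   %q = sdiv i32 %x, %y
  //   %r = srem i32 %x, %y
  // legalizes to two SDIVREM nodes with identical operands; CSE merges
  // them, and one idiv then produces both the quotient and the remainder.)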

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::FREM             , MVT::f80  , Expand);
  setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f80  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f80  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  setOperationAction(ISD::EH_RETURN        , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress , MVT::i32  , Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool   , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable      , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress  , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol , MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS      , MVT::i64  , Custom);
    setOperationAction(ISD::SRA_PARTS      , MVT::i64  , Custom);
    setOperationAction(ISD::SRL_PARTS      , MVT::i64  , Custom);
  }
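
  // (Sketch of the _PARTS lowering, not from this file: a 64-bit shift on
  // 32-bit x86 splits into two 32-bit halves, e.g. for shl
  //   Hi = shld Hi, Lo, Amt;  Lo = shl Lo, Amt
  // plus a compare/select to handle shift amounts of 32 or more.)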

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH       , MVT::Other, Legal);

  if (!Subtarget->hasSSE2())
    setOperationAction(ISD::MEMBARRIER     , MVT::Other, Expand);

  // Expand certain atomics
  setOperationAction(ISD::ATOMIC_CMP_SWAP_8 , MVT::i8,  Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_16, MVT::i16, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_32, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_64, MVT::i64, Custom);

  setOperationAction(ISD::ATOMIC_LOAD_SUB_8 , MVT::i8,  Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom);

  if (!Subtarget->is64Bit()) {
    setOperationAction(ISD::ATOMIC_LOAD_ADD_64,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB_64,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND_64,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR_64,   MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR_64,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_NAND_64, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP_64,      MVT::i64, Custom);
  }

  // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing()) {
    setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::VAARG          , MVT::Other, Custom);
    setOperationAction(ISD::VACOPY         , MVT::Other, Custom);
  } else {
    setOperationAction(ISD::VAARG          , MVT::Other, Expand);
    setOperationAction(ISD::VACOPY         , MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE,        MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,     MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
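
    // (How the simulation works, as an aside not in the original: FABS
    // ANDs with a constant-pool mask that clears the sign bit (e.g.
    // 0x7FFFFFFFFFFFFFFF for f64), FNEG XORs with the sign-bit mask
    // 0x8000000000000000, and FCOPYSIGN combines an AND with an OR of
    // the two pieces.)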

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Floating truncations from f80 and extensions to f80 go through memory.
    // If optimizing, we lie about this though and handle it in
    // InstructionSelectPreprocess so that dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f64, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE <-> X87 conversions go through memory.  If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f64, Expand);
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      // And x87->x87 truncations also.
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN         , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS         , MVT::f64  , Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations go through memory.  If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN         , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS         , MVT::f64  , Expand);
    }
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  {
    bool ignored;
    APFloat TmpFlt(+0.0);
    TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                   &ignored);
    addLegalFPImmediate(TmpFlt);  // FLD0
    TmpFlt.changeSign();
    addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
    APFloat TmpFlt2(+1.0);
    TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                    &ignored);
    addLegalFPImmediate(TmpFlt2);  // FLD1
    TmpFlt2.changeSign();
    addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
  }
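
  // (Background, not from the original: x87 can only materialize +0.0 and
  // +1.0 directly (fldz/fld1); their negations take a following fchs,
  // which is why exactly those four values, converted to the 80-bit
  // format above, are the legal f80 immediates.)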

  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN           , MVT::f80  , Expand);
    setOperationAction(ISD::FCOS           , MVT::f80  , Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f80  , Expand);

  setOperationAction(ISD::FLOG,   MVT::f80, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP,   MVT::f80, Expand);
  setOperationAction(ISD::FEXP2,  MVT::f80, Expand);

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CONCAT_VECTORS,(MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
  }
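
  // (Design note, added for clarity: defaulting every vector operation to
  // Expand and then re-enabling entries in the per-feature blocks below
  // (MMX, SSE1, SSE2, SSE4.1) is safer than opting in opcode by opcode,
  // since anything those blocks do not mention falls back to
  // scalarization.)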

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD,                MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD,                MVT::v4i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v1i64, Legal);

    setOperationAction(ISD::SUB,                MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB,                MVT::v4i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS,              MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v4i16, Legal);

    setOperationAction(ISD::AND,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND,                MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND,                MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND,                MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v1i64, Legal);

    setOperationAction(ISD::OR,                 MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,                 MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,                 MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,                 MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v1i64, Legal);

    setOperationAction(ISD::XOR,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR,                MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR,                MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR,                MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD,               MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v2f32, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v2f32, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i16, Custom);
  }
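
  // (What the Promote/AddPromotedToType pairs above accomplish, as an
  // aside: a v8i8 AND, for instance, is bitcast to v1i64, performed
  // there, and bitcast back, so all MMX logical ops and loads funnel
  // through the one type the pand/por/pxor/movq patterns cover.)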

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);

    setOperationAction(ISD::VSETCC,             MVT::v2f64, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v16i8, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v8i16, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::SimpleValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);

  }

  if (Subtarget->hasSSE41()) {
    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v2i64, Legal);

    // i8 and i16 vectors are custom, because the source register and source
    // memory operand types are not the same width.  f32 vectors are
    // custom since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
    }
  }

  if (Subtarget->hasSSE42()) {
    setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::STORE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
  setPrefLoopAlignment(16);
}


MVT X86TargetLowering::getSetCCResultType(const SDValue &) const {
  return MVT::i8;
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getBitWidth() == 128)
      MaxAlign = 16;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
  return;
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  if (Subtarget->is64Bit()) {
    // Max of 8 and alignment of type.
    unsigned TyAlign = TD->getABITypeAlignment(Ty);
    if (TyAlign > 8)
      return TyAlign;
    return 8;
  }

  unsigned Align = 4;
  if (Subtarget->hasSSE1())
    getMaxByValAlign(Ty, Align);
  return Align;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
/// determining it.
MVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
                                       bool isSrcConst, bool isSrcStr) const {
  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
  // linux. This is because the stack realignment code can't handle certain
  // cases like PR2962. This should be removed when PR2962 is fixed.
  if (Subtarget->getStackAlignment() >= 16) {
    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
      return MVT::v4i32;
    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
      return MVT::v4f32;
  }
  if (Subtarget->is64Bit() && Size >= 8)
    return MVT::i64;
  return MVT::i32;
}
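
// (Worked example, not from the original: a 16-byte memcpy from a string
// constant, on an SSE2 subtarget whose stack alignment is at least 16,
// gets type v4i32, i.e. a single 128-bit load/store pair instead of four
// i32 moves.)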

/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDValue Chain = Op.getOperand(0);

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDValue TailCall = Chain;
    SDValue TargetAddress = TailCall.getOperand(1);
    SDValue StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDValue,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i = 3; i < TailCall.getNumOperands() - 1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDValue Flag;

  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue ValToCopy = Op.getOperand(i*2+1);

    // Returns in ST0/ST1 are handled specially: these are pushed as operands
    // to the RET instruction and handled by the FP Stackifier.
    if (RVLocs[i].getLocReg() == X86::ST0 ||
        RVLocs[i].getLocReg() == X86::ST1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy);
      RetOps.push_back(ValToCopy);
      // Don't emit a copytoreg.
      continue;
    }

    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
    Flag = Chain.getValue(1);
  }
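
  // (Background, added for clarity: st(0)/st(1) cannot go through a
  // normal CopyToReg because the x87 register stack is only managed late,
  // by the FP Stackifier pass, so such values ride along as extra
  // operands on the RET node as the comment above describes.)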

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax.
  if (Subtarget->is64Bit() &&
      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy());

    Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag);
    Flag = Chain.getValue(1);
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, &RetOps[0], RetOps.size());
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = TheCall->isVarArg();
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDValue, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    MVT CopyVT = RVLocs[i].getValVT();

    // If this is a call to a function that returns an fp value on the floating
    // point stack, but where we prefer to use the value in xmm registers, copy
    // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[i].getLocReg() == X86::ST0 ||
         RVLocs[i].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
      CopyVT = MVT::f80;
    }

    Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                               CopyVT, InFlag).getValue(1);
    SDValue Val = Chain.getValue(0);
    InFlag = Chain.getValue(2);

    if (CopyVT != RVLocs[i].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register.
      Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1));
    }

    ResultVals.push_back(Val);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getMergeValues(TheCall->getVTList(), &ResultVals[0],
                            ResultVals.size()).getNode();
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for many Windows API
//  routines and the like. It differs from the C calling convention just a
//  little: the callee should clean up the stack, not the caller. Symbols
//  should also be decorated in some fancy way :) It doesn't support any
//  vector arguments.
//  For info on the fast calling convention see Fast Calling Convention
//  (tail call) implementation LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  MF.getRegInfo().addLiveIn(PReg, VReg);
  return VReg;
}

/// CallIsStructReturn - Determines whether a CALL node uses struct return
/// semantics.
static bool CallIsStructReturn(CallSDNode *TheCall) {
  unsigned NumOps = TheCall->getNumArgs();
  if (!NumOps)
    return false;

  return TheCall->getArgFlags(0).isSRet();
}

/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
/// return semantics.
static bool ArgsAreStructReturn(SDValue Op) {
  unsigned NumArgs = Op.getNode()->getNumValues() - 1;
  if (!NumArgs)
    return false;

  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
}

/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
/// the callee to pop its own arguments. Callee pop is necessary to support
/// tail calls.
bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
  if (IsVarArg)
    return false;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else if (CC == CallingConv::Fast && PerformTailCallOpt)
      return CC_X86_64_TailCall;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else
    return CC_X86_32_C;
}

/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  if (CC == CallingConv::X86_FastCall)
    return FastCall;
  else if (CC == CallingConv::X86_StdCall)
    return StdCall;
  return None;
}


/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
/// in a register before calling.
bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
  return !IsTailCall && !Is64Bit &&
    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
    Subtarget->isPICStyleGOT();
}

/// CallRequiresFnAddressInReg - Check whether the call requires the function
/// address to be loaded in a register.
bool
X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
  return !Is64Bit && IsTailCall &&
    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
    Subtarget->isPICStyleGOT();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
}
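
// (Note on byval semantics, added for clarity: a "byval" pointer argument
// hands the callee its own stack copy of the pointee, which is why an
// AlwaysInline memcpy is emitted here rather than simply forwarding the
// pointer.)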

SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
                                            const CCValAssign &VA,
                                            MachineFrameInfo *MFI,
                                            unsigned CC,
                                            SDValue Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags =
    cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
  bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
                                  VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), Root, FIN,
                     PseudoSourceValue::getFixedStack(FI), 0);
}

SDValue
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));

  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();
  bool IsWin64 = Subtarget->isTargetWin64();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));

  SmallVector<SDValue, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
        RC = X86::VR128RegisterClass;
      else if (RegVT.isVector()) {
        assert(RegVT.getSizeInBits() == 64);
        if (!Is64Bit)
          RC = X86::VR64RegisterClass;     // MMX values are passed in MMXs.
        else {
          // Darwin calling convention passes MMX values in either GPRs or
          // XMMs in x86-64. Other targets pass them in memory.
          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
            RC = X86::VR128RegisterClass;  // MMX values are passed in XMMs.
            RegVT = MVT::v2i64;
          } else {
            RC = X86::GR64RegisterClass;   // v1i64 values are passed in GPRs.
            RegVT = MVT::i64;
          }
        }
      } else {
        assert(0 && "Unknown argument type!");
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
      if (Is64Bit && RegVT != VA.getLocVT()) {
        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
        else if (RC == X86::VR128RegisterClass) {
          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i64, ArgValue,
                                 DAG.getConstant(0, MVT::i64));
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
        }
      }

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
    }
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. Save the argument into
  // a virtual register so that we can access it from the return points.
  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]);
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root);
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // align stack specially for tail calls
  if (PerformTailCallOpt && CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    if (Is64Bit || CC != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    }
    if (Is64Bit) {
      unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;

      // FIXME: We should really autogenerate these arrays
      static const unsigned GPR64ArgRegsWin64[] = {
        X86::RCX, X86::RDX, X86::R8,  X86::R9
      };
      static const unsigned XMMArgRegsWin64[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
      };
      static const unsigned GPR64ArgRegs64Bit[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs64Bit[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };
      const unsigned *GPR64ArgRegs, *XMMArgRegs;

      if (IsWin64) {
        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
        GPR64ArgRegs = GPR64ArgRegsWin64;
        XMMArgRegs = XMMArgRegsWin64;
      } else {
        TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
        GPR64ArgRegs = GPR64ArgRegs64Bit;
        XMMArgRegs = XMMArgRegs64Bit;
      }
      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                       TotalNumIntRegs);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                       TotalNumXMMRegs);

      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so
      // they may be loaded by dereferencing the result of va_next.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
                                                 TotalNumXMMRegs * 16, 16);

      // Store the integer parameter registers.
      SmallVector<SDValue, 8> MemOps;
      SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      SDValue FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getIntPtrConstant(VarArgsGPOffset));
      for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
        unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                  X86::GR64RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        SDValue Store =
          DAG.getStore(Val.getValue(1), Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(8));
      }
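
      // (Layout sketch, per the SysV x86-64 ABI rather than anything in
      // this file: the register save area is six 8-byte GPR slots followed
      // by eight 16-byte XMM slots; the va_list's gp_offset and fp_offset
      // fields index into it, which is what VarArgsGPOffset and
      // VarArgsFPOffset model.)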
      // Now store the XMM (fp + vector) parameter registers.
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                        DAG.getIntPtrConstant(VarArgsFPOffset));
      for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
        unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                  X86::VR128RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
        SDValue Store =
          DAG.getStore(Val.getValue(1), Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(16));
      }
      if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOps[0], MemOps.size());
    }
  }

  ArgValues.push_back(Root);

  // Some CCs need callee pop.
  if (IsCalleePop(isVarArg, CC)) {
    BytesToPopOnReturn  = StackSize; // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0; // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
      BytesToPopOnReturn = 4;
    BytesCallerReserves = StackSize;
  }

  if (!Is64Bit) {
    RegSaveFrameIndex = 0xAAAAAAA;   // RegSaveFrameIndex is X86-64 only.
    if (CC == CallingConv::X86_FastCall)
      VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
  }

  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
                            ArgValues.size()).getValue(Op.getResNo());
}

SDValue
X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
                                    const SDValue &StackPtr,
                                    const CCValAssign &VA,
                                    SDValue Chain,
                                    SDValue Arg, ISD::ArgFlagsTy Flags) {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  }
  return DAG.getStore(Chain, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
}

/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
                                           SDValue &OutRetAddr,
                                           SDValue Chain,
                                           bool IsTailCall,
                                           bool Is64Bit,
                                           int FPDiff) {
  if (!IsTailCall || FPDiff == 0) return Chain;

  // Adjust the return address stack slot.
  MVT VT = getPointerTy();
  OutRetAddr = getReturnAddressFrameIndex(DAG);
  // Load the "old" return address.
  OutRetAddr = DAG.getLoad(VT, Chain, OutRetAddr, NULL, 0);
  return SDValue(OutRetAddr.getNode(), 1);
}

/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff != 0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
                         SDValue Chain, SDValue RetAddrFrIdx,
                         bool Is64Bit, int FPDiff) {
  // Store the return address to the appropriate stack slot.
  if (!FPDiff) return Chain;
  // Calculate the new stack slot for the return address.
  int SlotSize = Is64Bit ? 8 : 4;
  int NewReturnAddrFI =
    MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff - SlotSize);
  MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
  Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
  return Chain;
}

SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain    = TheCall->getChain();
  unsigned CC      = TheCall->getCallingConv();
  bool isVarArg    = TheCall->isVarArg();
  bool IsTailCall  = TheCall->isTailCall() &&
                     CC == CallingConv::Fast && PerformTailCallOpt;
  SDValue Callee   = TheCall->getCallee();
  bool Is64Bit     = Subtarget->is64Bit();
  bool IsStructRet = CallIsStructReturn(TheCall);

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (PerformTailCallOpt && CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  int FPDiff = 0;
  if (IsTailCall) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed =
      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
    FPDiff = NumBytesCallerPushed - NumBytes;

    // Record the delta of movement of the return-address stack slot, but
    // only if this delta is below the previously recorded one.
    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue RetAddrFrIdx;
  // Load the return address for tail calls.
  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
                                  FPDiff);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = TheCall->getArg(i);
    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      if (Is64Bit) {
        MVT RegVT = VA.getLocVT();
        if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
          switch (VA.getLocReg()) {
          default:
            break;
          case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
          case X86::R8: {
            // Special case: passing MMX values in GPR registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
            break;
          }
          case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
          case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
            // Special case: passing MMX values in XMM registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
            Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Arg);
            Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
                              DAG.getNode(ISD::UNDEF, MVT::v2i64), Arg,
                              getMOVLMask(2, DAG));
            break;
          }
          }
      }
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      if (!IsTailCall || (IsTailCall && isByVal)) {
        assert(VA.isMemLoc());
        if (StackPtr.getNode() == 0)
          StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());

        MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
                                               Chain, Arg, Flags));
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!IsTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                               InFlag);
      InFlag = Chain.getValue(1);
    }

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via the PLT.
  if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  // If we are tail calling and generating PIC/GOT style code, load the address
  // of the callee into a register. That value is later used as the target of
  // the tail jump. This is done to circumvent the ebx/callee-saved problem for
  // tail calls on PIC/GOT architectures. Normally we would just put the
  // address of the GOT into ebx and then call target@PLT. But for tail calls
  // ebx would be restored (since ebx is callee saved) before jumping to the
  // target@PLT.
  if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
    // Note: the actual move into the register is done further down.
    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    if (G && !G->getGlobal()->hasHiddenVisibility() &&
        !G->getGlobal()->hasProtectedVisibility())
      Callee = LowerGlobalAddress(Callee, DAG);
    else if (isa<ExternalSymbolSDNode>(Callee))
      Callee = LowerExternalSymbol(Callee, DAG);
  }

  if (Is64Bit && isVarArg) {
    // From the AMD64 ABI document:
    //   For calls that may call functions that use varargs or stdargs
    //   (prototype-less calls or calls to functions containing ellipsis
    //   (...) in the declaration) %al is used as a hidden argument to
    //   specify the number of SSE registers used. The contents of %al do
    //   not need to match exactly the number of registers, but must be an
    //   upper bound on the number of SSE registers used and is in the
    //   range 0 to 8 inclusive.

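    // For example, a call such as printf("%f %f\n", a, b) passes two values
    // in XMM0/XMM1, so NumXMMRegs below is 2 and the constant 2 is moved
    // into %al. Any upper bound up to 8 would be ABI-conformant; the exact
    // count simply lets the callee's register-save code do less work.
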
    // FIXME: Verify this on Win64.
    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }


  // For tail calls lower the arguments to the 'real' stack slot.
  if (IsTailCall) {
    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    int FI = 0;
    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (!VA.isRegLoc()) {
        assert(VA.isMemLoc());
        SDValue Arg = TheCall->getArg(i);
        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
        // Create a frame index.
        int32_t Offset = VA.getLocMemOffset() + FPDiff;
        uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
        FIN = DAG.getFrameIndex(FI, getPointerTy());

        if (Flags.isByVal()) {
          // Copy relative to framepointer.
          SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
          if (StackPtr.getNode() == 0)
            StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
          Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);

          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
                                                           Flags, DAG));
        } else {
          // Store relative to framepointer.
          MemOpChains2.push_back(
            DAG.getStore(Chain, Arg, FIN,
                         PseudoSourceValue::getFixedStack(FI), 0));
        }
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Copy arguments to their registers.
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                               InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
                                     FPDiff);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
                                          G->getOffset());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  } else if (IsTailCall) {
    unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add the register as a live out.
    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDValue, 8> Ops;

  if (IsTailCall) {
    Ops.push_back(Chain);
    Ops.push_back(DAG.getIntPtrConstant(NumBytes, true));
    Ops.push_back(DAG.getIntPtrConstant(0, true));
    if (InFlag.getNode())
      Ops.push_back(InFlag);
    Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
    InFlag = Chain.getValue(1);

    // Returns a chain & a flag for retval copy to use.
    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall)
    Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (!IsTailCall && !Is64Bit &&
      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  // Add an implicit use of AL for x86 vararg functions.
  if (Is64Bit && isVarArg)
    Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  if (IsTailCall) {
    assert(InFlag.getNode() &&
           "Flag must be set. Depends on the flag being set in LowerRET");
    Chain = DAG.getNode(X86ISD::TAILCALL,
                        TheCall->getVTList(), &Ops[0], Ops.size());

    return SDValue(Chain.getNode(), Op.getResNo());
  }

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush;
  if (IsCalleePop(isVarArg, CC))
    NumBytesForCalleeToPush = NumBytes;  // Callee pops everything.
  else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = 4;
  else
    NumBytesForCalleeToPush = 0;  // Callee pops nothing.

  // Returns a flag for retval copy to use.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(NumBytesForCalleeToPush,
                                                   true),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
                 Op.getResNo());
}

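// Note on NumBytesForCalleeToPush above: on 32-bit targets a callee that
// returns a struct through a hidden sret pointer finishes with "ret $4",
// popping that pointer itself. Reporting 4 in CALLSEQ_END keeps the
// caller's frame bookkeeping from popping those bytes a second time.
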
//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

// Like stdcall, the callee cleans up the arguments. The convention differs in
// that ECX is reserved for storing the tail-called function address, so only
// 2 registers are free for argument passing (inreg). Tail call optimization
// is performed provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On the X86_64 architecture, with GOT-style position-independent code, only
// local (within-module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - Darwin's
// dyld, for example.)
// If a tail-called function has more arguments than the caller, the caller
// needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after
// the original RETADDR, but before the saved framepointer or the spilled
// registers, e.g. when caller(arg1, arg2) calls callee(arg1, arg2, arg3,
// arg4) the stack layout is:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round the stack argument size up to a value
/// of the form 16n + 12 (for a 16-byte alignment requirement and a 4-byte
/// return-address slot), so that the stack stays aligned once the return
/// address has been pushed.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  uint64_t AlignMask = StackAlignment - 1;
  int64_t Offset = StackSize;
  uint64_t SlotSize = TD->getPointerSize();
  if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
    // The misalignment is at or below the target offset; just add the
    // difference.
    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
  } else {
    // Round down to the alignment boundary, then add one full alignment
    // plus the target offset (StackAlignment - SlotSize).
    Offset = ((~AlignMask) & Offset) + StackAlignment +
             (StackAlignment - SlotSize);
  }
  return Offset;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
                                                          SDValue Ret,
                                                          SelectionDAG &DAG) const {
  if (!PerformTailCallOpt)
    return false;

  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = TheCall->getCallingConv();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDValue Callee = TheCall->getCallee();
      // Tail calls are supported unconditionally except on x86-64 with
      // PIC/GOT-style relocation, which is handled below.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
        return true;

      // Can only do local tail calls (in same module, hidden or protected) on
      // x86_64 PIC/GOT at the moment.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

FastISel *
X86TargetLowering::createFastISel(MachineFunction &mf,
                                  MachineModuleInfo *mmo,
                                  DenseMap<const Value *, unsigned> &vm,
                                  DenseMap<const BasicBlock *,
                                           MachineBasicBlock *> &bm,
                                  DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                                  , SmallSet<Instruction*, 8> &cil
#endif
                                  ) {
  return X86::createFastISel(mf, mmo, vm, bm, am
#ifndef NDEBUG
                             , cil
#endif
                             );
}


//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();
  uint64_t SlotSize = TD->getPointerSize();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize);
    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}


/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDValue &LHS, SDValue &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1  -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0  -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
        // X < 1  -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // First determine if it is required or is profitable to flip the operands.

    // If LHS is a foldable load, but RHS is not, flip the condition.
    if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
        !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
      SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
      std::swap(LHS, RHS);
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUGT:
    case ISD::SETUGE:
      std::swap(LHS, RHS);
      break;
    }

    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:
      X86CC = X86::COND_E;
      break;
    case ISD::SETOLT:              // flipped
    case ISD::SETOGT:
    case ISD::SETGT:
      X86CC = X86::COND_A;
      break;
    case ISD::SETOLE:              // flipped
    case ISD::SETOGE:
    case ISD::SETGE:
      X86CC = X86::COND_AE;
      break;
    case ISD::SETUGT:              // flipped
    case ISD::SETULT:
    case ISD::SETLT:
      X86CC = X86::COND_B;
      break;
    case ISD::SETUGE:              // flipped
    case ISD::SETULE:
    case ISD::SETLE:
      X86CC = X86::COND_BE;
      break;
    case ISD::SETONE:
    case ISD::SETNE:
      X86CC = X86::COND_NE;
      break;
    case ISD::SETUO:
      X86CC = X86::COND_P;
      break;
    case ISD::SETO:
      X86CC = X86::COND_NP;
      break;
    }
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDValue Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getZExtValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value equals the specified value.
static bool isUndefOrEqual(SDValue Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getZExtValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getZExtValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getZExtValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(SDOperandPtr Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

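// For example, with 4 elements the mask <1, 3, 4, 6> is a valid SHUFP mask:
// the low half selects from elements 0..3 (vector 1) and the high half from
// elements 4..7 (vector 2). A mask such as <4, 5, 0, 1> is not, and is
// instead recognized by isCommutedSHUFP below.
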
/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
static bool isCommutedSHUFP(SDOperandPtr Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect the mask <6, 7, 2, 3>.
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for the canonical
/// form of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect the mask <2, 3, 2, 3>.
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDValue Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

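// For example, with v4f32 the mask <4, 5, 2, 3> satisfies isMOVLPMask (the
// low half is replaced from vector 2) and <0, 1, 4, 5> satisfies isMOVHPMask
// (the high half is replaced from vector 2), matching what movlps and movhps
// do when the second vector is their memory operand.
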
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(SDOperandPtr Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDValue BitI  = Elts[i];
    SDValue BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(SDOperandPtr Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDValue BitI  = Elts[i];
    SDValue BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDValue BitI  = N->getOperand(i);
    SDValue BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDValue BitI  = N->getOperand(i);
    SDValue BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

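// For example, with 4 elements UNPCKL corresponds to the mask <0, 4, 1, 5>
// and UNPCKH to <2, 6, 3, 7>; the *_v_undef forms above accept the
// one-operand variants <0, 0, 1, 1> and <2, 2, 3, 3> that arise when both
// shuffle operands are the same vector.
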
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(SDOperandPtr Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
/// x86 movss wants: the lowest element must be the lowest element of
/// vector 2, and the other elements must come from vector 1 in order.
static bool isCommutedMOVL(SDOperandPtr Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDValue Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3.
  for (unsigned i = 0; i < 2; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2.
  for (unsigned i = 0; i < 2; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDValue ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDValue Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.getNode())
    return false;

  for (; i != NumElems; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element #0.
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool X86::isMOVDDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned e = N->getNumOperands() / 2;
  for (unsigned i = 0; i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;
  for (unsigned i = 0; i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(e+i), i))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDValue Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - Return true if the specified VECTOR_SHUFFLE
/// operand specifies an 8-element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDValue Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

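// Worked example: for the 4-element mask <3, 1, 2, 0>,
// getShuffleSHUFImmediate packs entry i into bits [2i+1:2i] of the result,
// yielding 0b00100111 = 0x27, which is exactly the immediate PSHUFD and
// SHUFPS expect for that shuffle. The PSHUFHW/PSHUFLW variants above pack
// only the high or low four entries (with the high entries biased down by 4).
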
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
/// values in the permute mask.
static SDValue CommuteVectorShuffle(SDValue Op, SDValue &V1,
                                    SDValue &V2, SDValue &Mask,
                                    SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT MaskVT = Mask.getValueType();
  MVT EltVT = MaskVT.getVectorElementType();
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDValue, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static
SDValue CommuteVectorShuffleMask(SDValue Mask, SelectionDAG &DAG) {
  MVT MaskVT = Mask.getValueType();
  MVT EltVT = MaskVT.getVectorElementType();
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
}


/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
  if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
    return false;
  N = N->getOperand(0).getNode();
  if (!ISD::isNON_EXTLoad(N))
    return false;
  if (LD)
    *LD = cast<LoadSDNode>(N);
  return true;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order). And since V1 will become the source of
/// the MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation; we will try to use
  // a load-folding shufps instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDValue SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  SDValue Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDValue Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

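// Note that isZeroNode checks isPosZero() rather than any zero: a
// floating-point -0.0 is not treated as zero here, since substituting +0.0
// for it would flip the sign bit of the result.
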
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
static bool isZeroShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  SDValue Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF)
      continue;

    unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
    if (Idx < NumElems) {
      unsigned Opc = V1.getNode()->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V1.getNode()->getOperand(Idx)))
        return false;
    } else if (Idx >= NumElems) {
      unsigned Opc = V2.getNode()->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V2.getNode()->getOperand(Idx - NumElems)))
        return false;
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG) {
  assert(VT.isVector() && "Expected a vector type");

  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDValue Vec;
  if (VT.getSizeInBits() == 64) { // MMX
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  } else if (HasSSE2) { // SSE2
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  } else { // SSE1
    SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4f32, Cst, Cst, Cst, Cst);
  }
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}

/// getOnesVector - Returns a vector of specified type with all bits set.
///
static SDValue getOnesVector(MVT VT, SelectionDAG &DAG) {
  assert(VT.isVector() && "Expected a vector type");

  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
  SDValue Vec;
  if (VT.getSizeInBits() == 64) // MMX
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  else // SSE
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}

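// Both helpers above funnel every zero/ones vector through one canonical
// BUILD_VECTOR type per register class (e.g. a v2f64 zero is built as a
// v4i32 of zeros plus a BIT_CONVERT), so repeated requests CSE to a single
// node.
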
2893static SDValue NormalizeMask(SDValue Mask, SelectionDAG &DAG) { 2894 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2895 2896 bool Changed = false; 2897 SmallVector<SDValue, 8> MaskVec; 2898 unsigned NumElems = Mask.getNumOperands(); 2899 for (unsigned i = 0; i != NumElems; ++i) { 2900 SDValue Arg = Mask.getOperand(i); 2901 if (Arg.getOpcode() != ISD::UNDEF) { 2902 unsigned Val = cast<ConstantSDNode>(Arg)->getZExtValue(); 2903 if (Val > NumElems) { 2904 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2905 Changed = true; 2906 } 2907 } 2908 MaskVec.push_back(Arg); 2909 } 2910 2911 if (Changed) 2912 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2913 &MaskVec[0], MaskVec.size()); 2914 return Mask; 2915} 2916 2917/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2918/// operation of specified width. 2919static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2920 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2921 MVT BaseVT = MaskVT.getVectorElementType(); 2922 2923 SmallVector<SDValue, 8> MaskVec; 2924 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2925 for (unsigned i = 1; i != NumElems; ++i) 2926 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2927 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2928} 2929 2930/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2931/// of specified width. 2932static SDValue getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2933 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2934 MVT BaseVT = MaskVT.getVectorElementType(); 2935 SmallVector<SDValue, 8> MaskVec; 2936 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2937 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2938 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2939 } 2940 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2941} 2942 2943/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2944/// of specified width. 2945static SDValue getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2946 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2947 MVT BaseVT = MaskVT.getVectorElementType(); 2948 unsigned Half = NumElems/2; 2949 SmallVector<SDValue, 8> MaskVec; 2950 for (unsigned i = 0; i != Half; ++i) { 2951 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2952 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2953 } 2954 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2955} 2956 2957/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps 2958/// element #0 of a vector with the specified index, leaving the rest of the 2959/// elements in place. 2960static SDValue getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, 2961 SelectionDAG &DAG) { 2962 MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2963 MVT BaseVT = MaskVT.getVectorElementType(); 2964 SmallVector<SDValue, 8> MaskVec; 2965 // Element #0 of the result gets the elt we are replacing. 2966 MaskVec.push_back(DAG.getConstant(DestElt, BaseVT)); 2967 for (unsigned i = 1; i != NumElems; ++i) 2968 MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT)); 2969 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2970} 2971 2972/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. 2973static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { 2974 MVT PVT = HasSSE2 ? 

/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) {
  MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
  MVT VT = Op.getValueType();
  if (PVT == VT)
    return Op;
  SDValue V1 = Op.getOperand(0);
  SDValue Mask = Op.getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  // Special handling of v4f32 -> v4i32.
  if (VT != MVT::v4f32) {
    Mask = getUnpacklMask(NumElems, DAG);
    while (NumElems > 4) {
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
      NumElems >>= 1;
    }
    Mask = getZeroVector(MVT::v4i32, true, DAG);
  }

  V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
  SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
                                DAG.getNode(ISD::UNDEF, PVT), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// isVectorLoad - Returns true if the node is a vector load, a scalar
/// load that's promoted to vector, or a bitcasted load.
static bool isVectorLoad(SDValue Op) {
  assert(Op.getValueType().isVector() && "Expected a vector type");
  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR ||
      Op.getOpcode() == ISD::BIT_CONVERT) {
    return isa<LoadSDNode>(Op.getOperand(0));
  }
  return isa<LoadSDNode>(Op);
}


/// CanonicalizeMovddup - Canonicalize a movddup shuffle to v2f64.
///
static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask,
                                   SelectionDAG &DAG, bool HasSSE3) {
  // If we have SSE3 and the shuffle has more than one use or the input is a
  // load, then use movddup. Otherwise, use movlhps.
  bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1));
  MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32;
  MVT VT = Op.getValueType();
  if (VT == PVT)
    return Op;
  unsigned NumElems = PVT.getVectorNumElements();
  if (NumElems == 2) {
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  } else {
    assert(NumElems == 4);
    SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32);
    SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32);
    Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1);
  }

  V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
  SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
                                DAG.getNode(ISD::UNDEF, PVT), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
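
// In the movddup case the register is treated as two f64 elements and the
// splat becomes the v2f64 shuffle <0, 0>; without SSE3 the same splat is
// expressed as the v4f32 shuffle <0, 1, 0, 1> (movlhps).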

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and a zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
                                           bool isZero, bool HasSSE2,
                                           SelectionDAG &DAG) {
  MVT VT = V2.getValueType();
  SDValue V1 = isZero
    ? getZeroVector(VT, HasSSE2, DAG) : DAG.getNode(ISD::UNDEF, VT);
  unsigned NumElems = V2.getValueType().getVectorNumElements();
  MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT EVT = MaskVT.getVectorElementType();
  SmallVector<SDValue, 16> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i)
    if (i == Idx)  // If this is the insertion idx, put the low elt of V2 here.
      MaskVec.push_back(DAG.getConstant(NumElems, EVT));
    else
      MaskVec.push_back(DAG.getConstant(i, EVT));
  SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                             &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// getNumOfConsecutiveZeros - Return the number of consecutive zero elements
/// at the low (Low == true) or high end of the result of a shuffle.
static
unsigned getNumOfConsecutiveZeros(SDValue Op, SDValue Mask,
                                  unsigned NumElems, bool Low,
                                  SelectionDAG &DAG) {
  unsigned NumZeros = 0;
  for (unsigned i = 0; i < NumElems; ++i) {
    unsigned Index = Low ? i : NumElems-i-1;
    SDValue Idx = Mask.getOperand(Index);
    if (Idx.getOpcode() == ISD::UNDEF) {
      ++NumZeros;
      continue;
    }
    SDValue Elt = DAG.getShuffleScalarElt(Op.getNode(), Index);
    if (Elt.getNode() && isZeroNode(Elt))
      ++NumZeros;
    else
      break;
  }
  return NumZeros;
}

/// isVectorShift - Returns true if the shuffle can be implemented as a
/// logical left or right shift of a vector.
static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG,
                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
  unsigned NumElems = Mask.getNumOperands();

  isLeft = true;
  unsigned NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG);
  if (!NumZeros) {
    isLeft = false;
    NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG);
    if (!NumZeros)
      return false;
  }

  bool SeenV1 = false;
  bool SeenV2 = false;
  for (unsigned i = NumZeros; i < NumElems; ++i) {
    unsigned Val = isLeft ? (i - NumZeros) : i;
    SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros));
    if (Idx.getOpcode() == ISD::UNDEF)
      continue;
    unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue();
    if (Index < NumElems)
      SeenV1 = true;
    else {
      Index -= NumElems;
      SeenV2 = true;
    }
    if (Index != Val)
      return false;
  }
  if (SeenV1 && SeenV2)
    return false;

  ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1);
  ShAmt = NumZeros;
  return true;
}
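
// For example, a v4i32 shuffle whose result is <zero, zero, s0, s1> is a
// logical left shift of the source register by two elements (ShAmt == 2,
// isLeft == true); <s2, s3, zero, zero> is the corresponding right shift.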

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDValue();

  SDValue V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, true, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDValue ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.getNode())
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getIntPtrConstant(i/2));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDValue();

  SDValue V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, true, DAG);
        else
          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
                      DAG.getIntPtrConstant(i));
    }
  }

  return V;
}

/// getVShift - Return a vector logical shift node.
///
static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
                         unsigned NumBits, SelectionDAG &DAG,
                         const TargetLowering &TLI) {
  bool isMMX = VT.getSizeInBits() == 64;
  MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
  unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
  SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp);
  return DAG.getNode(ISD::BIT_CONVERT, VT,
                     DAG.getNode(Opc, ShVT, SrcOp,
                             DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
}
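
/// LowerBUILD_VECTOR - Custom lower a BUILD_VECTOR node: all-zero and
/// all-ones vectors get canonical pxor / pcmpeqd forms, a single non-zero
/// element is inserted with a scalar-to-vector move plus a shuffle, small
/// integer elements are built up with pinsrw-style insertions, and the
/// remaining cases are expanded into sequences of unpack and shuffle
/// operations.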
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor, all ones are handled with pcmpeqd.
  if (ISD::isBuildVectorAllZeros(Op.getNode())
      || ISD::isBuildVectorAllOnes(Op.getNode())) {
    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
    // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
    // eliminated on x86-32 hosts.
    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
      return Op;

    if (ISD::isBuildVectorAllOnes(Op.getNode()))
      return getOnesVector(Op.getValueType(), DAG);
    return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG);
  }

  MVT VT = Op.getValueType();
  MVT EVT = VT.getVectorElementType();
  unsigned EVTBits = EVT.getSizeInBits();

  unsigned NumElems = Op.getNumOperands();
  unsigned NumZero  = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  bool IsAllConstants = true;
  SmallSet<SDValue, 8> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = Op.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    Values.insert(Elt);
    if (Elt.getOpcode() != ISD::Constant &&
        Elt.getOpcode() != ISD::ConstantFP)
      IsAllConstants = false;
    if (isZeroNode(Elt))
      NumZero++;
    else {
      NonZeros |= (1 << i);
      NumNonZero++;
    }
  }

  if (NumNonZero == 0) {
    // All undef vector. Return an UNDEF. All zero vectors were handled above.
    return DAG.getNode(ISD::UNDEF, VT);
  }

  // Special case for single non-zero, non-undef, element.
  if (NumNonZero == 1 && NumElems <= 4) {
    unsigned Idx = CountTrailingZeros_32(NonZeros);
    SDValue Item = Op.getOperand(Idx);

    // If this is an insertion of an i64 value on x86-32, and if the top bits
    // of the value are obviously zero, truncate the value to i32 and do the
    // insertion that way. Only do this if the value is non-constant or if the
    // value is a constant being inserted into element 0. It is cheaper to do
    // a constant pool load than it is to do a movd + shuffle.
    if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
        (!IsAllConstants || Idx == 0)) {
      if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
        // Handle MMX and SSE both.
        MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;

        // Truncate the value (which may itself be a constant) to i32, and
        // convert it to a vector with movd (S2V+shuffle to zero extend).
        Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
                                           Subtarget->hasSSE2(), DAG);

        // Now we have our 32-bit value zero extended in the low element of
        // a vector. If Idx != 0, swizzle it into place.
        if (Idx != 0) {
          SDValue Ops[] = {
            Item, DAG.getNode(ISD::UNDEF, Item.getValueType()),
            getSwapEltZeroMask(VecElts, Idx, DAG)
          };
          Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3);
        }
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item);
      }
    }

    // If we have a constant or non-constant insertion into the low element of
    // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
    // the rest of the elements. This will be matched as movd/movq/movss/movsd
    // depending on what the source datatype is. Because we can only get here
    // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
    if (Idx == 0 &&
        // Don't do this for i64 values on x86-32.
        (EVT != MVT::i64 || Subtarget->is64Bit())) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
      return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
                                         Subtarget->hasSSE2(), DAG);
    }

    // Is it a vector logical left shift?
    if (NumElems == 2 && Idx == 1 &&
        isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
      unsigned NumBits = VT.getSizeInBits();
      return getVShift(true, VT,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)),
                       NumBits/2, DAG, *this);
    }

    if (IsAllConstants) // Otherwise, it's better to do a constpool load.
      return SDValue();

    // Otherwise, if this is a vector with i32 or f32 elements, and the element
    // is a non-constant being inserted into an element other than the low one,
    // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
    // movd/movss) to move this into the low element, then shuffle it into
    // place.
    if (EVTBits == 32) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);

      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
                                         Subtarget->hasSSE2(), DAG);
      MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
      MVT MaskEVT = MaskVT.getVectorElementType();
      SmallVector<SDValue, 8> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
      SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskVec[0], MaskVec.size());
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
                         DAG.getNode(ISD::UNDEF, VT), Mask);
    }
  }

  // Splat is obviously ok. Let legalizer expand it to a shuffle.
  if (Values.size() == 1)
    return SDValue();

  // A vector full of immediates; various special cases are already
  // handled, so this is best done with a single constant-pool load.
  if (IsAllConstants)
    return SDValue();

  // Let legalizer expand 2-wide build_vectors.
  if (EVTBits == 64) {
    if (NumNonZero == 1) {
      // One half is zero or undef.
      unsigned Idx = CountTrailingZeros_32(NonZeros);
      SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
                               Op.getOperand(Idx));
      return getShuffleVectorZeroOrUndef(V2, Idx, true,
                                         Subtarget->hasSSE2(), DAG);
    }
    return SDValue();
  }

  // If element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8 && NumElems == 16) {
    SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
                                      *this);
    if (V.getNode()) return V;
  }

  if (EVTBits == 16 && NumElems == 8) {
    SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
                                      *this);
    if (V.getNode()) return V;
  }

  // If element VT is == 32 bits, turn it into a number of shuffles.
  SmallVector<SDValue, 8> V;
  V.resize(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    }

    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
        default: break;
        case 0:
          V[i] = V[i*2];  // Must be a zero vector.
          break;
        case 1:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
                             getMOVLMask(NumElems, DAG));
          break;
        case 2:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                             getMOVLMask(NumElems, DAG));
          break;
        case 3:
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                             getUnpacklMask(NumElems, DAG));
          break;
      }
    }

    MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
    MVT EVT = MaskVT.getVectorElementType();
    SmallVector<SDValue, 8> MaskVec;
    bool Reverse = (NonZeros & 0x3) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i, EVT));
    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
    SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskVec[0], MaskVec.size());
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
  }

  if (Values.size() > 2) {
    // Expand into a number of unpckl*.
    // e.g. for v4f32
    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
    SDValue UnpckMask = getUnpacklMask(NumElems, DAG);
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    NumElems >>= 1;
    while (NumElems != 0) {
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                           UnpckMask);
      NumElems >>= 1;
    }
    return V[0];
  }

  return SDValue();
}
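
/// LowerVECTOR_SHUFFLEv8i16 - v8i16 shuffles are lowered in stages: first try
/// to sort each half of the result with a single shufpd of quads, then fix up
/// each half with pshuflw / pshufhw, and finally move any remaining elements
/// into place with pextrw / pinsrw pairs.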
static
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2,
                                 SDValue PermMask, SelectionDAG &DAG,
                                 TargetLowering &TLI) {
  SDValue NewV;
  MVT MaskVT = MVT::getIntVectorWithNumElements(8);
  MVT MaskEVT = MaskVT.getVectorElementType();
  MVT PtrVT = TLI.getPointerTy();
  SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(),
                                   PermMask.getNode()->op_end());

  // First record which half of which vector the low elements come from.
  SmallVector<unsigned, 4> LowQuad(4);
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
    int QuadIdx = EltIdx / 4;
    ++LowQuad[QuadIdx];
  }

  int BestLowQuad = -1;
  unsigned MaxQuad = 1;
  for (unsigned i = 0; i < 4; ++i) {
    if (LowQuad[i] > MaxQuad) {
      BestLowQuad = i;
      MaxQuad = LowQuad[i];
    }
  }

  // Record which half of which vector the high elements come from.
  SmallVector<unsigned, 4> HighQuad(4);
  for (unsigned i = 4; i < 8; ++i) {
    SDValue Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
    int QuadIdx = EltIdx / 4;
    ++HighQuad[QuadIdx];
  }

  int BestHighQuad = -1;
  MaxQuad = 1;
  for (unsigned i = 0; i < 4; ++i) {
    if (HighQuad[i] > MaxQuad) {
      BestHighQuad = i;
      MaxQuad = HighQuad[i];
    }
  }

  // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
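  // For example, with mask <2, 0, 1, 3, 8, 9, 10, 11> the low elements all
  // come from quad 0 (the low half of V1) and the high elements all come from
  // quad 2 (the low half of V2), so BestLowQuad == 0 and BestHighQuad == 2.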
  if (BestLowQuad != -1 || BestHighQuad != -1) {
    // First sort the 4 chunks in order using shufpd.
    SmallVector<SDValue, 8> MaskVec;

    if (BestLowQuad != -1)
      MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
    else
      MaskVec.push_back(DAG.getConstant(0, MVT::i32));

    if (BestHighQuad != -1)
      MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
    else
      MaskVec.push_back(DAG.getConstant(1, MVT::i32));

    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0], 2);
    NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
    NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);

    // Now sort high and low parts separately.
    BitVector InOrder(8);
    if (BestLowQuad != -1) {
      // Sort lower half in order using PSHUFLW.
      MaskVec.clear();
      bool AnyOutOrder = false;

      for (unsigned i = 0; i != 4; ++i) {
        SDValue Elt = MaskElts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(Elt);
          InOrder.set(i);
        } else {
          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
          if (EltIdx != i)
            AnyOutOrder = true;

          MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));

          // If this element is in the right place after this shuffle, then
          // remember it.
          if ((int)(EltIdx / 4) == BestLowQuad)
            InOrder.set(i);
        }
      }
      if (AnyOutOrder) {
        for (unsigned i = 4; i != 8; ++i)
          MaskVec.push_back(DAG.getConstant(i, MaskEVT));
        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
      }
    }

    if (BestHighQuad != -1) {
      // Sort high half in order using PSHUFHW if possible.
      MaskVec.clear();

      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, MaskEVT));

      bool AnyOutOrder = false;
      for (unsigned i = 4; i != 8; ++i) {
        SDValue Elt = MaskElts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(Elt);
          InOrder.set(i);
        } else {
          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
          if (EltIdx != i)
            AnyOutOrder = true;

          MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));

          // If this element is in the right place after this shuffle, then
          // remember it.
          if ((int)(EltIdx / 4) == BestHighQuad)
            InOrder.set(i);
        }
      }

      if (AnyOutOrder) {
        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
      }
    }

    // The other elements are put in the right place using pextrw and pinsrw.
    for (unsigned i = 0; i != 8; ++i) {
      if (InOrder[i])
        continue;
      SDValue Elt = MaskElts[i];
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      SDValue ExtOp = (EltIdx < 8)
        ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
                      DAG.getConstant(EltIdx, PtrVT))
        : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
                      DAG.getConstant(EltIdx - 8, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }

    return NewV;
  }

  // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
  // as few as possible. First, let's find out how many elements are already
  // in the right order.
  unsigned V1InOrder = 0;
  unsigned V1FromV1 = 0;
  unsigned V2InOrder = 0;
  unsigned V2FromV2 = 0;
  SmallVector<SDValue, 8> V1Elts;
  SmallVector<SDValue, 8> V2Elts;
  for (unsigned i = 0; i < 8; ++i) {
    SDValue Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(Elt);
      ++V1InOrder;
      ++V2InOrder;
      continue;
    }
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
    if (EltIdx == i) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
      ++V1InOrder;
    } else if (EltIdx == i+8) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i, MaskEVT));
      ++V2InOrder;
    } else if (EltIdx < 8) {
      V1Elts.push_back(Elt);
      ++V1FromV1;
    } else {
      V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
      ++V2FromV2;
    }
  }

  if (V2InOrder > V1InOrder) {
    PermMask = CommuteVectorShuffleMask(PermMask, DAG);
    std::swap(V1, V2);
    std::swap(V1Elts, V2Elts);
    std::swap(V1FromV1, V2FromV2);
  }

  if ((V1FromV1 + V1InOrder) != 8) {
    // Some elements are from V2.
    if (V1FromV1) {
      // If there are elements that are from V1 but out of place,
      // then first sort them in place.
      SmallVector<SDValue, 8> MaskVec;
      for (unsigned i = 0; i < 8; ++i) {
        SDValue Elt = V1Elts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
          continue;
        }
        unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
        if (EltIdx >= 8)
          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
        else
          MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
      }
      SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
    }

    NewV = V1;
    for (unsigned i = 0; i < 8; ++i) {
      SDValue Elt = V1Elts[i];
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      if (EltIdx < 8)
        continue;
      SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
                                  DAG.getConstant(EltIdx - 8, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }
    return NewV;
  } else {
    // All elements are from V1.
    NewV = V1;
    for (unsigned i = 0; i < 8; ++i) {
      SDValue Elt = V1Elts[i];
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
                                  DAG.getConstant(EltIdx, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }
    return NewV;
  }
}

/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2,
                                 MVT VT,
                                 SDValue PermMask, SelectionDAG &DAG,
                                 TargetLowering &TLI) {
  unsigned NumElems = PermMask.getNumOperands();
  unsigned NewWidth = (NumElems == 4) ? 2 : 4;
  MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
  MVT MaskEltVT = MaskVT.getVectorElementType();
  MVT NewVT = MaskVT;
  switch (VT.getSimpleVT()) {
  default: assert(false && "Unexpected!");
  case MVT::v4f32: NewVT = MVT::v2f64; break;
  case MVT::v4i32: NewVT = MVT::v2i64; break;
  case MVT::v8i16: NewVT = MVT::v4i32; break;
  case MVT::v16i8: NewVT = MVT::v4i32; break;
  }

  if (NewWidth == 2) {
    if (VT.isInteger())
      NewVT = MVT::v2i64;
    else
      NewVT = MVT::v2f64;
  }
  unsigned Scale = NumElems / NewWidth;
  SmallVector<SDValue, 8> MaskVec;
  for (unsigned i = 0; i < NumElems; i += Scale) {
    unsigned StartIdx = ~0U;
    for (unsigned j = 0; j < Scale; ++j) {
      SDValue Elt = PermMask.getOperand(i+j);
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getZExtValue();
      if (StartIdx == ~0U)
        StartIdx = EltIdx - (EltIdx % Scale);
      if (EltIdx != StartIdx + j)
        return SDValue();
    }
    if (StartIdx == ~0U)
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEltVT));
    else
      MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT));
  }

  V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
  V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskVec[0], MaskVec.size()));
}

/// getVZextMovL - Return a zero-extending vector move low node.
///
static SDValue getVZextMovL(MVT VT, MVT OpVT,
                            SDValue SrcOp, SelectionDAG &DAG,
                            const X86Subtarget *Subtarget) {
  if (VT == MVT::v2f64 || VT == MVT::v4f32) {
    LoadSDNode *LD = NULL;
    if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
      LD = dyn_cast<LoadSDNode>(SrcOp);
    if (!LD) {
      // movssrr and movsdrr do not clear top bits. Try to use movd, movq
      // instead.
      MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
      if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
          SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
          SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
        // PR2108
        OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
        return DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
                                                   SrcOp.getOperand(0)
                                                        .getOperand(0))));
      }
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, VT,
                     DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                 DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
}

/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
/// shuffles.
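/// Depending on the mask, this takes one of three paths: at most two elements
/// from each source (two shuffles), three elements from one source and one
/// from the other (two shufps), or the general case, which builds separate
/// low and high shuffles and then merges them.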
static SDValue
LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2,
                          SDValue PermMask, MVT VT, SelectionDAG &DAG) {
  MVT MaskVT = PermMask.getValueType();
  MVT MaskEVT = MaskVT.getVectorElementType();
  SmallVector<std::pair<int, int>, 8> Locs;
  Locs.resize(4);
  SmallVector<SDValue, 8> Mask1(4, DAG.getNode(ISD::UNDEF, MaskEVT));
  unsigned NumHi = 0;
  unsigned NumLo = 0;
  for (unsigned i = 0; i != 4; ++i) {
    SDValue Elt = PermMask.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF) {
      Locs[i] = std::make_pair(-1, -1);
    } else {
      unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
      assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!");
      if (Val < 4) {
        Locs[i] = std::make_pair(0, NumLo);
        Mask1[NumLo] = Elt;
        NumLo++;
      } else {
        Locs[i] = std::make_pair(1, NumHi);
        if (2+NumHi < 4)
          Mask1[2+NumHi] = Elt;
        NumHi++;
      }
    }
  }

  if (NumLo <= 2 && NumHi <= 2) {
    // No more than two elements come from either vector; this can be
    // implemented with two shuffles. The first shuffle gathers the elements.
    // The second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &Mask1[0], Mask1.size()));

    SmallVector<SDValue, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT));
    for (unsigned i = 0; i != 4; ++i) {
      if (Locs[i].first == -1)
        continue;
      else {
        unsigned Idx = (i < 2) ? 0 : 4;
        Idx += Locs[i].first * 2 + Locs[i].second;
        Mask2[i] = DAG.getConstant(Idx, MaskEVT);
      }
    }

    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &Mask2[0], Mask2.size()));
  } else if (NumLo == 3 || NumHi == 3) {
    // Otherwise, we must have three elements from one vector, call it X, and
    // one element from the other, call it Y. First, use a shufps to build an
    // intermediate vector with the one element from Y and the element from X
    // that will be in the same half in the final destination (the indexes
    // don't matter). Then, use a shufps to build the final vector, taking the
    // half containing the element from Y from the intermediate, and the other
    // half from X.
    if (NumHi == 3) {
      // Normalize it so the 3 elements come from V1.
      PermMask = CommuteVectorShuffleMask(PermMask, DAG);
      std::swap(V1, V2);
    }

    // Find the element from V2.
    unsigned HiIndex;
    for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
      SDValue Elt = PermMask.getOperand(HiIndex);
      if (Elt.getOpcode() == ISD::UNDEF)
        continue;
      unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
      if (Val >= 4)
        break;
    }

    Mask1[0] = PermMask.getOperand(HiIndex);
    Mask1[1] = DAG.getNode(ISD::UNDEF, MaskEVT);
    Mask1[2] = PermMask.getOperand(HiIndex^1);
    Mask1[3] = DAG.getNode(ISD::UNDEF, MaskEVT);
    V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));

    if (HiIndex >= 2) {
      Mask1[0] = PermMask.getOperand(0);
      Mask1[1] = PermMask.getOperand(1);
      Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT);
      Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
    } else {
      Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT);
      Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT);
      Mask1[2] = PermMask.getOperand(2);
      Mask1[3] = PermMask.getOperand(3);
      if (Mask1[2].getOpcode() != ISD::UNDEF)
        Mask1[2] =
          DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4,
                          MaskEVT);
      if (Mask1[3].getOpcode() != ISD::UNDEF)
        Mask1[3] =
          DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4,
                          MaskEVT);
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
    }
  }

  // Break it into (shuffle shuffle_hi, shuffle_lo).
  Locs.clear();
  Locs.resize(4);
  SmallVector<SDValue,8> LoMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
  SmallVector<SDValue,8> HiMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
  SmallVector<SDValue,8> *MaskPtr = &LoMask;
  unsigned MaskIdx = 0;
  unsigned LoIdx = 0;
  unsigned HiIdx = 2;
  for (unsigned i = 0; i != 4; ++i) {
    if (i == 2) {
      MaskPtr = &HiMask;
      MaskIdx = 1;
      LoIdx = 0;
      HiIdx = 2;
    }
    SDValue Elt = PermMask.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF) {
      Locs[i] = std::make_pair(-1, -1);
    } else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) {
      Locs[i] = std::make_pair(MaskIdx, LoIdx);
      (*MaskPtr)[LoIdx] = Elt;
      LoIdx++;
    } else {
      Locs[i] = std::make_pair(MaskIdx, HiIdx);
      (*MaskPtr)[HiIdx] = Elt;
      HiIdx++;
    }
  }

  SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                              &LoMask[0], LoMask.size()));
  SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                              &HiMask[0], HiMask.size()));
  SmallVector<SDValue, 8> MaskOps;
  for (unsigned i = 0; i != 4; ++i) {
    if (Locs[i].first == -1) {
      MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
    } else {
      unsigned Idx = Locs[i].first * 4 + Locs[i].second;
      MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
    }
  }
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskOps[0], MaskOps.size()));
}
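
/// LowerVECTOR_SHUFFLE - First try the canonical shuffle forms that map
/// directly to x86 instructions (movl, movddup, unpck, pshuf, shufp, and the
/// vector shifts), then fall back to the element-wise v8i16 lowering and the
/// generic 4-wide lowering above.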
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);
  MVT VT = Op.getValueType();
  unsigned NumElems = PermMask.getNumOperands();
  bool isMMX = VT.getSizeInBits() == 64;
  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
  bool V1IsSplat = false;
  bool V2IsSplat = false;

  if (isUndefShuffle(Op.getNode()))
    return DAG.getNode(ISD::UNDEF, VT);

  if (isZeroShuffle(Op.getNode()))
    return getZeroVector(VT, Subtarget->hasSSE2(), DAG);

  if (isIdentityMask(PermMask.getNode()))
    return V1;
  else if (isIdentityMask(PermMask.getNode(), true))
    return V2;

  // Canonicalize movddup shuffles.
  if (V2IsUndef && Subtarget->hasSSE2() &&
      VT.getSizeInBits() == 128 &&
      X86::isMOVDDUPMask(PermMask.getNode()))
    return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());

  if (isSplatMask(PermMask.getNode())) {
    if (isMMX || NumElems < 4) return Op;
    // Promote it to a v4{if}32 splat.
    return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
  }

  // If the shuffle can be profitably rewritten as a narrower shuffle, then
  // do it!
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
    if (NewOp.getNode())
      return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
    // FIXME: Figure out a cleaner way to do this.
    // Try to make use of movq to zero out the top part.
    if (ISD::isBuildVectorAllZeros(V2.getNode())) {
      SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
                                               DAG, *this);
      if (NewOp.getNode()) {
        SDValue NewV1 = NewOp.getOperand(0);
        SDValue NewV2 = NewOp.getOperand(1);
        SDValue NewMask = NewOp.getOperand(2);
        if (isCommutedMOVL(NewMask.getNode(), true, false)) {
          NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
          return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
        }
      }
    } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
      SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
                                               DAG, *this);
      if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode()))
        return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                            DAG, Subtarget);
    }
  }

  // Check if this can be converted into a logical shift.
  bool isLeft = false;
  unsigned ShAmt = 0;
  SDValue ShVal;
  bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
  if (isShift && ShVal.hasOneUse()) {
    // If the shifted value has multiple uses, it may be cheaper to use
    // v_set0 + movlhps or movhlps, etc.
    MVT EVT = VT.getVectorElementType();
    ShAmt *= EVT.getSizeInBits();
    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
  }

  if (X86::isMOVLMask(PermMask.getNode())) {
    if (V1IsUndef)
      return V2;
    if (ISD::isBuildVectorAllZeros(V1.getNode()))
      return getVZextMovL(VT, VT, V2, DAG, Subtarget);
    if (!isMMX)
      return Op;
  }

  if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) ||
                 X86::isMOVSLDUPMask(PermMask.getNode()) ||
                 X86::isMOVHLPSMask(PermMask.getNode()) ||
                 X86::isMOVHPMask(PermMask.getNode()) ||
                 X86::isMOVLPMask(PermMask.getNode())))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.getNode()) ||
      ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode()))
    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (isShift) {
    // No better options. Use a vshl / vsrl.
    MVT EVT = VT.getVectorElementType();
    ShAmt *= EVT.getSizeInBits();
    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
  }

  bool Commuted = false;
  // FIXME: This should also accept a bitcast of a splat? Be careful, not
  // 1,1,1,1 -> v8i16 though.
  V1IsSplat = isSplatVector(V1.getNode());
  V2IsSplat = isSplatVector(V2.getNode());

  // Canonicalize the splat or undef, if present, to be on the RHS.
  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    std::swap(V1IsSplat, V2IsSplat);
    std::swap(V1IsUndef, V2IsUndef);
    Commuted = true;
  }

  // FIXME: Figure out a cleaner way to do this.
  if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDValue NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.getNode() != PermMask.getNode())
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
      X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
      X86::isUNPCKLMask(PermMask.getNode()) ||
      X86::isUNPCKHMask(PermMask.getNode()))
    return Op;

  if (V2IsSplat) {
    // Normalize the mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If a match is found,
    // return a new vector_shuffle with the corrected mask.
    SDValue NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.getNode() != PermMask.getNode()) {
      if (X86::isUNPCKLMask(PermMask.getNode(), true)) {
        SDValue NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(PermMask.getNode(), true)) {
        SDValue NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed.
  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode()))
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (Commuted) {
    // Commute it back and try unpck* again.
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
        X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
        X86::isUNPCKLMask(PermMask.getNode()) ||
        X86::isUNPCKHMask(PermMask.getNode()))
      return Op;
  }

  // Try PSHUF* first, then SHUFP*.
  // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
  // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
  if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) {
    if (V2.getOpcode() != ISD::UNDEF)
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                         DAG.getNode(ISD::UNDEF, VT), PermMask);
    return Op;
  }

  if (!isMMX) {
    if (Subtarget->hasSSE2() &&
        (X86::isPSHUFDMask(PermMask.getNode()) ||
         X86::isPSHUFHWMask(PermMask.getNode()) ||
         X86::isPSHUFLWMask(PermMask.getNode()))) {
      MVT RVT = VT;
      if (VT == MVT::v4f32) {
        RVT = MVT::v4i32;
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
                         DAG.getNode(ISD::BIT_CONVERT, RVT, V1),
                         DAG.getNode(ISD::UNDEF, RVT), PermMask);
      } else if (V2.getOpcode() != ISD::UNDEF)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1,
                         DAG.getNode(ISD::UNDEF, RVT), PermMask);
      if (RVT != VT)
        Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op);
      return Op;
    }

    // Binary or unary shufps.
    if (X86::isSHUFPMask(PermMask.getNode()) ||
        (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode())))
      return Op;
  }

  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
  if (VT == MVT::v8i16) {
    SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
    if (NewOp.getNode())
      return NewOp;
  }

  // Handle all 4 wide cases with a number of shuffles except for MMX.
  if (NumElems == 4 && !isMMX)
    return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG);

  return SDValue();
}

SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
                                                SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  if (VT.getSizeInBits() == 8) {
    SDValue Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (VT.getSizeInBits() == 16) {
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (VT == MVT::f32) {
    // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
    // the result back to an FR32 register. It's only worth matching if the
    // result has a single use which is a store or a bitcast to i32. And in
    // the case of a store, it's not worth it if the index is a constant 0,
    // because a MOVSSmr can be used instead, which is smaller and faster.
    if (!Op.hasOneUse())
      return SDValue();
    SDNode *User = *Op.getNode()->use_begin();
    if ((User->getOpcode() != ISD::STORE ||
         (isa<ConstantSDNode>(Op.getOperand(1)) &&
          cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
        (User->getOpcode() != ISD::BIT_CONVERT ||
         User->getValueType(0) != MVT::i32))
      return SDValue();
    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                                              Op.getOperand(0)),
                                  Op.getOperand(1));
    return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
  }
  return SDValue();
}


SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDValue();

  if (Subtarget->hasSSE41()) {
    SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
    if (Res.getNode())
      return Res;
  }

  MVT VT = Op.getValueType();
  // TODO: handle v16i8.
  if (VT.getSizeInBits() == 16) {
    SDValue Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return DAG.getNode(ISD::TRUNCATE, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                                     DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                                                 Vec),
                                     Op.getOperand(1)));
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (VT.getSizeInBits() == 32) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return Op;
    // SHUFPS the element to the lowest double word, then movss.
    MVT MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDValue, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &IdxVec[0], IdxVec.size());
    SDValue Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getIntPtrConstant(0));
  } else if (VT.getSizeInBits() == 64) {
    // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
    // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
    // to match extract_elt for f64.
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT MaskVT = MVT::getIntVectorWithNumElements(2);
    SmallVector<SDValue, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &IdxVec[0], IdxVec.size());
    SDValue Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getIntPtrConstant(0));
  }

  return SDValue();
}
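
// The SSE4.1 insertps immediate encodes the source element in bits [7:6],
// the destination element in bits [5:4], and a zero mask in bits [3:0];
// e.g. inserting a float into element 2 below uses the immediate
// (2 << 4) == 0x20.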
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EVT = VT.getVectorElementType();

  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);

  if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
      isa<ConstantSDNode>(N2)) {
    unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
                                              : X86ISD::PINSRW;
    // Transform it so it matches pinsr{b,w} which expects a GR32 as its second
    // argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
    return DAG.getNode(Opc, VT, N0, N1, N2);
  } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
    // Bits [7:6] of the constant are the source select. This will always be
    // zero here. The DAG Combiner may combine an extract_elt index into
    // these bits. For example (insert (extract, 3), 2) could be matched by
    // putting the '3' into bits [7:6] of X86ISD::INSERTPS.
    // Bits [5:4] of the constant are the destination select. This is the
    // value of the incoming immediate.
    // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
    // combine either bitwise AND or insert of float 0.0 to set these bits.
    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
    return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
  }
  return SDValue();
}

SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EVT = VT.getVectorElementType();

  if (Subtarget->hasSSE41())
    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);

  if (EVT == MVT::i8)
    return SDValue();

  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);

  if (EVT.getSizeInBits() == 16) {
    // Transform it so it matches pinsrw which expects a 16-bit value in a GR32
    // as its second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  }
  return SDValue();
}

SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  if (Op.getValueType() == MVT::v2f32)
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f32,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i32,
                                   DAG.getNode(ISD::BIT_CONVERT, MVT::i32,
                                               Op.getOperand(0))));

  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  MVT VT = MVT::v2i32;
  switch (Op.getValueType().getSimpleVT()) {
  default: break;
  case MVT::v16i8:
  case MVT::v8i16:
    VT = MVT::v4i32;
    break;
  }
  return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                     DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                             getPointerTy(),
                                             CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
                                      int64_t Offset,
                                      SelectionDAG &DAG) const {
  bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
  bool ExtraLoadRequired =
    Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);

  // Create the TargetGlobalAddress node, folding in the constant
  // offset if it is legal.
  SDValue Result;
  if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
    Offset = 0;
  } else
    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);

  // With PIC, the address is actually $g + Offset.
  if (IsPic && !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
  // to load the value at address GV, not the value of GV itself. This means
  // that the GlobalAddress must be in the base or index register of the
  // address, not the GV offset field. The platform check is inside the
  // GVRequiresExtraLoad() call. The same applies to external symbols during
  // PIC codegen.
  if (ExtraLoadRequired)
    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
                         PseudoSourceValue::getGOT(), 0);

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, getPointerTy(), Result,
                         DAG.getConstant(Offset, getPointerTy()));

  return Result;
}

SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
  return LowerGlobalAddress(GV, Offset, DAG);
}
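
// The TLS lowerings below implement two of the ELF TLS access models: the
// "general dynamic" model, which calls __tls_get_addr (___tls_get_addr with
// an extra underscore on x86-32) to obtain the variable's address at run
// time, and the "initial exec" / "local exec" models, which add a
// link-time-known offset to the thread pointer.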

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const MVT PtrVT) {
  SDValue InFlag;
  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
                                   DAG.getNode(X86ISD::GlobalBaseReg,
                                               PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  // emit leal symbol@TLSGD(,%ebx,1), %eax
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                           GA->getValueType(0),
                                           GA->getOffset());
  SDValue Ops[] = { Chain, TGA, InFlag };
  SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
  InFlag = Result.getValue(2);
  Chain = Result.getValue(1);

  // call ___tls_get_addr. This function receives its argument in
  // the register EAX.
  Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
  InFlag = Chain.getValue(1);

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops1[] = { Chain,
                     DAG.getTargetExternalSymbol("___tls_get_addr",
                                                 PtrVT),
                     DAG.getRegister(X86::EAX, PtrVT),
                     DAG.getRegister(X86::EBX, PtrVT),
                     InFlag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
  InFlag = Chain.getValue(1);

  return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const MVT PtrVT) {
  SDValue InFlag, Chain;

  // emit leaq symbol@TLSGD(%rip), %rdi
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                           GA->getValueType(0),
                                           GA->getOffset());
  SDValue Ops[] = { DAG.getEntryNode(), TGA };
  SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2);
  Chain = Result.getValue(1);
  InFlag = Result.getValue(2);

  // call __tls_get_addr. This function receives its argument in
  // the register RDI.
  Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag);
  InFlag = Chain.getValue(1);

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops1[] = { Chain,
                     DAG.getTargetExternalSymbol("__tls_get_addr",
                                                 PtrVT),
                     DAG.getRegister(X86::RDI, PtrVT),
                     InFlag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4);
  InFlag = Chain.getValue(1);

  return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag);
}

// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                   const MVT PtrVT) {
  // Get the Thread Pointer.
  SDValue ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
  // (initial exec)
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                           GA->getValueType(0),
                                           GA->getOffset());
  SDValue Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);

  if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
                         PseudoSourceValue::getGOT(), 0);

  // The address of the thread local variable is the sum of the thread
  // pointer and the offset of the variable.
4543 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
4544}
4545
4546SDValue
4547X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
4548 // TODO: implement the "local dynamic" model
4549 // TODO: implement the "initial exec" model for PIC executables
4550 assert(Subtarget->isTargetELF() &&
4551 "TLS not implemented for non-ELF targets");
4552 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4553 // If the relocation model is PIC, use the "General Dynamic" TLS Model,
4554 // otherwise use the "Local Exec" TLS Model.
4555 if (Subtarget->is64Bit()) {
4556 return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
4557 } else {
4558 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
4559 return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
4560 else
4561 return LowerToTLSExecModel(GA, DAG, getPointerTy());
4562 }
4563}
4564
4565SDValue
4566X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
4567 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
4568 SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
4569 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
4570 // With PIC, the address is actually $g + Offset.
4571 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
4572 !Subtarget->isPICStyleRIPRel()) {
4573 Result = DAG.getNode(ISD::ADD, getPointerTy(),
4574 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
4575 Result);
4576 }
4577
4578 return Result;
4579}
4580
4581SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
4582 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4583 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
4584 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
4585 // With PIC, the address is actually $g + Offset.
4586 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
4587 !Subtarget->isPICStyleRIPRel()) {
4588 Result = DAG.getNode(ISD::ADD, getPointerTy(),
4589 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
4590 Result);
4591 }
4592
4593 return Result;
4594}
4595
4596/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
4597/// take a 2 x i32 value to shift plus a shift amount.
4598SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
4599 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4600 MVT VT = Op.getValueType();
4601 unsigned VTBits = VT.getSizeInBits();
4602 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
4603 SDValue ShOpLo = Op.getOperand(0);
4604 SDValue ShOpHi = Op.getOperand(1);
4605 SDValue ShAmt = Op.getOperand(2);
4606 SDValue Tmp1 = isSRA ?
4607 DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) :
4608 DAG.getConstant(0, VT);
4609
4610 SDValue Tmp2, Tmp3;
4611 if (Op.getOpcode() == ISD::SHL_PARTS) {
4612 Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt);
4613 Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt);
4614 } else {
4615 Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt);
4616 Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt); 4617 } 4618 4619 SDValue AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 4620 DAG.getConstant(VTBits, MVT::i8)); 4621 SDValue Cond = DAG.getNode(X86ISD::CMP, VT, 4622 AndNode, DAG.getConstant(0, MVT::i8)); 4623 4624 SDValue Hi, Lo; 4625 SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4626 SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond }; 4627 SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; 4628 4629 if (Op.getOpcode() == ISD::SHL_PARTS) { 4630 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4631 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4632 } else { 4633 Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4); 4634 Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4); 4635 } 4636 4637 SDValue Ops[2] = { Lo, Hi }; 4638 return DAG.getMergeValues(Ops, 2); 4639} 4640 4641SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 4642 MVT SrcVT = Op.getOperand(0).getValueType(); 4643 assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && 4644 "Unknown SINT_TO_FP to lower!"); 4645 4646 // These are really Legal; caller falls through into that case. 4647 if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) 4648 return SDValue(); 4649 if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 && 4650 Subtarget->is64Bit()) 4651 return SDValue(); 4652 4653 unsigned Size = SrcVT.getSizeInBits()/8; 4654 MachineFunction &MF = DAG.getMachineFunction(); 4655 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4656 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4657 SDValue Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 4658 StackSlot, 4659 PseudoSourceValue::getFixedStack(SSFI), 0); 4660 4661 // Build the FILD 4662 SDVTList Tys; 4663 bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); 4664 if (useSSE) 4665 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 4666 else 4667 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 4668 SmallVector<SDValue, 8> Ops; 4669 Ops.push_back(Chain); 4670 Ops.push_back(StackSlot); 4671 Ops.push_back(DAG.getValueType(SrcVT)); 4672 SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, 4673 Tys, &Ops[0], Ops.size()); 4674 4675 if (useSSE) { 4676 Chain = Result.getValue(1); 4677 SDValue InFlag = Result.getValue(2); 4678 4679 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 4680 // shouldn't be necessary except that RFP cannot be live across 4681 // multiple blocks. When stackifier is fixed, they can be uncoupled. 4682 MachineFunction &MF = DAG.getMachineFunction(); 4683 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 4684 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4685 Tys = DAG.getVTList(MVT::Other); 4686 SmallVector<SDValue, 8> Ops; 4687 Ops.push_back(Chain); 4688 Ops.push_back(Result); 4689 Ops.push_back(StackSlot); 4690 Ops.push_back(DAG.getValueType(Op.getValueType())); 4691 Ops.push_back(InFlag); 4692 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 4693 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 4694 PseudoSourceValue::getFixedStack(SSFI), 0); 4695 } 4696 4697 return Result; 4698} 4699 4700SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 4701 MVT SrcVT = Op.getOperand(0).getValueType(); 4702 assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!"); 4703 4704 // We only handle SSE2 f64 target here; caller can handle the rest. 
4705 if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
4706 return SDValue();
4707
4708 // This algorithm is not obvious. Here it is in C code, more or less:
4709/*
4710 double uint64_to_double( uint32_t hi, uint32_t lo )
4711 {
4712 static const __m128i exp = { 0x4330000045300000ULL, 0 };
4713 static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
4714
4715 // copy ints to xmm registers
4716 __m128i xh = _mm_cvtsi32_si128( hi );
4717 __m128i xl = _mm_cvtsi32_si128( lo );
4718
4719 // combine into low half of a single xmm register
4720 __m128i x = _mm_unpacklo_epi32( xh, xl );
4721 __m128d d;
4722 double sd;
4723
4724 // merge in appropriate exponents to give the integer bits the
4725 // right magnitude
4726 x = _mm_unpacklo_epi32( x, exp );
4727
4728 // subtract away the biases to deal with the IEEE-754 double precision
4729 // implicit 1
4730 d = _mm_sub_pd( (__m128d) x, bias );
4731
4732 // All conversions up to here are exact. The correctly rounded result is
4733 // calculated in the
4734 // current rounding mode by the following horizontal add.
4735 d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
4736 _mm_store_sd( &sd, d ); // since we are returning doubles in XMM, this
4737 // store doesn't really need to be here (except maybe to zero the other
4738 // double)
4739 return sd;
4740 }
4741*/
4742
4743 // Build some magic constants.
4744 std::vector<Constant*> CV0;
4745 CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
4746 CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
4747 CV0.push_back(ConstantInt::get(APInt(32, 0)));
4748 CV0.push_back(ConstantInt::get(APInt(32, 0)));
4749 Constant *C0 = ConstantVector::get(CV0);
4750 SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4);
4751
4752 std::vector<Constant*> CV1;
4753 CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
4754 CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
4755 Constant *C1 = ConstantVector::get(CV1);
4756 SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4);
4757
4758 SmallVector<SDValue, 4> MaskVec;
4759 MaskVec.push_back(DAG.getConstant(0, MVT::i32));
4760 MaskVec.push_back(DAG.getConstant(4, MVT::i32));
4761 MaskVec.push_back(DAG.getConstant(1, MVT::i32));
4762 MaskVec.push_back(DAG.getConstant(5, MVT::i32));
4763 SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],
4764 MaskVec.size());
4765 SmallVector<SDValue, 4> MaskVec2;
4766 MaskVec2.push_back(DAG.getConstant(1, MVT::i32));
4767 MaskVec2.push_back(DAG.getConstant(0, MVT::i32));
4768 SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec2[0],
4769 MaskVec2.size());
4770
4771 SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
4772 DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
4773 Op.getOperand(0),
4774 DAG.getIntPtrConstant(1)));
4775 SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
4776 DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
4777 Op.getOperand(0),
4778 DAG.getIntPtrConstant(0)));
4779 SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
4780 XR1, XR2, UnpcklMask);
4781 SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0,
4782 PseudoSourceValue::getConstantPool(), 0, false, 16);
4783 SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
4784 Unpck1, CLod0, UnpcklMask);
4785 SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2);
4786 SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1,
4787 PseudoSourceValue::getConstantPool(), 0, false, 16);
4788 SDValue Sub =
DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1); 4789 // Add the halves; easiest way is to swap them into another reg first. 4790 SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64, 4791 Sub, Sub, ShufMask); 4792 SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub); 4793 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add, 4794 DAG.getIntPtrConstant(0)); 4795} 4796 4797std::pair<SDValue,SDValue> X86TargetLowering:: 4798FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { 4799 assert(Op.getValueType().getSimpleVT() <= MVT::i64 && 4800 Op.getValueType().getSimpleVT() >= MVT::i16 && 4801 "Unknown FP_TO_SINT to lower!"); 4802 4803 // These are really Legal. 4804 if (Op.getValueType() == MVT::i32 && 4805 isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) 4806 return std::make_pair(SDValue(), SDValue()); 4807 if (Subtarget->is64Bit() && 4808 Op.getValueType() == MVT::i64 && 4809 Op.getOperand(0).getValueType() != MVT::f80) 4810 return std::make_pair(SDValue(), SDValue()); 4811 4812 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 4813 // stack slot. 4814 MachineFunction &MF = DAG.getMachineFunction(); 4815 unsigned MemSize = Op.getValueType().getSizeInBits()/8; 4816 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4817 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4818 unsigned Opc; 4819 switch (Op.getValueType().getSimpleVT()) { 4820 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4821 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4822 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4823 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4824 } 4825 4826 SDValue Chain = DAG.getEntryNode(); 4827 SDValue Value = Op.getOperand(0); 4828 if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { 4829 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4830 Chain = DAG.getStore(Chain, Value, StackSlot, 4831 PseudoSourceValue::getFixedStack(SSFI), 0); 4832 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4833 SDValue Ops[] = { 4834 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4835 }; 4836 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4837 Chain = Value.getValue(1); 4838 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4839 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4840 } 4841 4842 // Build the FP_TO_INT*_IN_MEM 4843 SDValue Ops[] = { Chain, Value, StackSlot }; 4844 SDValue FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4845 4846 return std::make_pair(FIST, StackSlot); 4847} 4848 4849SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { 4850 std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(Op, DAG); 4851 SDValue FIST = Vals.first, StackSlot = Vals.second; 4852 if (FIST.getNode() == 0) return SDValue(); 4853 4854 // Load the result. 4855 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4856} 4857 4858SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 4859 std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG); 4860 SDValue FIST = Vals.first, StackSlot = Vals.second; 4861 if (FIST.getNode() == 0) return 0; 4862 4863 MVT VT = N->getValueType(0); 4864 4865 // Return a load from the stack slot. 4866 SDValue Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0); 4867 4868 // Use MERGE_VALUES to drop the chain result value and get a node with one 4869 // result. 
This requires turning off getMergeValues simplification, since 4870 // otherwise it will give us Res back. 4871 return DAG.getMergeValues(&Res, 1, false).getNode(); 4872} 4873 4874SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { 4875 MVT VT = Op.getValueType(); 4876 MVT EltVT = VT; 4877 if (VT.isVector()) 4878 EltVT = VT.getVectorElementType(); 4879 std::vector<Constant*> CV; 4880 if (EltVT == MVT::f64) { 4881 Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))); 4882 CV.push_back(C); 4883 CV.push_back(C); 4884 } else { 4885 Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))); 4886 CV.push_back(C); 4887 CV.push_back(C); 4888 CV.push_back(C); 4889 CV.push_back(C); 4890 } 4891 Constant *C = ConstantVector::get(CV); 4892 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4893 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4894 PseudoSourceValue::getConstantPool(), 0, 4895 false, 16); 4896 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4897} 4898 4899SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { 4900 MVT VT = Op.getValueType(); 4901 MVT EltVT = VT; 4902 unsigned EltNum = 1; 4903 if (VT.isVector()) { 4904 EltVT = VT.getVectorElementType(); 4905 EltNum = VT.getVectorNumElements(); 4906 } 4907 std::vector<Constant*> CV; 4908 if (EltVT == MVT::f64) { 4909 Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63))); 4910 CV.push_back(C); 4911 CV.push_back(C); 4912 } else { 4913 Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31))); 4914 CV.push_back(C); 4915 CV.push_back(C); 4916 CV.push_back(C); 4917 CV.push_back(C); 4918 } 4919 Constant *C = ConstantVector::get(CV); 4920 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4921 SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4922 PseudoSourceValue::getConstantPool(), 0, 4923 false, 16); 4924 if (VT.isVector()) { 4925 return DAG.getNode(ISD::BIT_CONVERT, VT, 4926 DAG.getNode(ISD::XOR, MVT::v2i64, 4927 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4928 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4929 } else { 4930 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4931 } 4932} 4933 4934SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { 4935 SDValue Op0 = Op.getOperand(0); 4936 SDValue Op1 = Op.getOperand(1); 4937 MVT VT = Op.getValueType(); 4938 MVT SrcVT = Op1.getValueType(); 4939 4940 // If second operand is smaller, extend it first. 4941 if (SrcVT.bitsLT(VT)) { 4942 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4943 SrcVT = VT; 4944 } 4945 // And if it is bigger, shrink it first. 4946 if (SrcVT.bitsGT(VT)) { 4947 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1)); 4948 SrcVT = VT; 4949 } 4950 4951 // At this point the operands and the result should have the same 4952 // type, and that won't be f80 since that is not custom lowered. 4953 4954 // First get the sign bit of second operand. 
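 // Conceptually, copysign(f, s) = (f & ~SignMask) | (s & SignMask); the
 // masks are built in the constant pool and applied with packed-FP logical ops.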
4955 std::vector<Constant*> CV; 4956 if (SrcVT == MVT::f64) { 4957 CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63)))); 4958 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 4959 } else { 4960 CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31)))); 4961 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4962 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4963 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4964 } 4965 Constant *C = ConstantVector::get(CV); 4966 SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4967 SDValue Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, 4968 PseudoSourceValue::getConstantPool(), 0, 4969 false, 16); 4970 SDValue SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4971 4972 // Shift sign bit right or left if the two operands have different types. 4973 if (SrcVT.bitsGT(VT)) { 4974 // Op0 is MVT::f32, Op1 is MVT::f64. 4975 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4976 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4977 DAG.getConstant(32, MVT::i32)); 4978 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4979 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4980 DAG.getIntPtrConstant(0)); 4981 } 4982 4983 // Clear first operand sign bit. 4984 CV.clear(); 4985 if (VT == MVT::f64) { 4986 CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))))); 4987 CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); 4988 } else { 4989 CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31))))); 4990 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4991 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4992 CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); 4993 } 4994 C = ConstantVector::get(CV); 4995 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4996 SDValue Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4997 PseudoSourceValue::getConstantPool(), 0, 4998 false, 16); 4999 SDValue Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 5000 5001 // Or the value with the sign bit. 5002 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 5003} 5004 5005SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { 5006 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 5007 SDValue Cond; 5008 SDValue Op0 = Op.getOperand(0); 5009 SDValue Op1 = Op.getOperand(1); 5010 SDValue CC = Op.getOperand(2); 5011 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 5012 unsigned X86CC; 5013 5014 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 5015 Op0, Op1, DAG)) { 5016 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 5017 return DAG.getNode(X86ISD::SETCC, MVT::i8, 5018 DAG.getConstant(X86CC, MVT::i8), Cond); 5019 } 5020 5021 assert(0 && "Illegal SetCC!"); 5022 return SDValue(); 5023} 5024 5025SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 5026 SDValue Cond; 5027 SDValue Op0 = Op.getOperand(0); 5028 SDValue Op1 = Op.getOperand(1); 5029 SDValue CC = Op.getOperand(2); 5030 MVT VT = Op.getValueType(); 5031 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 5032 bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); 5033 5034 if (isFP) { 5035 unsigned SSECC = 8; 5036 MVT VT0 = Op0.getValueType(); 5037 assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64); 5038 unsigned Opc = VT0 == MVT::v4f32 ? 
X86ISD::CMPPS : X86ISD::CMPPD; 5039 bool Swap = false; 5040 5041 switch (SetCCOpcode) { 5042 default: break; 5043 case ISD::SETOEQ: 5044 case ISD::SETEQ: SSECC = 0; break; 5045 case ISD::SETOGT: 5046 case ISD::SETGT: Swap = true; // Fallthrough 5047 case ISD::SETLT: 5048 case ISD::SETOLT: SSECC = 1; break; 5049 case ISD::SETOGE: 5050 case ISD::SETGE: Swap = true; // Fallthrough 5051 case ISD::SETLE: 5052 case ISD::SETOLE: SSECC = 2; break; 5053 case ISD::SETUO: SSECC = 3; break; 5054 case ISD::SETUNE: 5055 case ISD::SETNE: SSECC = 4; break; 5056 case ISD::SETULE: Swap = true; 5057 case ISD::SETUGE: SSECC = 5; break; 5058 case ISD::SETULT: Swap = true; 5059 case ISD::SETUGT: SSECC = 6; break; 5060 case ISD::SETO: SSECC = 7; break; 5061 } 5062 if (Swap) 5063 std::swap(Op0, Op1); 5064 5065 // In the two special cases we can't handle, emit two comparisons. 5066 if (SSECC == 8) { 5067 if (SetCCOpcode == ISD::SETUEQ) { 5068 SDValue UNORD, EQ; 5069 UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); 5070 EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); 5071 return DAG.getNode(ISD::OR, VT, UNORD, EQ); 5072 } 5073 else if (SetCCOpcode == ISD::SETONE) { 5074 SDValue ORD, NEQ; 5075 ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); 5076 NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); 5077 return DAG.getNode(ISD::AND, VT, ORD, NEQ); 5078 } 5079 assert(0 && "Illegal FP comparison"); 5080 } 5081 // Handle all other FP comparisons here. 5082 return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); 5083 } 5084 5085 // We are handling one of the integer comparisons here. Since SSE only has 5086 // GT and EQ comparisons for integer, swapping operands and multiple 5087 // operations may be required for some comparisons. 5088 unsigned Opc = 0, EQOpc = 0, GTOpc = 0; 5089 bool Swap = false, Invert = false, FlipSigns = false; 5090 5091 switch (VT.getSimpleVT()) { 5092 default: break; 5093 case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; 5094 case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; 5095 case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; 5096 case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; 5097 } 5098 5099 switch (SetCCOpcode) { 5100 default: break; 5101 case ISD::SETNE: Invert = true; 5102 case ISD::SETEQ: Opc = EQOpc; break; 5103 case ISD::SETLT: Swap = true; 5104 case ISD::SETGT: Opc = GTOpc; break; 5105 case ISD::SETGE: Swap = true; 5106 case ISD::SETLE: Opc = GTOpc; Invert = true; break; 5107 case ISD::SETULT: Swap = true; 5108 case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break; 5109 case ISD::SETUGE: Swap = true; 5110 case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break; 5111 } 5112 if (Swap) 5113 std::swap(Op0, Op1); 5114 5115 // Since SSE has no unsigned integer comparisons, we need to flip the sign 5116 // bits of the inputs before performing those operations. 
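 // That is, unsigned x > y is computed as signed (x ^ SignBit) > (y ^ SignBit).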
5117 if (FlipSigns) { 5118 MVT EltVT = VT.getVectorElementType(); 5119 SDValue SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT); 5120 std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); 5121 SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0], 5122 SignBits.size()); 5123 Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec); 5124 Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec); 5125 } 5126 5127 SDValue Result = DAG.getNode(Opc, VT, Op0, Op1); 5128 5129 // If the logical-not of the result is required, perform that now. 5130 if (Invert) { 5131 MVT EltVT = VT.getVectorElementType(); 5132 SDValue NegOne = DAG.getConstant(EltVT.getIntegerVTBitMask(), EltVT); 5133 std::vector<SDValue> NegOnes(VT.getVectorNumElements(), NegOne); 5134 SDValue NegOneV = DAG.getNode(ISD::BUILD_VECTOR, VT, &NegOnes[0], 5135 NegOnes.size()); 5136 Result = DAG.getNode(ISD::XOR, VT, Result, NegOneV); 5137 } 5138 return Result; 5139} 5140 5141SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { 5142 bool addTest = true; 5143 SDValue Cond = Op.getOperand(0); 5144 SDValue CC; 5145 5146 if (Cond.getOpcode() == ISD::SETCC) 5147 Cond = LowerSETCC(Cond, DAG); 5148 5149 // If condition flag is set by a X86ISD::CMP, then use it as the condition 5150 // setting operand in place of the X86ISD::SETCC. 5151 if (Cond.getOpcode() == X86ISD::SETCC) { 5152 CC = Cond.getOperand(0); 5153 5154 SDValue Cmp = Cond.getOperand(1); 5155 unsigned Opc = Cmp.getOpcode(); 5156 MVT VT = Op.getValueType(); 5157 5158 bool IllegalFPCMov = false; 5159 if (VT.isFloatingPoint() && !VT.isVector() && 5160 !isScalarFPTypeInSSEReg(VT)) // FPStack? 5161 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue()); 5162 5163 if ((Opc == X86ISD::CMP || 5164 Opc == X86ISD::COMI || 5165 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 5166 Cond = Cmp; 5167 addTest = false; 5168 } 5169 } 5170 5171 if (addTest) { 5172 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 5173 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 5174 } 5175 5176 const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), 5177 MVT::Flag); 5178 SmallVector<SDValue, 4> Ops; 5179 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 5180 // condition is true. 5181 Ops.push_back(Op.getOperand(2)); 5182 Ops.push_back(Op.getOperand(1)); 5183 Ops.push_back(CC); 5184 Ops.push_back(Cond); 5185 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 5186} 5187 5188SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { 5189 bool addTest = true; 5190 SDValue Chain = Op.getOperand(0); 5191 SDValue Cond = Op.getOperand(1); 5192 SDValue Dest = Op.getOperand(2); 5193 SDValue CC; 5194 5195 if (Cond.getOpcode() == ISD::SETCC) 5196 Cond = LowerSETCC(Cond, DAG); 5197 5198 // If condition flag is set by a X86ISD::CMP, then use it as the condition 5199 // setting operand in place of the X86ISD::SETCC. 5200 if (Cond.getOpcode() == X86ISD::SETCC) { 5201 CC = Cond.getOperand(0); 5202 5203 SDValue Cmp = Cond.getOperand(1); 5204 unsigned Opc = Cmp.getOpcode(); 5205 if (Opc == X86ISD::CMP || 5206 Opc == X86ISD::COMI || 5207 Opc == X86ISD::UCOMI) { 5208 Cond = Cmp; 5209 addTest = false; 5210 } 5211 // Also, recognize the pattern generated by an FCMP_UNE. We can emit 5212 // two branches instead of an explicit OR instruction with a 5213 // separate test. 
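 // (An FCMP_UNE lowers to an OR of two X86ISD::SETCC nodes, e.g. SETNE and
 // SETP of a single ucomiss, so branching on each flag separately avoids
 // materializing the OR.)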
5214 } else if (Cond.getOpcode() == ISD::OR &&
5215 Cond.hasOneUse() &&
5216 Cond.getOperand(0).getOpcode() == X86ISD::SETCC &&
5217 Cond.getOperand(0).hasOneUse() &&
5218 Cond.getOperand(1).getOpcode() == X86ISD::SETCC &&
5219 Cond.getOperand(1).hasOneUse()) {
5220 SDValue Cmp = Cond.getOperand(0).getOperand(1);
5221 unsigned Opc = Cmp.getOpcode();
5222 if (Cmp == Cond.getOperand(1).getOperand(1) &&
5223 (Opc == X86ISD::CMP ||
5224 Opc == X86ISD::COMI ||
5225 Opc == X86ISD::UCOMI)) {
5226 CC = Cond.getOperand(0).getOperand(0);
5227 Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
5228 Chain, Dest, CC, Cmp);
5229 CC = Cond.getOperand(1).getOperand(0);
5230 Cond = Cmp;
5231 addTest = false;
5232 }
5233 // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
5234 // two branches instead of an explicit AND instruction with a
5235 // separate test. However, we only do this if this block doesn't
5236 // have a fall-through edge, because this requires an explicit
5237 // jmp when the condition is false.
5238 } else if (Cond.getOpcode() == ISD::AND &&
5239 Cond.hasOneUse() &&
5240 Cond.getOperand(0).getOpcode() == X86ISD::SETCC &&
5241 Cond.getOperand(0).hasOneUse() &&
5242 Cond.getOperand(1).getOpcode() == X86ISD::SETCC &&
5243 Cond.getOperand(1).hasOneUse()) {
5244 SDValue Cmp = Cond.getOperand(0).getOperand(1);
5245 unsigned Opc = Cmp.getOpcode();
5246 if (Cmp == Cond.getOperand(1).getOperand(1) &&
5247 (Opc == X86ISD::CMP ||
5248 Opc == X86ISD::COMI ||
5249 Opc == X86ISD::UCOMI) &&
5250 Op.getNode()->hasOneUse()) {
5251 X86::CondCode CCode =
5252 (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
5253 CCode = X86::GetOppositeBranchCondition(CCode);
5254 CC = DAG.getConstant(CCode, MVT::i8);
5255 SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
5256 // Look for an unconditional branch following this conditional branch.
5257 // We need this because we need to reverse the successors in order
5258 // to implement FCMP_OEQ.
5259 if (User.getOpcode() == ISD::BR) {
5260 SDValue FalseBB = User.getOperand(1);
5261 SDValue NewBR =
5262 DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
5263 assert(NewBR == User);
5264 Dest = FalseBB;
5265
5266 Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
5267 Chain, Dest, CC, Cmp);
5268 X86::CondCode CCode =
5269 (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
5270 CCode = X86::GetOppositeBranchCondition(CCode);
5271 CC = DAG.getConstant(CCode, MVT::i8);
5272 Cond = Cmp;
5273 addTest = false;
5274 }
5275 }
5276 }
5277
5278 if (addTest) {
5279 CC = DAG.getConstant(X86::COND_NE, MVT::i8);
5280 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
5281 }
5282 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
5283 Chain, Dest, CC, Cond);
5284}
5285
5286
5287// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
5288// Calls to _alloca are needed to probe the stack when allocating more than 4k
5289// bytes in one go. Touching the stack at 4K increments is necessary to ensure
5290// that the guard pages used by the OS virtual memory manager are allocated in
5291// the correct sequence.
5292SDValue
5293X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
5294 SelectionDAG &DAG) {
5295 assert(Subtarget->isTargetCygMing() &&
5296 "This should be used only on Cygwin/Mingw targets");
5297
5298 // Get the inputs.
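 // In effect, the sequence emitted below is:
 //   movl <size>, %eax
 //   call _alloca          (probes the stack in page-size steps)
 // and the adjusted stack pointer is returned as the allocated block.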
5299 SDValue Chain = Op.getOperand(0); 5300 SDValue Size = Op.getOperand(1); 5301 // FIXME: Ensure alignment here 5302 5303 SDValue Flag; 5304 5305 MVT IntPtr = getPointerTy(); 5306 MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; 5307 5308 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); 5309 5310 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 5311 Flag = Chain.getValue(1); 5312 5313 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 5314 SDValue Ops[] = { Chain, 5315 DAG.getTargetExternalSymbol("_alloca", IntPtr), 5316 DAG.getRegister(X86::EAX, IntPtr), 5317 DAG.getRegister(X86StackPtr, SPTy), 5318 Flag }; 5319 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 5); 5320 Flag = Chain.getValue(1); 5321 5322 Chain = DAG.getCALLSEQ_END(Chain, 5323 DAG.getIntPtrConstant(0, true), 5324 DAG.getIntPtrConstant(0, true), 5325 Flag); 5326 5327 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 5328 5329 SDValue Ops1[2] = { Chain.getValue(0), Chain }; 5330 return DAG.getMergeValues(Ops1, 2); 5331} 5332 5333SDValue 5334X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, 5335 SDValue Chain, 5336 SDValue Dst, SDValue Src, 5337 SDValue Size, unsigned Align, 5338 const Value *DstSV, 5339 uint64_t DstSVOff) { 5340 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 5341 5342 // If not DWORD aligned or size is more than the threshold, call the library. 5343 // The libc version is likely to be faster for these cases. It can use the 5344 // address value and run time information about the CPU. 5345 if ((Align & 3) != 0 || 5346 !ConstantSize || 5347 ConstantSize->getZExtValue() > 5348 getSubtarget()->getMaxInlineSizeThreshold()) { 5349 SDValue InFlag(0, 0); 5350 5351 // Check to see if there is a specialized entry-point for memory zeroing. 5352 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); 5353 5354 if (const char *bzeroEntry = V && 5355 V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { 5356 MVT IntPtr = getPointerTy(); 5357 const Type *IntPtrTy = TD->getIntPtrType(); 5358 TargetLowering::ArgListTy Args; 5359 TargetLowering::ArgListEntry Entry; 5360 Entry.Node = Dst; 5361 Entry.Ty = IntPtrTy; 5362 Args.push_back(Entry); 5363 Entry.Node = Size; 5364 Args.push_back(Entry); 5365 std::pair<SDValue,SDValue> CallResult = 5366 LowerCallTo(Chain, Type::VoidTy, false, false, false, false, 5367 CallingConv::C, false, 5368 DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG); 5369 return CallResult.second; 5370 } 5371 5372 // Otherwise have the target-independent code call memset. 5373 return SDValue(); 5374 } 5375 5376 uint64_t SizeVal = ConstantSize->getZExtValue(); 5377 SDValue InFlag(0, 0); 5378 MVT AVT; 5379 SDValue Count; 5380 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); 5381 unsigned BytesLeft = 0; 5382 bool TwoRepStos = false; 5383 if (ValC) { 5384 unsigned ValReg; 5385 uint64_t Val = ValC->getZExtValue() & 255; 5386 5387 // If the value is a constant, then we can potentially use larger sets. 
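 // For example, a DWORD-aligned memset with Val == 0x41 becomes a
 // rep stosl of 0x41414141 (a rep stosq of 0x4141414141414141 when
 // QWORD aligned on x86-64).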
5388 switch (Align & 3) {
5389 case 2: // WORD aligned
5390 AVT = MVT::i16;
5391 ValReg = X86::AX;
5392 Val = (Val << 8) | Val;
5393 break;
5394 case 0: // DWORD aligned
5395 AVT = MVT::i32;
5396 ValReg = X86::EAX;
5397 Val = (Val << 8) | Val;
5398 Val = (Val << 16) | Val;
5399 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
5400 AVT = MVT::i64;
5401 ValReg = X86::RAX;
5402 Val = (Val << 32) | Val;
5403 }
5404 break;
5405 default: // Byte aligned
5406 AVT = MVT::i8;
5407 ValReg = X86::AL;
5408 Count = DAG.getIntPtrConstant(SizeVal);
5409 break;
5410 }
5411
5412 if (AVT.bitsGT(MVT::i8)) {
5413 unsigned UBytes = AVT.getSizeInBits() / 8;
5414 Count = DAG.getIntPtrConstant(SizeVal / UBytes);
5415 BytesLeft = SizeVal % UBytes;
5416 }
5417
5418 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
5419 InFlag);
5420 InFlag = Chain.getValue(1);
5421 } else {
5422 AVT = MVT::i8;
5423 Count = DAG.getIntPtrConstant(SizeVal);
5424 Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag);
5425 InFlag = Chain.getValue(1);
5426 }
5427
5428 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
5429 Count, InFlag);
5430 InFlag = Chain.getValue(1);
5431 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
5432 Dst, InFlag);
5433 InFlag = Chain.getValue(1);
5434
5435 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
5436 SmallVector<SDValue, 8> Ops;
5437 Ops.push_back(Chain);
5438 Ops.push_back(DAG.getValueType(AVT));
5439 Ops.push_back(InFlag);
5440 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
5441
5442 if (TwoRepStos) {
5443 InFlag = Chain.getValue(1);
5444 Count = Size;
5445 MVT CVT = Count.getValueType();
5446 SDValue Left = DAG.getNode(ISD::AND, CVT, Count,
5447 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
5448 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
5449 Left, InFlag);
5450 InFlag = Chain.getValue(1);
5451 Tys = DAG.getVTList(MVT::Other, MVT::Flag);
5452 Ops.clear();
5453 Ops.push_back(Chain);
5454 Ops.push_back(DAG.getValueType(MVT::i8));
5455 Ops.push_back(InFlag);
5456 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
5457 } else if (BytesLeft) {
5458 // Handle the last 1 - 7 bytes.
5459 unsigned Offset = SizeVal - BytesLeft;
5460 MVT AddrVT = Dst.getValueType();
5461 MVT SizeVT = Size.getValueType();
5462
5463 Chain = DAG.getMemset(Chain,
5464 DAG.getNode(ISD::ADD, AddrVT, Dst,
5465 DAG.getConstant(Offset, AddrVT)),
5466 Src,
5467 DAG.getConstant(BytesLeft, SizeVT),
5468 Align, DstSV, DstSVOff + Offset);
5469 }
5470
5471 // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
5472 return Chain;
5473}
5474
5475SDValue
5476X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
5477 SDValue Chain, SDValue Dst, SDValue Src,
5478 SDValue Size, unsigned Align,
5479 bool AlwaysInline,
5480 const Value *DstSV, uint64_t DstSVOff,
5481 const Value *SrcSV, uint64_t SrcSVOff) {
5482 // This requires the copy size to be a constant, preferably
5483 // within a subtarget-specific limit.
5484 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
5485 if (!ConstantSize)
5486 return SDValue();
5487 uint64_t SizeVal = ConstantSize->getZExtValue();
5488 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
5489 return SDValue();
5490
5491 // If not DWORD aligned, call the library.
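 // (Returning an empty SDValue here and above hands the operation back to
 // the target-independent lowering, which emits the memcpy libcall.)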
5492 if ((Align & 3) != 0) 5493 return SDValue(); 5494 5495 // DWORD aligned 5496 MVT AVT = MVT::i32; 5497 if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned 5498 AVT = MVT::i64; 5499 5500 unsigned UBytes = AVT.getSizeInBits() / 8; 5501 unsigned CountVal = SizeVal / UBytes; 5502 SDValue Count = DAG.getIntPtrConstant(CountVal); 5503 unsigned BytesLeft = SizeVal % UBytes; 5504 5505 SDValue InFlag(0, 0); 5506 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 5507 Count, InFlag); 5508 InFlag = Chain.getValue(1); 5509 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 5510 Dst, InFlag); 5511 InFlag = Chain.getValue(1); 5512 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 5513 Src, InFlag); 5514 InFlag = Chain.getValue(1); 5515 5516 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5517 SmallVector<SDValue, 8> Ops; 5518 Ops.push_back(Chain); 5519 Ops.push_back(DAG.getValueType(AVT)); 5520 Ops.push_back(InFlag); 5521 SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 5522 5523 SmallVector<SDValue, 4> Results; 5524 Results.push_back(RepMovs); 5525 if (BytesLeft) { 5526 // Handle the last 1 - 7 bytes. 5527 unsigned Offset = SizeVal - BytesLeft; 5528 MVT DstVT = Dst.getValueType(); 5529 MVT SrcVT = Src.getValueType(); 5530 MVT SizeVT = Size.getValueType(); 5531 Results.push_back(DAG.getMemcpy(Chain, 5532 DAG.getNode(ISD::ADD, DstVT, Dst, 5533 DAG.getConstant(Offset, DstVT)), 5534 DAG.getNode(ISD::ADD, SrcVT, Src, 5535 DAG.getConstant(Offset, SrcVT)), 5536 DAG.getConstant(BytesLeft, SizeVT), 5537 Align, AlwaysInline, 5538 DstSV, DstSVOff + Offset, 5539 SrcSV, SrcSVOff + Offset)); 5540 } 5541 5542 return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); 5543} 5544 5545/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain 5546SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){ 5547 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5548 SDValue TheChain = N->getOperand(0); 5549 SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); 5550 if (Subtarget->is64Bit()) { 5551 SDValue rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 5552 SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, 5553 MVT::i64, rax.getValue(2)); 5554 SDValue Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, 5555 DAG.getConstant(32, MVT::i8)); 5556 SDValue Ops[] = { 5557 DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) 5558 }; 5559 5560 return DAG.getMergeValues(Ops, 2).getNode(); 5561 } 5562 5563 SDValue eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 5564 SDValue edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, 5565 MVT::i32, eax.getValue(2)); 5566 // Use a buildpair to merge the two 32-bit values into a 64-bit one. 5567 SDValue Ops[] = { eax, edx }; 5568 Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2); 5569 5570 // Use a MERGE_VALUES to return the value and chain. 5571 Ops[1] = edx.getValue(1); 5572 return DAG.getMergeValues(Ops, 2).getNode(); 5573} 5574 5575SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { 5576 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 5577 5578 if (!Subtarget->is64Bit()) { 5579 // vastart just stores the address of the VarArgsFrameIndex slot into the 5580 // memory location argument. 
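 // (On x86-32, va_list is a single pointer, so one store suffices.)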
5581 SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5582 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV, 0); 5583 } 5584 5585 // __va_list_tag: 5586 // gp_offset (0 - 6 * 8) 5587 // fp_offset (48 - 48 + 8 * 16) 5588 // overflow_arg_area (point to parameters coming in memory). 5589 // reg_save_area 5590 SmallVector<SDValue, 8> MemOps; 5591 SDValue FIN = Op.getOperand(1); 5592 // Store gp_offset 5593 SDValue Store = DAG.getStore(Op.getOperand(0), 5594 DAG.getConstant(VarArgsGPOffset, MVT::i32), 5595 FIN, SV, 0); 5596 MemOps.push_back(Store); 5597 5598 // Store fp_offset 5599 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5600 Store = DAG.getStore(Op.getOperand(0), 5601 DAG.getConstant(VarArgsFPOffset, MVT::i32), 5602 FIN, SV, 0); 5603 MemOps.push_back(Store); 5604 5605 // Store ptr to overflow_arg_area 5606 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 5607 SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5608 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV, 0); 5609 MemOps.push_back(Store); 5610 5611 // Store ptr to reg_save_area. 5612 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); 5613 SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 5614 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV, 0); 5615 MemOps.push_back(Store); 5616 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 5617} 5618 5619SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { 5620 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5621 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); 5622 SDValue Chain = Op.getOperand(0); 5623 SDValue SrcPtr = Op.getOperand(1); 5624 SDValue SrcSV = Op.getOperand(2); 5625 5626 assert(0 && "VAArgInst is not yet implemented for x86-64!"); 5627 abort(); 5628 return SDValue(); 5629} 5630 5631SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { 5632 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5633 assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); 5634 SDValue Chain = Op.getOperand(0); 5635 SDValue DstPtr = Op.getOperand(1); 5636 SDValue SrcPtr = Op.getOperand(2); 5637 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 5638 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5639 5640 return DAG.getMemcpy(Chain, DstPtr, SrcPtr, 5641 DAG.getIntPtrConstant(24), 8, false, 5642 DstSV, 0, SrcSV, 0); 5643} 5644 5645SDValue 5646X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { 5647 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5648 switch (IntNo) { 5649 default: return SDValue(); // Don't custom lower most intrinsics. 5650 // Comparison intrinsics. 
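 // COMI raises the invalid-operation exception on QNaN operands, while
 // UCOMI (the "unordered" variant) is quiet; both set EFLAGS, which the
 // X86ISD::SETCC built below consumes.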
5651 case Intrinsic::x86_sse_comieq_ss: 5652 case Intrinsic::x86_sse_comilt_ss: 5653 case Intrinsic::x86_sse_comile_ss: 5654 case Intrinsic::x86_sse_comigt_ss: 5655 case Intrinsic::x86_sse_comige_ss: 5656 case Intrinsic::x86_sse_comineq_ss: 5657 case Intrinsic::x86_sse_ucomieq_ss: 5658 case Intrinsic::x86_sse_ucomilt_ss: 5659 case Intrinsic::x86_sse_ucomile_ss: 5660 case Intrinsic::x86_sse_ucomigt_ss: 5661 case Intrinsic::x86_sse_ucomige_ss: 5662 case Intrinsic::x86_sse_ucomineq_ss: 5663 case Intrinsic::x86_sse2_comieq_sd: 5664 case Intrinsic::x86_sse2_comilt_sd: 5665 case Intrinsic::x86_sse2_comile_sd: 5666 case Intrinsic::x86_sse2_comigt_sd: 5667 case Intrinsic::x86_sse2_comige_sd: 5668 case Intrinsic::x86_sse2_comineq_sd: 5669 case Intrinsic::x86_sse2_ucomieq_sd: 5670 case Intrinsic::x86_sse2_ucomilt_sd: 5671 case Intrinsic::x86_sse2_ucomile_sd: 5672 case Intrinsic::x86_sse2_ucomigt_sd: 5673 case Intrinsic::x86_sse2_ucomige_sd: 5674 case Intrinsic::x86_sse2_ucomineq_sd: { 5675 unsigned Opc = 0; 5676 ISD::CondCode CC = ISD::SETCC_INVALID; 5677 switch (IntNo) { 5678 default: break; 5679 case Intrinsic::x86_sse_comieq_ss: 5680 case Intrinsic::x86_sse2_comieq_sd: 5681 Opc = X86ISD::COMI; 5682 CC = ISD::SETEQ; 5683 break; 5684 case Intrinsic::x86_sse_comilt_ss: 5685 case Intrinsic::x86_sse2_comilt_sd: 5686 Opc = X86ISD::COMI; 5687 CC = ISD::SETLT; 5688 break; 5689 case Intrinsic::x86_sse_comile_ss: 5690 case Intrinsic::x86_sse2_comile_sd: 5691 Opc = X86ISD::COMI; 5692 CC = ISD::SETLE; 5693 break; 5694 case Intrinsic::x86_sse_comigt_ss: 5695 case Intrinsic::x86_sse2_comigt_sd: 5696 Opc = X86ISD::COMI; 5697 CC = ISD::SETGT; 5698 break; 5699 case Intrinsic::x86_sse_comige_ss: 5700 case Intrinsic::x86_sse2_comige_sd: 5701 Opc = X86ISD::COMI; 5702 CC = ISD::SETGE; 5703 break; 5704 case Intrinsic::x86_sse_comineq_ss: 5705 case Intrinsic::x86_sse2_comineq_sd: 5706 Opc = X86ISD::COMI; 5707 CC = ISD::SETNE; 5708 break; 5709 case Intrinsic::x86_sse_ucomieq_ss: 5710 case Intrinsic::x86_sse2_ucomieq_sd: 5711 Opc = X86ISD::UCOMI; 5712 CC = ISD::SETEQ; 5713 break; 5714 case Intrinsic::x86_sse_ucomilt_ss: 5715 case Intrinsic::x86_sse2_ucomilt_sd: 5716 Opc = X86ISD::UCOMI; 5717 CC = ISD::SETLT; 5718 break; 5719 case Intrinsic::x86_sse_ucomile_ss: 5720 case Intrinsic::x86_sse2_ucomile_sd: 5721 Opc = X86ISD::UCOMI; 5722 CC = ISD::SETLE; 5723 break; 5724 case Intrinsic::x86_sse_ucomigt_ss: 5725 case Intrinsic::x86_sse2_ucomigt_sd: 5726 Opc = X86ISD::UCOMI; 5727 CC = ISD::SETGT; 5728 break; 5729 case Intrinsic::x86_sse_ucomige_ss: 5730 case Intrinsic::x86_sse2_ucomige_sd: 5731 Opc = X86ISD::UCOMI; 5732 CC = ISD::SETGE; 5733 break; 5734 case Intrinsic::x86_sse_ucomineq_ss: 5735 case Intrinsic::x86_sse2_ucomineq_sd: 5736 Opc = X86ISD::UCOMI; 5737 CC = ISD::SETNE; 5738 break; 5739 } 5740 5741 unsigned X86CC; 5742 SDValue LHS = Op.getOperand(1); 5743 SDValue RHS = Op.getOperand(2); 5744 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 5745 5746 SDValue Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 5747 SDValue SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 5748 DAG.getConstant(X86CC, MVT::i8), Cond); 5749 return DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, SetCC); 5750 } 5751 5752 // Fix vector shift instructions where the last operand is a non-immediate 5753 // i32 value. 
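 // That is, when the shift amount of a pslli/psrli/psrai intrinsic is not a
 // constant, reselect the corresponding register-operand form (psll, psrl,
 // psra) with the i32 amount placed in a vector register, as done below.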
5754 case Intrinsic::x86_sse2_pslli_w: 5755 case Intrinsic::x86_sse2_pslli_d: 5756 case Intrinsic::x86_sse2_pslli_q: 5757 case Intrinsic::x86_sse2_psrli_w: 5758 case Intrinsic::x86_sse2_psrli_d: 5759 case Intrinsic::x86_sse2_psrli_q: 5760 case Intrinsic::x86_sse2_psrai_w: 5761 case Intrinsic::x86_sse2_psrai_d: 5762 case Intrinsic::x86_mmx_pslli_w: 5763 case Intrinsic::x86_mmx_pslli_d: 5764 case Intrinsic::x86_mmx_pslli_q: 5765 case Intrinsic::x86_mmx_psrli_w: 5766 case Intrinsic::x86_mmx_psrli_d: 5767 case Intrinsic::x86_mmx_psrli_q: 5768 case Intrinsic::x86_mmx_psrai_w: 5769 case Intrinsic::x86_mmx_psrai_d: { 5770 SDValue ShAmt = Op.getOperand(2); 5771 if (isa<ConstantSDNode>(ShAmt)) 5772 return SDValue(); 5773 5774 unsigned NewIntNo = 0; 5775 MVT ShAmtVT = MVT::v4i32; 5776 switch (IntNo) { 5777 case Intrinsic::x86_sse2_pslli_w: 5778 NewIntNo = Intrinsic::x86_sse2_psll_w; 5779 break; 5780 case Intrinsic::x86_sse2_pslli_d: 5781 NewIntNo = Intrinsic::x86_sse2_psll_d; 5782 break; 5783 case Intrinsic::x86_sse2_pslli_q: 5784 NewIntNo = Intrinsic::x86_sse2_psll_q; 5785 break; 5786 case Intrinsic::x86_sse2_psrli_w: 5787 NewIntNo = Intrinsic::x86_sse2_psrl_w; 5788 break; 5789 case Intrinsic::x86_sse2_psrli_d: 5790 NewIntNo = Intrinsic::x86_sse2_psrl_d; 5791 break; 5792 case Intrinsic::x86_sse2_psrli_q: 5793 NewIntNo = Intrinsic::x86_sse2_psrl_q; 5794 break; 5795 case Intrinsic::x86_sse2_psrai_w: 5796 NewIntNo = Intrinsic::x86_sse2_psra_w; 5797 break; 5798 case Intrinsic::x86_sse2_psrai_d: 5799 NewIntNo = Intrinsic::x86_sse2_psra_d; 5800 break; 5801 default: { 5802 ShAmtVT = MVT::v2i32; 5803 switch (IntNo) { 5804 case Intrinsic::x86_mmx_pslli_w: 5805 NewIntNo = Intrinsic::x86_mmx_psll_w; 5806 break; 5807 case Intrinsic::x86_mmx_pslli_d: 5808 NewIntNo = Intrinsic::x86_mmx_psll_d; 5809 break; 5810 case Intrinsic::x86_mmx_pslli_q: 5811 NewIntNo = Intrinsic::x86_mmx_psll_q; 5812 break; 5813 case Intrinsic::x86_mmx_psrli_w: 5814 NewIntNo = Intrinsic::x86_mmx_psrl_w; 5815 break; 5816 case Intrinsic::x86_mmx_psrli_d: 5817 NewIntNo = Intrinsic::x86_mmx_psrl_d; 5818 break; 5819 case Intrinsic::x86_mmx_psrli_q: 5820 NewIntNo = Intrinsic::x86_mmx_psrl_q; 5821 break; 5822 case Intrinsic::x86_mmx_psrai_w: 5823 NewIntNo = Intrinsic::x86_mmx_psra_w; 5824 break; 5825 case Intrinsic::x86_mmx_psrai_d: 5826 NewIntNo = Intrinsic::x86_mmx_psra_d; 5827 break; 5828 default: abort(); // Can't reach here. 5829 } 5830 break; 5831 } 5832 } 5833 MVT VT = Op.getValueType(); 5834 ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT, 5835 DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt)); 5836 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT, 5837 DAG.getConstant(NewIntNo, MVT::i32), 5838 Op.getOperand(1), ShAmt); 5839 } 5840 } 5841} 5842 5843SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { 5844 // Depths > 0 not supported yet! 5845 if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) 5846 return SDValue(); 5847 5848 // Just load the return address 5849 SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); 5850 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 5851} 5852 5853SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 5854 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 5855 MFI->setFrameAddressIsTaken(true); 5856 MVT VT = Op.getValueType(); 5857 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5858 unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; 5859 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), FrameReg, VT); 5860 while (Depth--) 5861 FrameAddr = DAG.getLoad(VT, DAG.getEntryNode(), FrameAddr, NULL, 0); 5862 return FrameAddr; 5863} 5864 5865SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, 5866 SelectionDAG &DAG) { 5867 return DAG.getIntPtrConstant(2*TD->getPointerSize()); 5868} 5869 5870SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) 5871{ 5872 MachineFunction &MF = DAG.getMachineFunction(); 5873 SDValue Chain = Op.getOperand(0); 5874 SDValue Offset = Op.getOperand(1); 5875 SDValue Handler = Op.getOperand(2); 5876 5877 SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP, 5878 getPointerTy()); 5879 unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); 5880 5881 SDValue StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 5882 DAG.getIntPtrConstant(-TD->getPointerSize())); 5883 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 5884 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 5885 Chain = DAG.getCopyToReg(Chain, StoreAddrReg, StoreAddr); 5886 MF.getRegInfo().addLiveOut(StoreAddrReg); 5887 5888 return DAG.getNode(X86ISD::EH_RETURN, 5889 MVT::Other, 5890 Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); 5891} 5892 5893SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, 5894 SelectionDAG &DAG) { 5895 SDValue Root = Op.getOperand(0); 5896 SDValue Trmp = Op.getOperand(1); // trampoline 5897 SDValue FPtr = Op.getOperand(2); // nested function 5898 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 5899 5900 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5901 5902 const X86InstrInfo *TII = 5903 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 5904 5905 if (Subtarget->is64Bit()) { 5906 SDValue OutChains[6]; 5907 5908 // Large code-model. 5909 5910 const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); 5911 const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); 5912 5913 const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); 5914 const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); 5915 5916 const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix 5917 5918 // Load the pointer to the nested function into R11. 5919 unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 5920 SDValue Addr = Trmp; 5921 OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5922 TrmpAddr, 0); 5923 5924 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); 5925 OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpAddr, 2, false, 2); 5926 5927 // Load the 'nest' parameter value into R10. 5928 // R10 is specified in X86CallingConv.td 5929 OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 5930 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); 5931 OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5932 TrmpAddr, 10); 5933 5934 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); 5935 OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 12, false, 2); 5936 5937 // Jump to the nested function. 5938 OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
5939 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); 5940 OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5941 TrmpAddr, 20); 5942 5943 unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 5944 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); 5945 OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr, 5946 TrmpAddr, 22); 5947 5948 SDValue Ops[] = 5949 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) }; 5950 return DAG.getMergeValues(Ops, 2); 5951 } else { 5952 const Function *Func = 5953 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 5954 unsigned CC = Func->getCallingConv(); 5955 unsigned NestReg; 5956 5957 switch (CC) { 5958 default: 5959 assert(0 && "Unsupported calling convention"); 5960 case CallingConv::C: 5961 case CallingConv::X86_StdCall: { 5962 // Pass 'nest' parameter in ECX. 5963 // Must be kept in sync with X86CallingConv.td 5964 NestReg = X86::ECX; 5965 5966 // Check that ECX wasn't needed by an 'inreg' parameter. 5967 const FunctionType *FTy = Func->getFunctionType(); 5968 const AttrListPtr &Attrs = Func->getAttributes(); 5969 5970 if (!Attrs.isEmpty() && !Func->isVarArg()) { 5971 unsigned InRegCount = 0; 5972 unsigned Idx = 1; 5973 5974 for (FunctionType::param_iterator I = FTy->param_begin(), 5975 E = FTy->param_end(); I != E; ++I, ++Idx) 5976 if (Attrs.paramHasAttr(Idx, Attribute::InReg)) 5977 // FIXME: should only count parameters that are lowered to integers. 5978 InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; 5979 5980 if (InRegCount > 2) { 5981 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 5982 abort(); 5983 } 5984 } 5985 break; 5986 } 5987 case CallingConv::X86_FastCall: 5988 case CallingConv::Fast: 5989 // Pass 'nest' parameter in EAX. 
5990 // Must be kept in sync with X86CallingConv.td
5991 NestReg = X86::EAX;
5992 break;
5993 }
5994
5995 SDValue OutChains[4];
5996 SDValue Addr, Disp;
5997
5998 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
5999 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
6000
6001 const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
6002 const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
6003 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
6004 Trmp, TrmpAddr, 0);
6005
6006 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
6007 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 1, false, 1);
6008
6009 const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
6010 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
6011 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
6012 TrmpAddr, 5, false, 1);
6013
6014 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
6015 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpAddr, 6, false, 1);
6016
6017 SDValue Ops[] =
6018 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
6019 return DAG.getMergeValues(Ops, 2);
6020 }
6021}
6022
6023SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
6024 /*
6025 The rounding mode is in bits 11:10 of the FP control word (FPCW), and has
6026 the following settings:
6027 00 Round to nearest
6028 01 Round to -inf
6029 10 Round to +inf
6030 11 Round to 0
6031
6032 FLT_ROUNDS, on the other hand, expects the following:
6033 -1 Undefined
6034 0 Round to 0
6035 1 Round to nearest
6036 2 Round to +inf
6037 3 Round to -inf
6038
6039 To perform the conversion, we do:
6040 (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
6041 */
6042
6043 MachineFunction &MF = DAG.getMachineFunction();
6044 const TargetMachine &TM = MF.getTarget();
6045 const TargetFrameInfo &TFI = *TM.getFrameInfo();
6046 unsigned StackAlignment = TFI.getStackAlignment();
6047 MVT VT = Op.getValueType();
6048
6049 // Save FP Control Word to stack slot
6050 int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
6051 SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
6052
6053 SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
6054 DAG.getEntryNode(), StackSlot);
6055
6056 // Load FP Control Word from stack slot
6057 SDValue CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
6058
6059 // Transform as necessary
6060 SDValue CWD1 =
6061 DAG.getNode(ISD::SRL, MVT::i16,
6062 DAG.getNode(ISD::AND, MVT::i16,
6063 CWD, DAG.getConstant(0x800, MVT::i16)),
6064 DAG.getConstant(11, MVT::i8));
6065 SDValue CWD2 =
6066 DAG.getNode(ISD::SRL, MVT::i16,
6067 DAG.getNode(ISD::AND, MVT::i16,
6068 CWD, DAG.getConstant(0x400, MVT::i16)),
6069 DAG.getConstant(9, MVT::i8));
6070
6071 SDValue RetVal =
6072 DAG.getNode(ISD::AND, MVT::i16,
6073 DAG.getNode(ISD::ADD, MVT::i16,
6074 DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
6075 DAG.getConstant(1, MVT::i16)),
6076 DAG.getConstant(3, MVT::i16));
6077
6078
6079 return DAG.getNode((VT.getSizeInBits() < 16 ?
6080 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
6081}
6082
6083SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
6084 MVT VT = Op.getValueType();
6085 MVT OpVT = VT;
6086 unsigned NumBits = VT.getSizeInBits();
6087
6088 Op = Op.getOperand(0);
6089 if (VT == MVT::i8) {
6090 // Zero extend to i32 since there is no i8 bsr.
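 // Overall, ctlz(x) = (NumBits-1) ^ bsr(x); a zero input is CMOV'ed to
 // 2*NumBits-1 before the final xor, which yields NumBits.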
SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT OpVT = VT;
  unsigned NumBits = VT.getSizeInBits();

  Op = Op.getOperand(0);
  if (VT == MVT::i8) {
    // Zero extend to i32 since there is not an i8 bsr.
    OpVT = MVT::i32;
    Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
  }

  // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  Op = DAG.getNode(X86ISD::BSR, VTs, Op);

  // If src is zero (i.e. bsr sets ZF), returns NumBits.
  SmallVector<SDValue, 4> Ops;
  Ops.push_back(Op);
  Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
  Ops.push_back(Op.getValue(1));
  Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);

  // Finally xor with NumBits-1.
  Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));

  if (VT == MVT::i8)
    Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
  return Op;
}

SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT OpVT = VT;
  unsigned NumBits = VT.getSizeInBits();

  Op = Op.getOperand(0);
  if (VT == MVT::i8) {
    OpVT = MVT::i32;
    Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
  }

  // Issue a bsf (scan bits forward) which also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  Op = DAG.getNode(X86ISD::BSF, VTs, Op);

  // If src is zero (i.e. bsf sets ZF), returns NumBits.
  SmallVector<SDValue, 4> Ops;
  Ops.push_back(Op);
  Ops.push_back(DAG.getConstant(NumBits, OpVT));
  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
  Ops.push_back(Op.getValue(1));
  Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);

  if (VT == MVT::i8)
    Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
  return Op;
}
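// Informative example for the two lowerings above: ctlz_i32(0x00010000)
// issues bsr, which returns 16, and 16 ^ 31 == 15, the leading-zero count.
// For a zero input the cmov substitutes 2*NumBits-1 == 63, and 63 ^ 31 ==
// 32 == NumBits.  cttz needs no final xor because bsf already yields the
// trailing-zero count; only the zero-input case is patched with a cmov.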
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
  MVT T = Op.getValueType();
  unsigned Reg = 0;
  unsigned size = 0;
  switch(T.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8:  Reg = X86::AL;  size = 1; break;
  case MVT::i16: Reg = X86::AX;  size = 2; break;
  case MVT::i32: Reg = X86::EAX; size = 4; break;
  case MVT::i64:
    if (Subtarget->is64Bit()) {
      Reg = X86::RAX; size = 8;
    } else // Should go away when LegalizeTypes stuff lands
      return SDValue(ExpandATOMIC_CMP_SWAP(Op.getNode(), DAG), 0);
    break;
  }
  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg,
                                  Op.getOperand(2), SDValue());
  SDValue Ops[] = { cpIn.getValue(0),
                    Op.getOperand(1),
                    Op.getOperand(3),
                    DAG.getTargetConstant(size, MVT::i8),
                    cpIn.getValue(1) };
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, Tys, Ops, 5);
  SDValue cpOut =
    DAG.getCopyFromReg(Result.getValue(0), Reg, T, Result.getValue(1));
  return cpOut;
}

SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op,
                                                 SelectionDAG &DAG) {
  MVT T = Op->getValueType(0);
  assert(T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
  SDValue cpInL, cpInH;
  cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2),
                      DAG.getConstant(0, MVT::i32));
  cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2),
                      DAG.getConstant(1, MVT::i32));
  cpInL = DAG.getCopyToReg(Op->getOperand(0), X86::EAX,
                           cpInL, SDValue());
  cpInH = DAG.getCopyToReg(cpInL.getValue(0), X86::EDX,
                           cpInH, cpInL.getValue(1));
  SDValue swapInL, swapInH;
  swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3),
                        DAG.getConstant(0, MVT::i32));
  swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3),
                        DAG.getConstant(1, MVT::i32));
  swapInL = DAG.getCopyToReg(cpInH.getValue(0), X86::EBX,
                             swapInL, cpInH.getValue(1));
  swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX,
                             swapInH, swapInL.getValue(1));
  SDValue Ops[] = { swapInH.getValue(0),
                    Op->getOperand(1),
                    swapInH.getValue(1) };
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3);
  SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32,
                                      Result.getValue(1));
  SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), X86::EDX, MVT::i32,
                                      cpOutL.getValue(2));
  SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0) };
  SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2);
  SDValue Vals[2] = { ResultVal, cpOutH.getValue(1) };
  return DAG.getMergeValues(Vals, 2).getNode();
}

SDValue X86TargetLowering::LowerATOMIC_BINARY_64(SDValue Op,
                                                 SelectionDAG &DAG,
                                                 unsigned NewOp) {
  SDNode *Node = Op.getNode();
  MVT T = Node->getValueType(0);
  assert(T == MVT::i64 && "Only know how to expand i64 atomics");

  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(0));
  SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(1));
  // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
  // have a MemOperand.  Pass the info through as a normal operand.
  SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
  SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
  SDValue Result = DAG.getNode(NewOp, Tys, Ops, 5);
  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
  SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2);
  SDValue Vals[2] = { ResultVal, Result.getValue(2) };
  return SDValue(DAG.getMergeValues(Vals, 2).getNode(), 0);
}

SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  MVT T = Node->getValueType(0);
  SDValue negOp = DAG.getNode(ISD::SUB, T,
                              DAG.getConstant(0, T), Node->getOperand(2));
  return DAG.getAtomic((Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_8 ?
                        ISD::ATOMIC_LOAD_ADD_8 :
                        Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_16 ?
                        ISD::ATOMIC_LOAD_ADD_16 :
                        Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_32 ?
                        ISD::ATOMIC_LOAD_ADD_32 :
                        ISD::ATOMIC_LOAD_ADD_64),
                       Node->getOperand(0),
                       Node->getOperand(1), negOp,
                       cast<AtomicSDNode>(Node)->getSrcValue(),
                       cast<AtomicSDNode>(Node)->getAlignment());
}
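// Informative note: LowerCMP_SWAP pins the comparand in the accumulator
// (AL/AX/EAX/RAX by width) as "lock cmpxchg" requires, and LowerLOAD_SUB
// rewrites an atomic subtract as an atomic add of the negation, e.g.
// "atomic sub 5" becomes "atomic add -5", so only the add form needs
// direct instruction support.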
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::ATOMIC_CMP_SWAP_8:
  case ISD::ATOMIC_CMP_SWAP_16:
  case ISD::ATOMIC_CMP_SWAP_32:
  case ISD::ATOMIC_CMP_SWAP_64:  return LowerCMP_SWAP(Op,DAG);
  case ISD::ATOMIC_LOAD_SUB_8:
  case ISD::ATOMIC_LOAD_SUB_16:
  case ISD::ATOMIC_LOAD_SUB_32:  return LowerLOAD_SUB(Op,DAG);
  case ISD::ATOMIC_LOAD_SUB_64:  return (Subtarget->is64Bit()) ?
                                   LowerLOAD_SUB(Op,DAG) :
                                   LowerATOMIC_BINARY_64(Op,DAG,
                                                        X86ISD::ATOMSUB64_DAG);
  case ISD::ATOMIC_LOAD_AND_64:  return LowerATOMIC_BINARY_64(Op,DAG,
                                                        X86ISD::ATOMAND64_DAG);
  case ISD::ATOMIC_LOAD_OR_64:   return LowerATOMIC_BINARY_64(Op, DAG,
                                                        X86ISD::ATOMOR64_DAG);
  case ISD::ATOMIC_LOAD_XOR_64:  return LowerATOMIC_BINARY_64(Op,DAG,
                                                        X86ISD::ATOMXOR64_DAG);
  case ISD::ATOMIC_LOAD_NAND_64: return LowerATOMIC_BINARY_64(Op,DAG,
                                                        X86ISD::ATOMNAND64_DAG);
  case ISD::ATOMIC_LOAD_ADD_64:  return LowerATOMIC_BINARY_64(Op,DAG,
                                                        X86ISD::ATOMADD64_DAG);
  case ISD::ATOMIC_SWAP_64:      return LowerATOMIC_BINARY_64(Op,DAG,
                                                        X86ISD::ATOMSWAP64_DAG);
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::VSETCC:             return LowerVSETCC(Op, DAG);
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::CALL:               return LowerCALL(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  case ISD::FRAME_TO_ARGS_OFFSET:
                                return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);

  // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
  case ISD::READCYCLECOUNTER:
    return SDValue(ExpandREADCYCLECOUNTER(Op.getNode(), DAG), 0);
  }
}
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
SDNode *X86TargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default:
    return X86TargetLowering::LowerOperation(SDValue(N, 0), DAG).getNode();
  case ISD::FP_TO_SINT:         return ExpandFP_TO_SINT(N, DAG);
  case ISD::READCYCLECOUNTER:   return ExpandREADCYCLECOUNTER(N, DAG);
  case ISD::ATOMIC_CMP_SWAP_64: return ExpandATOMIC_CMP_SWAP(N, DAG);
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::BSF:                return "X86ISD::BSF";
  case X86ISD::BSR:                return "X86ISD::BSR";
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FOR:                return "X86ISD::FOR";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FSRL:               return "X86ISD::FSRL";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::PEXTRB:             return "X86ISD::PEXTRB";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
  case X86ISD::PINSRB:             return "X86ISD::PINSRB";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
  case X86ISD::FRCP:               return "X86ISD::FRCP";
  case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
  case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
  case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
  case X86ISD::TC_RETURN:          return "X86ISD::TC_RETURN";
  case X86ISD::FNSTCW16m:          return "X86ISD::FNSTCW16m";
  case X86ISD::LCMPXCHG_DAG:       return "X86ISD::LCMPXCHG_DAG";
  case X86ISD::LCMPXCHG8_DAG:      return "X86ISD::LCMPXCHG8_DAG";
  case X86ISD::ATOMADD64_DAG:      return "X86ISD::ATOMADD64_DAG";
  case X86ISD::ATOMSUB64_DAG:      return "X86ISD::ATOMSUB64_DAG";
  case X86ISD::ATOMOR64_DAG:       return "X86ISD::ATOMOR64_DAG";
  case X86ISD::ATOMXOR64_DAG:      return "X86ISD::ATOMXOR64_DAG";
  case X86ISD::ATOMAND64_DAG:      return "X86ISD::ATOMAND64_DAG";
  case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
  case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
  case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
  case X86ISD::VSHL:               return "X86ISD::VSHL";
  case X86ISD::VSRL:               return "X86ISD::VSRL";
  case X86ISD::CMPPD:              return "X86ISD::CMPPD";
  case X86ISD::CMPPS:              return "X86ISD::CMPPS";
  case X86ISD::PCMPEQB:            return "X86ISD::PCMPEQB";
  case X86ISD::PCMPEQW:            return "X86ISD::PCMPEQW";
  case X86ISD::PCMPEQD:            return "X86ISD::PCMPEQD";
  case X86ISD::PCMPEQQ:            return "X86ISD::PCMPEQQ";
  case X86ISD::PCMPGTB:            return "X86ISD::PCMPGTB";
  case X86ISD::PCMPGTW:            return "X86ISD::PCMPGTW";
  case X86ISD::PCMPGTD:            return "X86ISD::PCMPGTD";
  case X86ISD::PCMPGTQ:            return "X86ISD::PCMPGTQ";
  }
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement,
  // so reject anything that does not fit in 32 bits.
  if (AM.BaseOffs <= -(1LL << 31) || AM.BaseOffs >= (1LL << 31))
    return false;

  if (AM.BaseGV) {
    // We can only fold this if we don't need an extra load.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;

    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit()) {
      if (getTargetMachine().getCodeModel() != CodeModel::Small)
        return false;
      // If lower 4G is not available, then we must use rip-relative addressing.
      if (AM.BaseOffs || AM.Scale > 1)
        return false;
    }
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}
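// Informative examples of what the predicate above accepts (AT&T syntax,
// illustrative operands):
//   movl 8(%esi,%edi,4), %eax    ; base + 4*index + disp      -> legal
//   leal (%eax,%eax,2), %ecx     ; scale 3 as base + 2*base   -> legal only
//                                ;   because no other base register is used
//   base + 3*index with a base register already present       -> rejected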
bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
  if (!Ty1->isInteger() || !Ty2->isInteger())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDValue Mask, MVT VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (VT.getSizeInBits() == 64) return false;
  return (Mask.getNode()->getNumOperands() <= 4 ||
          isIdentityMask(Mask.getNode()) ||
          isIdentityMask(Mask.getNode(), true) ||
          isSplatMask(Mask.getNode()) ||
          isPSHUFHW_PSHUFLWMask(Mask.getNode()) ||
          X86::isUNPCKLMask(Mask.getNode()) ||
          X86::isUNPCKHMask(Mask.getNode()) ||
          X86::isUNPCKL_v_undef_Mask(Mask.getNode()) ||
          X86::isUNPCKH_v_undef_Mask(Mask.getNode()));
}

bool
X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
                                          MVT EVT, SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (EVT.getSizeInBits() * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4)  ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpc,
                                                       unsigned immOpc,
                                                       unsigned LoadOpc,
                                                       unsigned CXchgOpc,
                                                       unsigned copyOpc,
                                                       unsigned notOpc,
                                                       unsigned EAXreg,
                                                       TargetRegisterClass *RC,
                                                       bool invSrc) {
  // For the atomic bitwise operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld  t1 = [bitinstr.addr]
  //     op  t2 = t1, [bitinstr.val]
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz  newMBB
  //     fallthrough -->nextMBB
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(bInstr->getNumOperands() < 8 && "unexpected number of operands");
  MachineOperand& destOper = bInstr->getOperand(0);
  MachineOperand* argOpers[6];
  int numArgs = bInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &bInstr->getOperand(i+1);

  // x86 address has 4 operands: base, index, scale, and displacement
  int lastAddrIndx = 3; // [0,3]
  int valArgIndx = 4;

  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  unsigned tt = F->getRegInfo().createVirtualRegister(RC);
  if (invSrc) {
    MIB = BuildMI(newMBB, TII->get(notOpc), tt).addReg(t1);
  }
  else
    tt = t1;

  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, TII->get(regOpc), t2);
  else
    MIB = BuildMI(newMBB, TII->get(immOpc), t2);
  MIB.addReg(tt);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, TII->get(copyOpc), EAXreg);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, TII->get(CXchgOpc));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t2);
  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
  (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(copyOpc), destOper.getReg());
  MIB.addReg(EAXreg);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(bInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}
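// Informative sketch of the loop the inserter above produces for ATOMAND32
// (registers and labels illustrative):
//   newMBB:
//     movl  (addr), %t1
//     movl  %t1, %t2 ; andl val, %t2      ; regOpc/immOpc, on ~t1 if invSrc
//     movl  %t1, %eax                     ; expected value
//     lock cmpxchgl %t2, (addr)           ; CXchgOpc
//     jne   newMBB                        ; retry if memory changed under us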
// private utility function: 64 bit atomics on 32 bit host.
MachineBasicBlock *
X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpcL,
                                                       unsigned regOpcH,
                                                       unsigned immOpcL,
                                                       unsigned immOpcH,
                                                       bool invSrc) {
  // For the atomic bitwise operator, we generate
  //   thisMBB (instructions are in pairs, except cmpxchg8b)
  //     ld t1,t2 = [bitinstr.addr]
  //   newMBB:
  //     out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
  //     op  t5, t6 <- out1, out2, [bitinstr.val]
  //      (for SWAP, substitute:  mov t5, t6 <- [bitinstr.val])
  //     mov ECX, EBX <- t5, t6
  //     mov EAX, EDX <- t1, t2
  //     cmpxchg8b [bitinstr.addr]  [EAX, EDX, EBX, ECX implicit]
  //     mov t3, t4 <- EAX, EDX
  //     bz  newMBB
  //     result in out1, out2
  //     fallthrough -->nextMBB

  const TargetRegisterClass *RC = X86::GR32RegisterClass;
  const unsigned LoadOpc = X86::MOV32rm;
  const unsigned copyOpc = X86::MOV32rr;
  const unsigned NotOpc = X86::NOT32r;
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
  assert(bInstr->getNumOperands() < 18 && "unexpected number of operands");
  MachineOperand& dest1Oper = bInstr->getOperand(0);
  MachineOperand& dest2Oper = bInstr->getOperand(1);
  MachineOperand* argOpers[6];
  for (int i=0; i < 6; ++i)
    argOpers[i] = &bInstr->getOperand(i+2);

  // x86 address has 4 operands: base, index, scale, and displacement
  int lastAddrIndx = 3; // [0,3]

  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(thisMBB, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  MIB = BuildMI(thisMBB, TII->get(LoadOpc), t2);
  // add 4 to displacement.
  for (int i=0; i <= lastAddrIndx-1; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MachineOperand newOp3 = *(argOpers[3]);
  if (newOp3.isImm())
    newOp3.setImm(newOp3.getImm()+4);
  else
    newOp3.setOffset(newOp3.getOffset()+4);
  (*MIB).addOperand(newOp3);

  // t3/4 are defined later, at the bottom of the loop
  unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
  BuildMI(newMBB, TII->get(X86::PHI), dest1Oper.getReg())
    .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
  BuildMI(newMBB, TII->get(X86::PHI), dest2Oper.getReg())
    .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);

  unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
  unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
  if (invSrc) {
    MIB = BuildMI(newMBB, TII->get(NotOpc), tt1).addReg(t1);
    MIB = BuildMI(newMBB, TII->get(NotOpc), tt2).addReg(t2);
  } else {
    tt1 = t1;
    tt2 = t2;
  }

  assert((argOpers[4]->isReg() || argOpers[4]->isImm()) &&
         "invalid operand");
  unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
  if (argOpers[4]->isReg())
    MIB = BuildMI(newMBB, TII->get(regOpcL), t5);
  else
    MIB = BuildMI(newMBB, TII->get(immOpcL), t5);
  if (regOpcL != X86::MOV32rr)
    MIB.addReg(tt1);
  (*MIB).addOperand(*argOpers[4]);
  assert(argOpers[5]->isReg() == argOpers[4]->isReg());
  assert(argOpers[5]->isImm() == argOpers[4]->isImm());
  if (argOpers[5]->isReg())
    MIB = BuildMI(newMBB, TII->get(regOpcH), t6);
  else
    MIB = BuildMI(newMBB, TII->get(immOpcH), t6);
  if (regOpcH != X86::MOV32rr)
    MIB.addReg(tt2);
  (*MIB).addOperand(*argOpers[5]);

  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EAX);
  MIB.addReg(t1);
  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EDX);
  MIB.addReg(t2);

  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EBX);
  MIB.addReg(t5);
  MIB = BuildMI(newMBB, TII->get(copyOpc), X86::ECX);
  MIB.addReg(t6);

  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG8B));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
  (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(copyOpc), t3);
  MIB.addReg(X86::EAX);
  MIB = BuildMI(newMBB, TII->get(copyOpc), t4);
  MIB.addReg(X86::EDX);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(bInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}
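// Informative sketch of the 64-on-32 loop above, e.g. for ATOMADD6432
// (phi plumbing omitted, operands illustrative):
//     movl  (addr), %t1 ; movl 4(addr), %t2     ; initial load in thisMBB
//   newMBB:
//     addl  lo, %t5 ; adcl hi, %t6              ; regOpcL / regOpcH pair
//     movl  %out_lo, %eax ; movl %out_hi, %edx  ; expected pair
//     movl  %t5, %ebx ; movl %t6, %ecx          ; replacement pair
//     lock cmpxchg8b (addr)
//     jne   newMBB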
// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
                                                      MachineBasicBlock *MBB,
                                                      unsigned cmovOpc) {
  // For the atomic min/max operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld t1 = [min/max.addr]
  //     mov t2 = [min/max.val]
  //     cmp  t1, t2
  //     cmov[cond] t2 = t1
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz   newMBB
  //     fallthrough -->nextMBB
  //
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
  MachineOperand& destOper = mInstr->getOperand(0);
  MachineOperand* argOpers[6];
  int numArgs = mInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &mInstr->getOperand(i+1);

  // x86 address has 4 operands: base, index, scale, and displacement
  int lastAddrIndx = 3; // [0,3]
  int valArgIndx = 4;

  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  // We only support register and immediate values
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");

  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
  else
    MIB = BuildMI(newMBB, TII->get(X86::MOV32ri), t2);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
  MIB.addReg(t1);
  MIB.addReg(t2);

  // Generate movc
  unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MIB = BuildMI(newMBB, TII->get(cmovOpc), t3);
  MIB.addReg(t2);
  MIB.addReg(t1);

  // Cmp and exchange if none has modified the memory location
  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t3);
  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperands");
  (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());

  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
  MIB.addReg(X86::EAX);

  // insert branch
  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);

  F->DeleteMachineInstr(mInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}
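// Informative sketch for the min/max loop above, e.g. ATOMMIN32 with
// cmovOpc == X86::CMOVL32rr:
//   newMBB:
//     movl  (addr), %t1
//     movl  val, %t2
//     cmpl  %t2, %t1
//     cmovl %t1, %t3        ; keep the smaller of the two
//     movl  %t1, %eax
//     lock cmpxchgl %t3, (addr)
//     jne   newMBB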
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineFunction *F = BB->getParent();
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);
    // Update machine-CFG edges by transferring all successors of the current
    // block to the new block which will contain the Phi node for the select.
    sinkMBB->transferSuccessors(BB);

    // Add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }
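  // Informative sketch of the diamond built above (there is no cmov for FP
  // or vector registers, so a select becomes control flow):
  //   thisMBB:   jcc sinkMBB            ; take TrueVal when condition holds
  //   copy0MBB:  (fallthrough)          ; FalseValue path
  //   sinkMBB:   %Result = phi [FalseValue, copy0MBB], [TrueVal, thisMBB]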
  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Set the rounding-control bits to round towards zero...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isReg()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImm())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImm())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobal()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }
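  // Informative note on the constant above: 0xC7F sets the control word's
  // rounding-control bits (11:10) to 11, x87 round-toward-zero, while
  // keeping all exception mask bits set, which is what a C float-to-int
  // cast needs from the truncating fistp.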
  case X86::ATOMAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
                                               X86::OR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMXOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
                                               X86::XOR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMNAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass, true);
  case X86::ATOMMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
  case X86::ATOMMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
  case X86::ATOMUMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
  case X86::ATOMUMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);

  case X86::ATOMAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
                                               X86::OR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMXOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
                                               X86::XOR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMNAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass, true);
  case X86::ATOMMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
  case X86::ATOMMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
  case X86::ATOMUMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
  case X86::ATOMUMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);

  case X86::ATOMAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
                                               X86::OR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMXOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
                                               X86::XOR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMNAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass, true);
  // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
  // This group is for 64-bit host.
  case X86::ATOMAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
                                               X86::OR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMXOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
                                               X86::XOR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMNAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass, true);
  case X86::ATOMMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
  case X86::ATOMMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
  case X86::ATOMUMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
  case X86::ATOMUMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);

  // This group does 64-bit operations on a 32-bit host.
  case X86::ATOMAND6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::AND32rr, X86::AND32rr,
                                               X86::AND32ri, X86::AND32ri,
                                               false);
  case X86::ATOMOR6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::OR32rr, X86::OR32rr,
                                               X86::OR32ri, X86::OR32ri,
                                               false);
  case X86::ATOMXOR6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::XOR32rr, X86::XOR32rr,
                                               X86::XOR32ri, X86::XOR32ri,
                                               false);
  case X86::ATOMNAND6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::AND32rr, X86::AND32rr,
                                               X86::AND32ri, X86::AND32ri,
                                               true);
  case X86::ATOMADD6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::ADD32rr, X86::ADC32rr,
                                               X86::ADD32ri, X86::ADC32ri,
                                               false);
  case X86::ATOMSUB6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::SUB32rr, X86::SBB32rr,
                                               X86::SUB32ri, X86::SBB32ri,
                                               false);
  case X86::ATOMSWAP6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::MOV32rr, X86::MOV32rr,
                                               X86::MOV32ri, X86::MOV32ri,
                                               false);
  }
}
//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);  // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
                                       Mask.getBitWidth() - 1);
    break;
  }
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
                                       GlobalValue* &GA,
                                       int64_t &Offset) const {
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
      return true;
    }
  }
  return TargetLowering::isGAPlusOffset(N, GA, Offset);
}

static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
                               const TargetLowering &TLI) {
  GlobalValue *GV;
  int64_t Offset = 0;
  if (TLI.isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= N && (Offset % N) == 0);
  // DAG combine handles the stack object case.
  return false;
}

static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask,
                                     unsigned NumElems, MVT EVT,
                                     SDNode *&Base,
                                     SelectionDAG &DAG, MachineFrameInfo *MFI,
                                     const TargetLowering &TLI) {
  Base = NULL;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base)
        return false;
      continue;
    }

    SDValue Elt = DAG.getShuffleScalarElt(N, i);
    if (!Elt.getNode() ||
        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
      return false;
    if (!Base) {
      Base = Elt.getNode();
      if (Base->getOpcode() == ISD::UNDEF)
        return false;
      continue;
    }
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;

    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
                               EVT.getSizeInBits()/8, i, MFI))
      return false;
  }
  return true;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  SDValue PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDNode *Base = NULL;
  if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
                                DAG, MFI, TLI))
    return SDValue();

  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                     LD->getSrcValueOffset(), LD->isVolatile(),
                     LD->getAlignment());
}
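// Informative example for the combine above: a <4 x float> shuffle that is
// equal to
//   build_vector (load a[0]), (load a[1]), (load a[2]), (load a[3]), <0,1,2,3>
// is replaced by one 128-bit load of a; when the base is known 16-byte
// aligned, the explicit (smaller) alignment is dropped so an aligned vector
// load can be selected.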
/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget *Subtarget,
                                         const TargetLowering &TLI) {
  unsigned NumOps = N->getNumOperands();

  // Ignore single operand BUILD_VECTOR.
  if (NumOps == 1)
    return SDValue();

  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
    // We are looking for load i64 and zero extend. We want to transform
    // it before legalizer has a chance to expand it. Also look for i64
    // BUILD_PAIR bit casted to f64.
    return SDValue();
  // This must be an insertion into a zero vector.
  SDValue HighElt = N->getOperand(1);
  if (!isZeroNode(HighElt))
    return SDValue();

  // Value must be a load.
  SDNode *Base = N->getOperand(0).getNode();
  if (!isa<LoadSDNode>(Base)) {
    if (Base->getOpcode() != ISD::BIT_CONVERT)
      return SDValue();
    Base = Base->getOperand(0).getNode();
    if (!isa<LoadSDNode>(Base))
      return SDValue();
  }

  // Transform it into VZEXT_LOAD addr.
  LoadSDNode *LD = cast<LoadSDNode>(Base);

  // Load must not be an extload.
  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, Tys, Ops, 2);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Base, 1), ResNode.getValue(1));
  return ResNode;
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
  SDValue Cond = N->getOperand(0);

  // If we have SSE2 support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDValue LHS = N->getOperand(1);
      SDValue RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDValue();
}
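// Informative example for the select combine above:
//   %c = fcmp olt double %x, %y
//   %r = select i1 %c, double %x, double %y
// becomes X86ISD::FMIN(%x, %y), i.e. a single minsd; the unordered and
// le/ge variants are only taken when UnsafeFPMath permits, since minsd's
// NaN semantics differ from those of the IR select.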
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget *Subtarget) {
  // Turn load->store of MMX types into GPR load/stores.  This avoids
  // clobbering the FP state in cases where an emms may be missing.
  // A preferable solution to the general problem is to figure out the right
  // places to insert EMMS.  This qualifies as a quick hack.
  StoreSDNode *St = cast<StoreSDNode>(N);
  if (St->getValue().getValueType().isVector() &&
      St->getValue().getValueType().getSizeInBits() == 64 &&
      isa<LoadSDNode>(St->getValue()) &&
      !cast<LoadSDNode>(St->getValue())->isVolatile() &&
      St->getChain().hasOneUse() && !St->isVolatile()) {
    SDNode* LdVal = St->getValue().getNode();
    LoadSDNode *Ld = 0;
    int TokenFactorIndex = -1;
    SmallVector<SDValue, 8> Ops;
    SDNode* ChainVal = St->getChain().getNode();
    // Must be a store of a load.  We currently handle two cases:  the load
    // is a direct child, and it's under an intervening TokenFactor.  It is
    // possible to dig deeper under nested TokenFactors.
    if (ChainVal == LdVal)
      Ld = cast<LoadSDNode>(St->getChain());
    else if (St->getValue().hasOneUse() &&
             ChainVal->getOpcode() == ISD::TokenFactor) {
      for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
        if (ChainVal->getOperand(i).getNode() == LdVal) {
          TokenFactorIndex = i;
          Ld = cast<LoadSDNode>(St->getValue());
        } else
          Ops.push_back(ChainVal->getOperand(i));
      }
    }
    if (Ld) {
      // If we are a 64-bit capable x86, lower to a single movq load/store pair.
      if (Subtarget->is64Bit()) {
        SDValue NewLd = DAG.getLoad(MVT::i64, Ld->getChain(),
                                    Ld->getBasePtr(), Ld->getSrcValue(),
                                    Ld->getSrcValueOffset(), Ld->isVolatile(),
                                    Ld->getAlignment());
        SDValue NewChain = NewLd.getValue(1);
        if (TokenFactorIndex != -1) {
          Ops.push_back(NewChain);
          NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
                                 Ops.size());
        }
        return DAG.getStore(NewChain, NewLd, St->getBasePtr(),
                            St->getSrcValue(), St->getSrcValueOffset(),
                            St->isVolatile(), St->getAlignment());
      }

      // Otherwise, lower to two 32-bit copies.
      SDValue LoAddr = Ld->getBasePtr();
      SDValue HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
                                   DAG.getConstant(4, MVT::i32));

      SDValue LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset(),
                                 Ld->isVolatile(), Ld->getAlignment());
      SDValue HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
                                 Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
                                 Ld->isVolatile(),
                                 MinAlign(Ld->getAlignment(), 4));

      SDValue NewChain = LoLd.getValue(1);
      if (TokenFactorIndex != -1) {
        Ops.push_back(LoLd);
        Ops.push_back(HiLd);
        NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
                               Ops.size());
      }

      LoAddr = St->getBasePtr();
      HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
                           DAG.getConstant(4, MVT::i32));

      SDValue LoSt = DAG.getStore(NewChain, LoLd, LoAddr,
                                  St->getSrcValue(), St->getSrcValueOffset(),
                                  St->isVolatile(), St->getAlignment());
      SDValue HiSt = DAG.getStore(NewChain, HiLd, HiAddr,
                                  St->getSrcValue(),
                                  St->getSrcValueOffset() + 4,
                                  St->isVolatile(),
                                  MinAlign(St->getAlignment(), 4));
      return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
    }
  }
  return SDValue();
}

/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
  // F[X]OR(0.0, x) -> x
  // F[X]OR(x, 0.0) -> x
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  return SDValue();
}

/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
  // FAND(0.0, x) -> 0.0
  // FAND(x, 0.0) -> 0.0
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  return SDValue();
}


SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
  case ISD::BUILD_VECTOR:
    return PerformBuildVectorCombine(N, DAG, Subtarget, *this);
  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
  case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
  case X86ISD::FXOR:
  case X86ISD::FOR:         return PerformFORCombine(N, DAG);
  case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'f':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'y':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
LowerXConstraint(MVT ConstraintVT) const {
  // FP X constraints get lowered to SSE1/2 registers if available, otherwise
  // 'f' like normal targets.
  if (ConstraintVT.isFloatingPoint()) {
    if (Subtarget->hasSSE2())
      return "Y";
    if (Subtarget->hasSSE1())
      return "x";
  }

  return TargetLowering::LowerXConstraint(ConstraintVT);
}
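// Informative example for LowerXConstraint: for
//   asm volatile("" : "+X"(d));   // d is a double
// the fully-general 'X' constraint is narrowed to "Y" (an SSE register)
// when SSE2 is available, rather than defaulting to the x87 stack.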
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}
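// Illustrative note (an assumed example, not from the original source): a
// classic use of the 'A' constraint is reading the timestamp counter on a
// 32-bit target,
//   %t = call i64 asm "rdtsc", "=A"()
// where the EAX/EDX pair returned above lets the i64 result live in
// edx:eax, exactly where the instruction produces it.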
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      if (VT == MVT::i32 || !Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR32RegisterClass);
      return std::make_pair(0U, X86::GR64RegisterClass);
    case 'f':   // FP Stack registers.
      // If SSE is enabled for this VT, use f80 to ensure the isel moves the
      // value to the correct fpstack register class.
      if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP32RegisterClass);
      if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP64RegisterClass);
      return std::make_pair(0U, X86::RFP80RegisterClass);
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT.getSimpleVT()) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}; we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second == X86::GR16RegisterClass) {
    if (VT == MVT::i8) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::AL; break;
      case X86::DX: DestReg = X86::DL; break;
      case X86::CX: DestReg = X86::CL; break;
      case X86::BX: DestReg = X86::BL; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR8RegisterClass;
      }
    } else if (VT == MVT::i32) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::EAX; break;
      case X86::DX: DestReg = X86::EDX; break;
      case X86::CX: DestReg = X86::ECX; break;
      case X86::BX: DestReg = X86::EBX; break;
      case X86::SI: DestReg = X86::ESI; break;
      case X86::DI: DestReg = X86::EDI; break;
      case X86::BP: DestReg = X86::EBP; break;
      case X86::SP: DestReg = X86::ESP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR32RegisterClass;
      }
    } else if (VT == MVT::i64) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::RAX; break;
      case X86::DX: DestReg = X86::RDX; break;
      case X86::CX: DestReg = X86::RCX; break;
      case X86::BX: DestReg = X86::RBX; break;
      case X86::SI: DestReg = X86::RSI; break;
      case X86::DI: DestReg = X86::RDI; break;
      case X86::BP: DestReg = X86::RBP; break;
      case X86::SP: DestReg = X86::RSP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR64RegisterClass;
      }
    }
  } else if (Res.second == X86::FR32RegisterClass ||
             Res.second == X86::FR64RegisterClass ||
             Res.second == X86::VR128RegisterClass) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class.  This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it can
    // find, ignoring the required type.
    if (VT == MVT::f32)
      Res.second = X86::FR32RegisterClass;
    else if (VT == MVT::f64)
      Res.second = X86::FR64RegisterClass;
    else if (X86::VR128RegisterClass->hasType(VT))
      Res.second = X86::VR128RegisterClass;
  }

  return Res;
}
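// Illustrative note (an assumed example, not from the original source):
// given
//   call void asm sideeffect "addl $0, (%esp)", "{ax}"(i32 %v)
// the generic mapper resolves "{ax}" to the 16-bit AX register, and the
// remapping above rewrites it to EAX in GR32 so the i32 operand occupies
// the full 32-bit register rather than a 16-bit piece of it.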
//===----------------------------------------------------------------------===//
//                           X86 Widen vector type
//===----------------------------------------------------------------------===//

/// getWidenVectorType: given a vector type, returns the type to widen to
/// (e.g., v7i8 to v8i8).  If the vector type is legal, it returns itself.
/// If there is no vector type that we want to widen to, it returns MVT::Other.
/// When and where to widen is target dependent, based on the cost of
/// scalarizing vs. using the wider vector type.

MVT X86TargetLowering::getWidenVectorType(MVT VT) {
  assert(VT.isVector());
  if (isTypeLegal(VT))
    return VT;

  // TODO: In computeRegisterProperty, we can compute the list of legal vector
  //       types based on element type.  This would speed up our search (though
  //       it may not be worth it since the size of the list is relatively
  //       small).
  MVT EltVT = VT.getVectorElementType();
  unsigned NElts = VT.getVectorNumElements();

  // On X86, it makes sense to widen any vector with more than one element.
  if (NElts <= 1)
    return MVT::Other;

  for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
       nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
    MVT SVT = (MVT::SimpleValueType)nVT;

    if (isTypeLegal(SVT) &&
        SVT.getVectorElementType() == EltVT &&
        SVT.getVectorNumElements() > NElts)
      return SVT;
  }
  return MVT::Other;
}
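// Illustrative note (an assumed example, not from the original source):
// taking the v7i8 case from the comment above, the loop scans the simple
// vector types in order and returns the first legal one with an i8 element
// and more than seven of them (v8i8 on a subtarget where MMX makes it
// legal, otherwise v16i8 under SSE2), while a one-element type such as
// v1i64 falls out early as MVT::Other and is scalarized instead.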