X86ISelLowering.cpp revision e179584f9b740cf3a36bde70f8cab40de59b8081
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  bool Fast = false;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird: it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8 , Expand);
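  // Note: Expand for a truncating store means the legalizer rewrites it as an
  // explicit ISD::TRUNCATE of the value followed by a plain store of the
  // narrower type (e.g. storing an i64 as i16 becomes trunc + i16 store).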
  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP     , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP     , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP     , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP   , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf64)
      // SSE has no i64 SINT_TO_FP in 32-bit mode, so expand i32 UINT_TO_FP
      // rather than promoting it to an i64 SINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP     , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP     , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP   , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP   , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP   , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP   , MVT::i32  , Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode f32 and f64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT     , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT     , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT   , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT   , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT   , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT   , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT     , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT     , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT     , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32  , Promote);
  }
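  // A promoted FP_TO_UINT is safe because the unsigned result always fits in
  // the next wider signed type: e.g. "fptoui float to i16" is legalized as an
  // FP_TO_SINT producing i32, whose low 16 bits give the answer.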
  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT  , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT  , MVT::i32  , Expand);
  }

  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part is
  // not needed.
  setOperationAction(ISD::MUL            , MVT::i8   , Expand);
  setOperationAction(ISD::MULHS          , MVT::i8   , Expand);
  setOperationAction(ISD::MULHU          , MVT::i8   , Expand);
  setOperationAction(ISD::SDIV           , MVT::i8   , Expand);
  setOperationAction(ISD::UDIV           , MVT::i8   , Expand);
  setOperationAction(ISD::SREM           , MVT::i8   , Expand);
  setOperationAction(ISD::UREM           , MVT::i8   , Expand);
  setOperationAction(ISD::MUL            , MVT::i16  , Expand);
  setOperationAction(ISD::MULHS          , MVT::i16  , Expand);
  setOperationAction(ISD::MULHU          , MVT::i16  , Expand);
  setOperationAction(ISD::SDIV           , MVT::i16  , Expand);
  setOperationAction(ISD::UDIV           , MVT::i16  , Expand);
  setOperationAction(ISD::SREM           , MVT::i16  , Expand);
  setOperationAction(ISD::UREM           , MVT::i16  , Expand);
  setOperationAction(ISD::MUL            , MVT::i32  , Expand);
  setOperationAction(ISD::MULHS          , MVT::i32  , Expand);
  setOperationAction(ISD::MULHU          , MVT::i32  , Expand);
  setOperationAction(ISD::SDIV           , MVT::i32  , Expand);
  setOperationAction(ISD::UDIV           , MVT::i32  , Expand);
  setOperationAction(ISD::SREM           , MVT::i32  , Expand);
  setOperationAction(ISD::UREM           , MVT::i32  , Expand);
  setOperationAction(ISD::MUL            , MVT::i64  , Expand);
  setOperationAction(ISD::MULHS          , MVT::i64  , Expand);
  setOperationAction(ISD::MULHU          , MVT::i64  , Expand);
  setOperationAction(ISD::SDIV           , MVT::i64  , Expand);
  setOperationAction(ISD::UDIV           , MVT::i64  , Expand);
  setOperationAction(ISD::SREM           , MVT::i64  , Expand);
  setOperationAction(ISD::UREM           , MVT::i64  , Expand);

  setOperationAction(ISD::BR_JT          , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND         , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC          , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC      , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE        , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32  , Expand);
  setOperationAction(ISD::FREM           , MVT::f64  , Expand);
  setOperationAction(ISD::FLT_ROUNDS_    , MVT::i32  , Custom);

  setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ           , MVT::i8   , Custom);
  setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
  setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
  setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
  setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
  setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
    setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
  }
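  // For the Custom CTTZ/CTLZ above: BSF/BSR leave their destination undefined
  // when the source is zero, so the custom lowering in this file patches the
  // zero case up with a conditional move before returning the count.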
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f80  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f80  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN      , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool   , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable      , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress  , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol , MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);
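  // The *_PARTS nodes carry a 64-bit quantity as two i32 halves; the custom
  // lowering in this file emits SHLD/SHRD pairs plus a conditional move for
  // shift amounts of 32 or more, so a 64-bit shift needs no library call.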
  // Use the default ISD::LOCATION, ISD::DECLARE expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART         , MVT::Other, Custom);
  setOperationAction(ISD::VAARG           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND           , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY        , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY        , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Floating truncations from f80 and extensions to f80 go through memory.
    // If optimizing, we lie about this though and handle it in
    // InstructionSelectPreprocess so that dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f64, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE <-> X87 conversions go through memory.  If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f64, Expand);
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      // And x87->x87 truncations also.
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations go through memory.  If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f64, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }
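  // Immediates registered via addLegalFPImmediate can be materialized
  // directly (xorps/xorpd for +0.0 in an SSE register, FLD0/FLD1 plus an
  // optional FCHS on the x87 stack) instead of being loaded from the
  // constant pool.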
  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  {
    APFloat TmpFlt(+0.0);
    TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt);  // FLD0
    TmpFlt.changeSign();
    addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
    APFloat TmpFlt2(+1.0);
    TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt2);  // FLD1
    TmpFlt2.changeSign();
    addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
  }

  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN , MVT::f80 , Expand);
    setOperationAction(ISD::FCOS , MVT::f80 , Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW , MVT::f32 , Expand);
  setOperationAction(ISD::FPOW , MVT::f64 , Expand);
  setOperationAction(ISD::FPOW , MVT::f80 , Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD, MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,   MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR,  MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,  MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,  MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,  MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }
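  // FNEG is Custom because SSE has no negate instruction: the lowering XORs
  // the vector with a constant whose lanes hold only the sign bit
  // (0x80000000 per f32 lane), i.e. a single XORPS against a pool constant.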
  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,   MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,   MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,   MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SUB,   MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,   MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,   MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,   MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,   MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,  MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,  MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,  MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,  MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG,  MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      // Do not attempt to custom lower non-power-of-2 vectors.
      if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
        continue;
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }
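    // Note: the loops from MVT::v16i8 up to (but not including) MVT::v2i64
    // rely on the 128-bit integer vector types being consecutive in the MVT
    // enumeration (v16i8, v8i16, v4i32, v2i64); v2i64 acts as the end
    // sentinel and is configured separately.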
    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  if (Subtarget->hasSSE41()) {
    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);

    // i8 and i16 vectors are custom, because the source register and source
    // memory operand types are not the same width.  f32 vectors are custom
    // since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
    }
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info.  Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}
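// Example of the limits above: with maxStoresPerMemset == 16, a memset that
// can be covered by at most 16 stores (e.g. 64 bytes of i32 stores) is
// inlined as a store sequence; anything bigger is left as a call to memset.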
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getBitWidth() == 128)
      MaxAlign = 16;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
  return;
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.  For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  if (Subtarget->is64Bit())
    return getTargetData()->getABITypeAlignment(Ty);
  unsigned Align = 4;
  if (Subtarget->hasSSE1())
    getMaxByValAlign(Ty, Align);
  return Align;
}

/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
                                                      SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node if
/// it exists, skipping a possible ISD::TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to
  // the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);
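  // Operand layout of ISD::RET here: the chain, then a (value, signness
  // flag) pair for each returned value. That is why the operand count
  // asserted above is odd and why values are fetched at index i*2+1 below.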
  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i = 3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If the value is in an SSE register, we need to move it up from
    // an XMM register onto the fp-stack.  Do this with an FP_EXTEND to f80.
    // This will get legalized into a load/store if it can't get optimized
    // away.
    if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
      Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes
/// that Chain/InFlag are the input chain/flag to use, and that TheCall is
/// the call being lowered.  It returns an SDNode with the same number of
/// values as the ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;
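  // In the copies below each CopyFromReg consumes the glue value produced by
  // its predecessor (read back via getValue(2)) and emits a new one, pinning
  // the physical-register reads immediately after the call.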
  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.  If this will end up
    // in an SSE register, copy it out as f80 and do a truncate, otherwise
    // use the specified value type.
    MVT::ValueType GetResultTy = RVLocs[0].getValVT();
    if (isScalarFPTypeInSSEReg(GetResultTy))
      GetResultTy = MVT::f80;
    SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);

    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we want the result in an SSE register, use an FP_ROUND to get it
    // there.
    if (GetResultTy != RVLocs[0].getValVT())
      RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
                           // This truncation won't change the value.
                           DAG.getIntPtrConstant(1));

    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}

/// LowerCallResultToTwo64BitRegs - Lower the result values of an x86-64
/// ISD::CALL where the results are known to be in two 64-bit registers,
/// e.g. XMM0 and XMM1.  This simply stores the two values back to the
/// fixed stack slot allocated for StructRet.
SDNode *X86TargetLowering::
LowerCallResultToTwo64BitRegs(SDOperand Chain, SDOperand InFlag,
                              SDNode *TheCall, unsigned Reg1, unsigned Reg2,
                              MVT::ValueType VT, SelectionDAG &DAG) {
  SDOperand RetVal1 = DAG.getCopyFromReg(Chain, Reg1, VT, InFlag);
  Chain = RetVal1.getValue(1);
  InFlag = RetVal1.getValue(2);
  SDOperand RetVal2 = DAG.getCopyFromReg(Chain, Reg2, VT, InFlag);
  Chain = RetVal2.getValue(1);
  InFlag = RetVal2.getValue(2);
  SDOperand FIN = TheCall->getOperand(5);
  Chain = DAG.getStore(Chain, RetVal1, FIN, NULL, 0);
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
  Chain = DAG.getStore(Chain, RetVal2, FIN, NULL, 0);
  return Chain.Val;
}
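// The two register halves are spilled to offsets 0 and 8 of the hidden
// StructRet slot (call operand 5), giving the caller the same memory image
// an in-memory sret return would have produced.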
/// LowerCallResultToTwoX87Regs - Lower the result values of an x86-64
/// ISD::CALL where the results are known to be in ST0 and ST1.
SDNode *X86TargetLowering::
LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
                            SDNode *TheCall, SelectionDAG &DAG) {
  SmallVector<SDOperand, 8> ResultVals;
  const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
  SDVTList Tys = DAG.getVTList(VTs, 4);
  SDOperand Ops[] = { Chain, InFlag };
  SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
  Chain = RetVal.getValue(2);
  SDOperand FIN = TheCall->getOperand(5);
  Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(16));
  Chain = DAG.getStore(Chain, RetVal, FIN, NULL, 0);
  return Chain.Val;
}

//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall is the standard calling convention for most Windows API routines.
//  It differs from the C calling convention only a little: the callee cleans
//  up the stack rather than the caller, and symbols are decorated in some
//  fancy way :) It doesn't support any vector arguments.
//  For info on the fast calling convention see the Fast Calling Convention
//  (tail call) implementation, LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value.  It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  MF.getRegInfo().addLiveIn(PReg, VReg);
  return VReg;
}

// Determines whether a CALL node uses struct return semantics.
static bool CallIsStructReturn(SDOperand Op) {
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  if (!NumOps)
    return false;

  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
static bool ArgsAreStructReturn(SDOperand Op) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  if (!NumArgs)
    return false;

  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to
// pop its own arguments. Callee pop is necessary to support tail calls.
bool X86TargetLowering::IsCalleePop(SDOperand Op) {
  bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (IsVarArg)
    return false;

  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}
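// For example, a 32-bit stdcall function with 12 bytes of arguments returns
// with "ret 12", popping its own arguments; a C-convention callee uses a
// plain "ret" and leaves those 12 bytes for the caller to clean up.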
// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit()) {
    if (CC == CallingConv::Fast && PerformTailCallOpt)
      return CC_X86_64_TailCall;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast && PerformTailCallOpt)
    return CC_X86_32_TailCall;
  else
    return CC_X86_32_C;
}

// Selects the appropriate decoration to apply to a MachineFunction containing
// a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (CC == CallingConv::X86_FastCall)
    return FastCall;
  else if (CC == CallingConv::X86_StdCall)
    return StdCall;
  return None;
}


// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
// possibly be overwritten when lowering the outgoing arguments in a tail
// call.  Currently the implementation of this call is very conservative and
// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
// virtual registers would be overwritten by direct lowering.
// Possible improvement:
// Check FORMAL_ARGUMENTS' corresponding MERGE_VALUES for CopyFromReg nodes
// indicating inreg passed arguments which also need not be lowered to a safe
// stack slot.
static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op) {
  RegisterSDNode *OpReg = NULL;
  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
      (Op.getOpcode() == ISD::CopyFromReg &&
       (OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
       OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister))
    return true;
  return false;
}

// CreateCopyOfByValArgument - Make a copy of an aggregate at address
// specified by "Src" to address "Dst" with size and alignment information
// specified by the specific parameter attribute.  The copy will be passed as
// a byval function parameter.
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
                          unsigned Flags, SelectionDAG &DAG) {
  unsigned Align = 1 <<
    ((Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs);
  unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
    ISD::ParamFlags::ByValSizeOffs;
  SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
  SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);
  SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
}

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
  bool isByVal = Flags & ISD::ParamFlags::ByVal;
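  // The ParamFlags word packs the byval attributes into one constant:
  // ByValAlign is stored as log2 of the alignment (hence the "1 <<" when it
  // is decoded in CreateCopyOfByValArgument above), while ByValSize is a
  // plain byte count.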
  // FIXME: For now, all byval parameter objects are marked mutable.  This
  // can be changed with more analysis.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset(), !isByVal);
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
  if (isByVal)
    return FIN;
  return DAG.getLoad(VA.getValVT(), Root, FIN,
                     PseudoSourceValue::getFixedStack(), FI);
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));

  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CCAssignFnForNode(Op));

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip
    // later places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass;  // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
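      // AssertSext/AssertZext generate no code; they merely record that the
      // caller already extended the value, so later combines can remove
      // re-extensions of the TRUNCATE result created above.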
      // Handle MMX values passed in GPRs.
      if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align stack specially for tail calls.
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    if (Is64Bit || CC != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    }
    if (Is64Bit) {
      static const unsigned GPR64ArgRegs[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };

      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so
      // they may be loaded by dereferencing the result of va_next.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

      // Store the integer parameter registers.
      SmallVector<SDOperand, 8> MemOps;
      SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                  DAG.getIntPtrConstant(VarArgsGPOffset));
      for (; NumIntRegs != 6; ++NumIntRegs) {
        unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                  X86::GR64RegisterClass);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        SDOperand Store =
          DAG.getStore(Val.getValue(1), Val, FIN,
                       PseudoSourceValue::getFixedStack(),
                       RegSaveFrameIndex);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(8));
      }

      // Now store the XMM (fp + vector) parameter registers.
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                        DAG.getIntPtrConstant(VarArgsFPOffset));
      for (; NumXMMRegs != 8; ++NumXMMRegs) {
        unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                  X86::VR128RegisterClass);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
        SDOperand Store =
          DAG.getStore(Val.getValue(1), Val, FIN,
                       PseudoSourceValue::getFixedStack(),
                       RegSaveFrameIndex);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(16));
      }
      if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOps[0], MemOps.size());
    }
  }
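  // The 176-byte save area built above (6 GPRs at 8 bytes each plus 8 XMM
  // registers at 16 bytes each) mirrors the x86-64 SysV va_list register
  // save area; VarArgsGPOffset and VarArgsFPOffset correspond to its
  // gp_offset and fp_offset fields, which the custom VASTART lowering
  // writes out.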
  // Round the stack argument area up to 8n+4 bytes, so that once the 4-byte
  // return address is pushed on top of it, the start of the arguments stays
  // 8-byte aligned.
  if (!Is64Bit && CC == CallingConv::X86_FastCall &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
      (StackSize & 7) == 0)
    StackSize += 4;

  ArgValues.push_back(Root);

  // Some CCs need callee pop.
  if (IsCalleePop(Op)) {
    BytesToPopOnReturn = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0;  // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!Is64Bit && ArgsAreStructReturn(Op))
      BytesToPopOnReturn = 4;
    BytesCallerReserves = StackSize;
  }

  if (!Is64Bit) {
    RegSaveFrameIndex = 0xAAAAAAA;  // RegSaveFrameIndex is X86-64 only.
    if (CC == CallingConv::X86_FastCall)
      VarArgsFrameIndex = 0xAAAAAAA;  // fastcc functions can't have varargs.
  }

  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDOperand PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  }
  return DAG.getStore(Chain, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
}
1330 return X86::InMemory; 1331 1332 const PointerType *PTy = cast<PointerType>(Fn->arg_begin()->getType()); 1333 const Type *RTy = PTy->getElementType(); 1334 unsigned Size = getTargetData()->getABITypeSize(RTy); 1335 if (Size != 16 && Size != 32) 1336 return X86::InMemory; 1337 1338 if (Size == 32) { 1339 const StructType *STy = dyn_cast<StructType>(RTy); 1340 if (!STy) return X86::InMemory; 1341 if (STy->getNumElements() == 2 && 1342 STy->getElementType(0) == Type::X86_FP80Ty && 1343 STy->getElementType(1) == Type::X86_FP80Ty) 1344 return X86::InX87; 1345 } 1346 1347 bool AllFP = true; 1348 for (Type::subtype_iterator I = RTy->subtype_begin(), E = RTy->subtype_end(); 1349 I != E; ++I) { 1350 const Type *STy = I->get(); 1351 if (!STy->isFPOrFPVector()) { 1352 AllFP = false; 1353 break; 1354 } 1355 } 1356 1357 if (AllFP) 1358 return X86::InSSE; 1359 return X86::InGPR64; 1360} 1361 1362void X86TargetLowering::X86_64AnalyzeSRetCallOperands(SDNode *TheCall, 1363 CCAssignFn *Fn, 1364 CCState &CCInfo) { 1365 unsigned NumOps = (TheCall->getNumOperands() - 5) / 2; 1366 for (unsigned i = 1; i != NumOps; ++i) { 1367 MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType(); 1368 SDOperand FlagOp = TheCall->getOperand(5+2*i+1); 1369 unsigned ArgFlags =cast<ConstantSDNode>(FlagOp)->getValue(); 1370 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) { 1371 cerr << "Call operand #" << i << " has unhandled type " 1372 << MVT::getValueTypeString(ArgVT) << "\n"; 1373 abort(); 1374 } 1375 } 1376} 1377 1378SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 1379 MachineFunction &MF = DAG.getMachineFunction(); 1380 SDOperand Chain = Op.getOperand(0); 1381 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 1382 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1383 bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0 1384 && CC == CallingConv::Fast && PerformTailCallOpt; 1385 SDOperand Callee = Op.getOperand(4); 1386 bool Is64Bit = Subtarget->is64Bit(); 1387 bool IsStructRet = CallIsStructReturn(Op); 1388 1389 assert(!(isVarArg && CC == CallingConv::Fast) && 1390 "Var args not supported with calling convention fastcc"); 1391 1392 // Analyze operands of the call, assigning locations to each operand. 1393 SmallVector<CCValAssign, 16> ArgLocs; 1394 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1395 CCAssignFn *CCFn = CCAssignFnForNode(Op); 1396 1397 X86::X86_64SRet SRetMethod = X86::InMemory; 1398 if (Is64Bit && IsStructRet) 1399 // FIXME: We can't figure out type of the sret structure for indirect 1400 // calls. We need to copy more information from CallSite to the ISD::CALL 1401 // node. 1402 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1403 SRetMethod = 1404 ClassifyX86_64SRetCallReturn(dyn_cast<Function>(G->getGlobal())); 1405 1406 // UGLY HACK! For x86-64, some 128-bit aggregates are returns in a pair of 1407 // registers. Unfortunately, llvm does not support i128 yet so we pretend it's 1408 // a sret call. 1409 if (SRetMethod != X86::InMemory) 1410 X86_64AnalyzeSRetCallOperands(Op.Val, CCFn, CCInfo); 1411 else 1412 CCInfo.AnalyzeCallOperands(Op.Val, CCFn); 1413 1414 // Get a count of how many bytes are to be pushed on the stack. 
1415 unsigned NumBytes = CCInfo.getNextStackOffset(); 1416 if (CC == CallingConv::Fast) 1417 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); 1418 1419 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1420 // arguments and the arguments after the retaddr has been pushed are aligned. 1421 if (!Is64Bit && CC == CallingConv::X86_FastCall && 1422 !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() && 1423 (NumBytes & 7) == 0) 1424 NumBytes += 4; 1425 1426 int FPDiff = 0; 1427 if (IsTailCall) { 1428 // Lower arguments at fp - stackoffset + fpdiff. 1429 unsigned NumBytesCallerPushed = 1430 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); 1431 FPDiff = NumBytesCallerPushed - NumBytes; 1432 1433 // Set the delta of movement of the returnaddr stackslot. 1434 // But only set if delta is greater than previous delta. 1435 if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta())) 1436 MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); 1437 } 1438 1439 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes)); 1440 1441 SDOperand RetAddrFrIdx, NewRetAddrFrIdx; 1442 if (IsTailCall) { 1443 // Adjust the Return address stack slot. 1444 if (FPDiff) { 1445 MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32; 1446 RetAddrFrIdx = getReturnAddressFrameIndex(DAG); 1447 // Load the "old" Return address. 1448 RetAddrFrIdx = 1449 DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0); 1450 // Calculate the new stack slot for the return address. 1451 int SlotSize = Is64Bit ? 8 : 4; 1452 int NewReturnAddrFI = 1453 MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); 1454 NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); 1455 Chain = SDOperand(RetAddrFrIdx.Val, 1); 1456 } 1457 } 1458 1459 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 1460 SmallVector<SDOperand, 8> MemOpChains; 1461 1462 SDOperand StackPtr; 1463 1464 // Walk the register/memloc assignments, inserting copies/loads. For tail 1465 // calls, lower arguments which could otherwise be possibly overwritten to the 1466 // stack slot where they would go on normal function calls. 1467 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1468 CCValAssign &VA = ArgLocs[i]; 1469 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1470 1471 // Promote the value if needed. 1472 switch (VA.getLocInfo()) { 1473 default: assert(0 && "Unknown loc info!"); 1474 case CCValAssign::Full: break; 1475 case CCValAssign::SExt: 1476 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1477 break; 1478 case CCValAssign::ZExt: 1479 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1480 break; 1481 case CCValAssign::AExt: 1482 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1483 break; 1484 } 1485 1486 if (VA.isRegLoc()) { 1487 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1488 } else { 1489 if (!IsTailCall || IsPossiblyOverwrittenArgumentOfTailCall(Arg)) { 1490 assert(VA.isMemLoc()); 1491 if (StackPtr.Val == 0) 1492 StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy()); 1493 1494 MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain, 1495 Arg)); 1496 } 1497 } 1498 } 1499 1500 if (!MemOpChains.empty()) 1501 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1502 &MemOpChains[0], MemOpChains.size()); 1503 1504 // Build a sequence of copy-to-reg nodes chained together with token chain 1505 // and flag operands which copy the outgoing args into registers. 
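  // Each CopyToReg consumes the flag value produced by the previous copy,
  // gluing the copies to one another (and ultimately to the call) so they
  // cannot be separated or reordered.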
1506 SDOperand InFlag; 1507 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1508 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1509 InFlag); 1510 InFlag = Chain.getValue(1); 1511 } 1512 1513 if (IsTailCall) 1514 InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output? 1515 1516 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1517 // GOT pointer. 1518 // Does not work with tail call since ebx is not restored correctly by 1519 // tailcaller. TODO: at least for x86 - verify for x86-64 1520 if (!IsTailCall && !Is64Bit && 1521 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1522 Subtarget->isPICStyleGOT()) { 1523 Chain = DAG.getCopyToReg(Chain, X86::EBX, 1524 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 1525 InFlag); 1526 InFlag = Chain.getValue(1); 1527 } 1528 1529 if (Is64Bit && isVarArg) { 1530 // From AMD64 ABI document: 1531 // For calls that may call functions that use varargs or stdargs 1532 // (prototype-less calls or calls to functions containing ellipsis (...) in 1533 // the declaration) %al is used as hidden argument to specify the number 1534 // of SSE registers used. The contents of %al do not need to match exactly 1535 // the number of registers, but must be an ubound on the number of SSE 1536 // registers used and is in the range 0 - 8 inclusive. 1537 1538 // Count the number of XMM registers allocated. 1539 static const unsigned XMMArgRegs[] = { 1540 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1541 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1542 }; 1543 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1544 1545 Chain = DAG.getCopyToReg(Chain, X86::AL, 1546 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1547 InFlag = Chain.getValue(1); 1548 } 1549 1550 // For tail calls lower the arguments to the 'real' stack slot. 1551 if (IsTailCall) { 1552 SmallVector<SDOperand, 8> MemOpChains2; 1553 SDOperand FIN; 1554 int FI = 0; 1555 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1556 CCValAssign &VA = ArgLocs[i]; 1557 if (!VA.isRegLoc()) { 1558 assert(VA.isMemLoc()); 1559 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1560 SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo()); 1561 unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue(); 1562 // Create frame index. 1563 int32_t Offset = VA.getLocMemOffset()+FPDiff; 1564 uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8; 1565 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); 1566 FIN = DAG.getFrameIndex(FI, MVT::i32); 1567 SDOperand Source = Arg; 1568 if (IsPossiblyOverwrittenArgumentOfTailCall(Arg)) { 1569 // Copy from stack slots to stack slot of a tail called function. This 1570 // needs to be done because if we would lower the arguments directly 1571 // to their real stack slot we might end up overwriting each other. 1572 // Get source stack slot. 1573 Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); 1574 if (StackPtr.Val == 0) 1575 StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy()); 1576 Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source); 1577 if ((Flags & ISD::ParamFlags::ByVal)==0) 1578 Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0); 1579 } 1580 1581 if (Flags & ISD::ParamFlags::ByVal) { 1582 // Copy relative to framepointer. 1583 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain, 1584 Flags, DAG)); 1585 } else { 1586 // Store relative to framepointer. 
1587 MemOpChains2.push_back( 1588 DAG.getStore(Chain, Source, FIN, 1589 PseudoSourceValue::getFixedStack(), FI)); 1590 } 1591 } 1592 } 1593 1594 if (!MemOpChains2.empty()) 1595 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1596 &MemOpChains2[0], MemOpChains2.size()); 1597 1598 // Store the return address to the appropriate stack slot. 1599 if (FPDiff) 1600 Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0); 1601 } 1602 1603 // If the callee is a GlobalAddress node (quite common, every direct call is) 1604 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1605 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1606 // We should use extra load for direct calls to dllimported functions in 1607 // non-JIT mode. 1608 if ((IsTailCall || !Is64Bit || 1609 getTargetMachine().getCodeModel() != CodeModel::Large) 1610 && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1611 getTargetMachine(), true)) 1612 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1613 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1614 if (IsTailCall || !Is64Bit || 1615 getTargetMachine().getCodeModel() != CodeModel::Large) 1616 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1617 } else if (IsTailCall) { 1618 assert(Callee.getOpcode() == ISD::LOAD && 1619 "Function destination must be loaded into virtual register"); 1620 unsigned Opc = Is64Bit ? X86::R9 : X86::ECX; 1621 1622 Chain = DAG.getCopyToReg(Chain, 1623 DAG.getRegister(Opc, getPointerTy()) , 1624 Callee,InFlag); 1625 Callee = DAG.getRegister(Opc, getPointerTy()); 1626 // Add register as live out. 1627 DAG.getMachineFunction().getRegInfo().addLiveOut(Opc); 1628 } 1629 1630 // Returns a chain & a flag for retval copy to use. 1631 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1632 SmallVector<SDOperand, 8> Ops; 1633 1634 if (IsTailCall) { 1635 Ops.push_back(Chain); 1636 Ops.push_back(DAG.getIntPtrConstant(NumBytes)); 1637 Ops.push_back(DAG.getIntPtrConstant(0)); 1638 if (InFlag.Val) 1639 Ops.push_back(InFlag); 1640 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1641 InFlag = Chain.getValue(1); 1642 1643 // Returns a chain & a flag for retval copy to use. 1644 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1645 Ops.clear(); 1646 } 1647 1648 Ops.push_back(Chain); 1649 Ops.push_back(Callee); 1650 1651 if (IsTailCall) 1652 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); 1653 1654 // Add an implicit use GOT pointer in EBX. 1655 if (!IsTailCall && !Is64Bit && 1656 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1657 Subtarget->isPICStyleGOT()) 1658 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 1659 1660 // Add argument registers to the end of the list so that they are known live 1661 // into the call. 1662 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1663 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1664 RegsToPass[i].second.getValueType())); 1665 1666 if (InFlag.Val) 1667 Ops.push_back(InFlag); 1668 1669 if (IsTailCall) { 1670 assert(InFlag.Val && 1671 "Flag must be set. Depend on flag being set in LowerRET"); 1672 Chain = DAG.getNode(X86ISD::TAILCALL, 1673 Op.Val->getVTList(), &Ops[0], Ops.size()); 1674 1675 return SDOperand(Chain.Val, Op.ResNo); 1676 } 1677 1678 Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size()); 1679 InFlag = Chain.getValue(1); 1680 1681 // Create the CALLSEQ_END node. 
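  // CALLSEQ_END is given both the number of bytes we pushed and the number
  // the callee will pop on return; the caller cleans up the difference.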
  unsigned NumBytesForCalleeToPush;
  if (IsCalleePop(Op))
    NumBytesForCalleeToPush = NumBytes;    // Callee pops everything.
  else if (!Is64Bit && IsStructRet)
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = 4;
  else
    NumBytesForCalleeToPush = 0;  // Callee pops nothing.

  // Returns a flag for retval copy to use.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(NumBytes),
                             DAG.getIntPtrConstant(NumBytesForCalleeToPush),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  switch (SRetMethod) {
  default:
    return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
  case X86::InGPR64:
    return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
                                                   X86::RAX, X86::RDX,
                                                   MVT::i64, DAG), Op.ResNo);
  case X86::InSSE:
    return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
                                                   X86::XMM0, X86::XMM1,
                                                   MVT::f64, DAG), Op.ResNo);
  case X86::InX87:
    return SDOperand(LowerCallResultToTwoX87Regs(Chain, InFlag, Op.Val, DAG),
                     Op.ResNo);
  }
}


//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

// Like the stdcall convention, the callee cleans up the arguments, except
// that ECX is reserved for storing the address of the tail-called function.
// Only 2 registers are free for argument passing (inreg). Tail call
// optimization is performed provided:
//  * tailcallopt is enabled
//  * caller/callee are fastcc
//  * elf/pic is disabled OR
//  * elf/pic enabled + callee is in module + callee has
//    visibility protected or hidden
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's
// dyld for example.)
// If a tail-called function has more arguments than the caller, the caller
// needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after
// the original RETADDR, but before the saved framepointer or the spilled
// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
// Stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Align the stack size to be a multiple of
/// the stack alignment minus a slot for the return address, e.g. 16n + 12
/// for a 16-byte alignment requirement on 32-bit x86.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // The remainder is small enough; just add the difference to reach
      // StackAlignment*n + (StackAlignment - SlotSize).
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the lower bits, then add one full StackAlignment plus
      // (StackAlignment - SlotSize) bytes.
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment - SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// the caller/callee calling conventions match, currently only fastcc
/// supports tail calls, and the function CALL is immediately followed by a
/// RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                          SDOperand Ret,
                                                          SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  // Check whether the CALL node immediately precedes the RET node and
  // whether the return uses the result of the node or is a void return.
  unsigned NumOps = Ret.getNumOperands();
  if ((NumOps == 1 &&
       (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
        Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
      (NumOps > 1 &&
       Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
       Ret.getOperand(1) == SDOperand(Call.Val,0))) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDOperand Callee = Call.getOperand(4);
      // On elf/pic %ebx needs to be livein.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT())
        return true;

      // Can only do local tail calls with PIC.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}


/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
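/// For example, (setgt X, -1) is lowered as a compare of X against 0 with
/// COND_NS, since "X > -1" just tests that the sign bit of X is clear.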
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the half-open range
/// [Low, Hi).
1938static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1939 if (Op.getOpcode() == ISD::UNDEF) 1940 return true; 1941 1942 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1943 return (Val >= Low && Val < Hi); 1944} 1945 1946/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1947/// true if Op is undef or if its value equal to the specified value. 1948static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1949 if (Op.getOpcode() == ISD::UNDEF) 1950 return true; 1951 return cast<ConstantSDNode>(Op)->getValue() == Val; 1952} 1953 1954/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1955/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1956bool X86::isPSHUFDMask(SDNode *N) { 1957 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1958 1959 if (N->getNumOperands() != 2 && N->getNumOperands() != 4) 1960 return false; 1961 1962 // Check if the value doesn't reference the second vector. 1963 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1964 SDOperand Arg = N->getOperand(i); 1965 if (Arg.getOpcode() == ISD::UNDEF) continue; 1966 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1967 if (cast<ConstantSDNode>(Arg)->getValue() >= e) 1968 return false; 1969 } 1970 1971 return true; 1972} 1973 1974/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1975/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1976bool X86::isPSHUFHWMask(SDNode *N) { 1977 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1978 1979 if (N->getNumOperands() != 8) 1980 return false; 1981 1982 // Lower quadword copied in order. 1983 for (unsigned i = 0; i != 4; ++i) { 1984 SDOperand Arg = N->getOperand(i); 1985 if (Arg.getOpcode() == ISD::UNDEF) continue; 1986 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1987 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1988 return false; 1989 } 1990 1991 // Upper quadword shuffled. 1992 for (unsigned i = 4; i != 8; ++i) { 1993 SDOperand Arg = N->getOperand(i); 1994 if (Arg.getOpcode() == ISD::UNDEF) continue; 1995 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1996 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1997 if (Val < 4 || Val > 7) 1998 return false; 1999 } 2000 2001 return true; 2002} 2003 2004/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 2005/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 2006bool X86::isPSHUFLWMask(SDNode *N) { 2007 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2008 2009 if (N->getNumOperands() != 8) 2010 return false; 2011 2012 // Upper quadword copied in order. 2013 for (unsigned i = 4; i != 8; ++i) 2014 if (!isUndefOrEqual(N->getOperand(i), i)) 2015 return false; 2016 2017 // Lower quadword shuffled. 2018 for (unsigned i = 0; i != 4; ++i) 2019 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2020 return false; 2021 2022 return true; 2023} 2024 2025/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2026/// specifies a shuffle of elements that is suitable for input to SHUFP*. 
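/// For example, with 4 elements the mask <2, 0, 5, 4> qualifies: the low
/// half selects elements of V1 and the high half selects elements of V2.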
2027static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 2028 if (NumElems != 2 && NumElems != 4) return false; 2029 2030 unsigned Half = NumElems / 2; 2031 for (unsigned i = 0; i < Half; ++i) 2032 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 2033 return false; 2034 for (unsigned i = Half; i < NumElems; ++i) 2035 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 2036 return false; 2037 2038 return true; 2039} 2040 2041bool X86::isSHUFPMask(SDNode *N) { 2042 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2043 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 2044} 2045 2046/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 2047/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 2048/// half elements to come from vector 1 (which would equal the dest.) and 2049/// the upper half to come from vector 2. 2050static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 2051 if (NumOps != 2 && NumOps != 4) return false; 2052 2053 unsigned Half = NumOps / 2; 2054 for (unsigned i = 0; i < Half; ++i) 2055 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 2056 return false; 2057 for (unsigned i = Half; i < NumOps; ++i) 2058 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 2059 return false; 2060 return true; 2061} 2062 2063static bool isCommutedSHUFP(SDNode *N) { 2064 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2065 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 2066} 2067 2068/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2069/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2070bool X86::isMOVHLPSMask(SDNode *N) { 2071 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2072 2073 if (N->getNumOperands() != 4) 2074 return false; 2075 2076 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2077 return isUndefOrEqual(N->getOperand(0), 6) && 2078 isUndefOrEqual(N->getOperand(1), 7) && 2079 isUndefOrEqual(N->getOperand(2), 2) && 2080 isUndefOrEqual(N->getOperand(3), 3); 2081} 2082 2083/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 2084/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 2085/// <2, 3, 2, 3> 2086bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 2087 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2088 2089 if (N->getNumOperands() != 4) 2090 return false; 2091 2092 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 2093 return isUndefOrEqual(N->getOperand(0), 2) && 2094 isUndefOrEqual(N->getOperand(1), 3) && 2095 isUndefOrEqual(N->getOperand(2), 2) && 2096 isUndefOrEqual(N->getOperand(3), 3); 2097} 2098 2099/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2100/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 2101bool X86::isMOVLPMask(SDNode *N) { 2102 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2103 2104 unsigned NumElems = N->getNumOperands(); 2105 if (NumElems != 2 && NumElems != 4) 2106 return false; 2107 2108 for (unsigned i = 0; i < NumElems/2; ++i) 2109 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 2110 return false; 2111 2112 for (unsigned i = NumElems/2; i < NumElems; ++i) 2113 if (!isUndefOrEqual(N->getOperand(i), i)) 2114 return false; 2115 2116 return true; 2117} 2118 2119/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 2120/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 2121/// and MOVLHPS. 
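/// For example, with 4 elements the mask <0, 1, 4, 5> qualifies: the low
/// half of the result is the low half of V1 and the high half of the result
/// is the low half of V2.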
2122bool X86::isMOVHPMask(SDNode *N) { 2123 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2124 2125 unsigned NumElems = N->getNumOperands(); 2126 if (NumElems != 2 && NumElems != 4) 2127 return false; 2128 2129 for (unsigned i = 0; i < NumElems/2; ++i) 2130 if (!isUndefOrEqual(N->getOperand(i), i)) 2131 return false; 2132 2133 for (unsigned i = 0; i < NumElems/2; ++i) { 2134 SDOperand Arg = N->getOperand(i + NumElems/2); 2135 if (!isUndefOrEqual(Arg, i + NumElems)) 2136 return false; 2137 } 2138 2139 return true; 2140} 2141 2142/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 2143/// specifies a shuffle of elements that is suitable for input to UNPCKL. 2144bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts, 2145 bool V2IsSplat = false) { 2146 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 2147 return false; 2148 2149 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 2150 SDOperand BitI = Elts[i]; 2151 SDOperand BitI1 = Elts[i+1]; 2152 if (!isUndefOrEqual(BitI, j)) 2153 return false; 2154 if (V2IsSplat) { 2155 if (isUndefOrEqual(BitI1, NumElts)) 2156 return false; 2157 } else { 2158 if (!isUndefOrEqual(BitI1, j + NumElts)) 2159 return false; 2160 } 2161 } 2162 2163 return true; 2164} 2165 2166bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 2167 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2168 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 2169} 2170 2171/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 2172/// specifies a shuffle of elements that is suitable for input to UNPCKH. 2173bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts, 2174 bool V2IsSplat = false) { 2175 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 2176 return false; 2177 2178 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 2179 SDOperand BitI = Elts[i]; 2180 SDOperand BitI1 = Elts[i+1]; 2181 if (!isUndefOrEqual(BitI, j + NumElts/2)) 2182 return false; 2183 if (V2IsSplat) { 2184 if (isUndefOrEqual(BitI1, NumElts)) 2185 return false; 2186 } else { 2187 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 2188 return false; 2189 } 2190 } 2191 2192 return true; 2193} 2194 2195bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 2196 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2197 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 2198} 2199 2200/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 2201/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 2202/// <0, 0, 1, 1> 2203bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 2204 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2205 2206 unsigned NumElems = N->getNumOperands(); 2207 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 2208 return false; 2209 2210 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 2211 SDOperand BitI = N->getOperand(i); 2212 SDOperand BitI1 = N->getOperand(i+1); 2213 2214 if (!isUndefOrEqual(BitI, j)) 2215 return false; 2216 if (!isUndefOrEqual(BitI1, j)) 2217 return false; 2218 } 2219 2220 return true; 2221} 2222 2223/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 2224/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. 
/// vector_shuffle v, undef, <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. X86 movss requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit, and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
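/// For example, the mask <0, 0, 0, 0> (or the same with undefs) qualifies.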
2416bool X86::isSplatLoMask(SDNode *N) { 2417 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2418 2419 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 2420 if (!isUndefOrEqual(N->getOperand(i), 0)) 2421 return false; 2422 return true; 2423} 2424 2425/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2426/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2427/// instructions. 2428unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2429 unsigned NumOperands = N->getNumOperands(); 2430 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2431 unsigned Mask = 0; 2432 for (unsigned i = 0; i < NumOperands; ++i) { 2433 unsigned Val = 0; 2434 SDOperand Arg = N->getOperand(NumOperands-i-1); 2435 if (Arg.getOpcode() != ISD::UNDEF) 2436 Val = cast<ConstantSDNode>(Arg)->getValue(); 2437 if (Val >= NumOperands) Val -= NumOperands; 2438 Mask |= Val; 2439 if (i != NumOperands - 1) 2440 Mask <<= Shift; 2441 } 2442 2443 return Mask; 2444} 2445 2446/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2447/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2448/// instructions. 2449unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2450 unsigned Mask = 0; 2451 // 8 nodes, but we only care about the last 4. 2452 for (unsigned i = 7; i >= 4; --i) { 2453 unsigned Val = 0; 2454 SDOperand Arg = N->getOperand(i); 2455 if (Arg.getOpcode() != ISD::UNDEF) 2456 Val = cast<ConstantSDNode>(Arg)->getValue(); 2457 Mask |= (Val - 4); 2458 if (i != 4) 2459 Mask <<= 2; 2460 } 2461 2462 return Mask; 2463} 2464 2465/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2466/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2467/// instructions. 2468unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2469 unsigned Mask = 0; 2470 // 8 nodes, but we only care about the first 4. 2471 for (int i = 3; i >= 0; --i) { 2472 unsigned Val = 0; 2473 SDOperand Arg = N->getOperand(i); 2474 if (Arg.getOpcode() != ISD::UNDEF) 2475 Val = cast<ConstantSDNode>(Arg)->getValue(); 2476 Mask |= Val; 2477 if (i != 0) 2478 Mask <<= 2; 2479 } 2480 2481 return Mask; 2482} 2483 2484/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2485/// specifies a 8 element shuffle that can be broken into a pair of 2486/// PSHUFHW and PSHUFLW. 2487static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2488 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2489 2490 if (N->getNumOperands() != 8) 2491 return false; 2492 2493 // Lower quadword shuffled. 2494 for (unsigned i = 0; i != 4; ++i) { 2495 SDOperand Arg = N->getOperand(i); 2496 if (Arg.getOpcode() == ISD::UNDEF) continue; 2497 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2498 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2499 if (Val >= 4) 2500 return false; 2501 } 2502 2503 // Upper quadword shuffled. 2504 for (unsigned i = 4; i != 8; ++i) { 2505 SDOperand Arg = N->getOperand(i); 2506 if (Arg.getOpcode() == ISD::UNDEF) continue; 2507 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2508 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2509 if (Val < 4 || Val > 7) 2510 return false; 2511 } 2512 2513 return true; 2514} 2515 2516/// CommuteVectorShuffle - Swap vector_shuffle operands as well as 2517/// values in ther permute mask. 
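/// For example, with 4 elements, shuffle(V1, V2, <0, 5, 2, 7>) becomes
/// shuffle(V2, V1, <4, 1, 6, 3>).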
2518static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, 2519 SDOperand &V2, SDOperand &Mask, 2520 SelectionDAG &DAG) { 2521 MVT::ValueType VT = Op.getValueType(); 2522 MVT::ValueType MaskVT = Mask.getValueType(); 2523 MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); 2524 unsigned NumElems = Mask.getNumOperands(); 2525 SmallVector<SDOperand, 8> MaskVec; 2526 2527 for (unsigned i = 0; i != NumElems; ++i) { 2528 SDOperand Arg = Mask.getOperand(i); 2529 if (Arg.getOpcode() == ISD::UNDEF) { 2530 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2531 continue; 2532 } 2533 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2534 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2535 if (Val < NumElems) 2536 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2537 else 2538 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2539 } 2540 2541 std::swap(V1, V2); 2542 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); 2543 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2544} 2545 2546/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming 2547/// the two vector operands have swapped position. 2548static 2549SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) { 2550 MVT::ValueType MaskVT = Mask.getValueType(); 2551 MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); 2552 unsigned NumElems = Mask.getNumOperands(); 2553 SmallVector<SDOperand, 8> MaskVec; 2554 for (unsigned i = 0; i != NumElems; ++i) { 2555 SDOperand Arg = Mask.getOperand(i); 2556 if (Arg.getOpcode() == ISD::UNDEF) { 2557 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2558 continue; 2559 } 2560 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2561 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2562 if (Val < NumElems) 2563 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2564 else 2565 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2566 } 2567 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems); 2568} 2569 2570 2571/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2572/// match movhlps. The lower half elements should come from upper half of 2573/// V1 (and in order), and the upper half elements should come from the upper 2574/// half of V2 (and in order). 2575static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2576 unsigned NumElems = Mask->getNumOperands(); 2577 if (NumElems != 4) 2578 return false; 2579 for (unsigned i = 0, e = 2; i != e; ++i) 2580 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2581 return false; 2582 for (unsigned i = 2; i != 4; ++i) 2583 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2584 return false; 2585 return true; 2586} 2587 2588/// isScalarLoadToVector - Returns true if the node is a scalar load that 2589/// is promoted to a vector. 2590static inline bool isScalarLoadToVector(SDNode *N) { 2591 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2592 N = N->getOperand(0).Val; 2593 return ISD::isNON_EXTLoad(N); 2594 } 2595 return false; 2596} 2597 2598/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2599/// match movlp{s|d}. The lower half elements should come from lower half of 2600/// V1 (and in order), and the upper half elements should come from the upper 2601/// half of V2 (and in order). And since V1 will become the source of the 2602/// MOVLP, it must be either a vector load or a scalar load to vector. 
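/// For example, with 4 elements the mask <0, 1, 6, 7> matches, provided V1
/// is a (scalar or vector) load and V2 is not a load.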
2603static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 2604 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 2605 return false; 2606 // Is V2 is a vector load, don't do this transformation. We will try to use 2607 // load folding shufps op. 2608 if (ISD::isNON_EXTLoad(V2)) 2609 return false; 2610 2611 unsigned NumElems = Mask->getNumOperands(); 2612 if (NumElems != 2 && NumElems != 4) 2613 return false; 2614 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2615 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2616 return false; 2617 for (unsigned i = NumElems/2; i != NumElems; ++i) 2618 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2619 return false; 2620 return true; 2621} 2622 2623/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2624/// all the same. 2625static bool isSplatVector(SDNode *N) { 2626 if (N->getOpcode() != ISD::BUILD_VECTOR) 2627 return false; 2628 2629 SDOperand SplatValue = N->getOperand(0); 2630 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2631 if (N->getOperand(i) != SplatValue) 2632 return false; 2633 return true; 2634} 2635 2636/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2637/// to an undef. 2638static bool isUndefShuffle(SDNode *N) { 2639 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2640 return false; 2641 2642 SDOperand V1 = N->getOperand(0); 2643 SDOperand V2 = N->getOperand(1); 2644 SDOperand Mask = N->getOperand(2); 2645 unsigned NumElems = Mask.getNumOperands(); 2646 for (unsigned i = 0; i != NumElems; ++i) { 2647 SDOperand Arg = Mask.getOperand(i); 2648 if (Arg.getOpcode() != ISD::UNDEF) { 2649 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2650 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2651 return false; 2652 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2653 return false; 2654 } 2655 } 2656 return true; 2657} 2658 2659/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2660/// constant +0.0. 2661static inline bool isZeroNode(SDOperand Elt) { 2662 return ((isa<ConstantSDNode>(Elt) && 2663 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2664 (isa<ConstantFPSDNode>(Elt) && 2665 cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); 2666} 2667 2668/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2669/// to an zero vector. 2670static bool isZeroShuffle(SDNode *N) { 2671 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2672 return false; 2673 2674 SDOperand V1 = N->getOperand(0); 2675 SDOperand V2 = N->getOperand(1); 2676 SDOperand Mask = N->getOperand(2); 2677 unsigned NumElems = Mask.getNumOperands(); 2678 for (unsigned i = 0; i != NumElems; ++i) { 2679 SDOperand Arg = Mask.getOperand(i); 2680 if (Arg.getOpcode() == ISD::UNDEF) 2681 continue; 2682 2683 unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); 2684 if (Idx < NumElems) { 2685 unsigned Opc = V1.Val->getOpcode(); 2686 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val)) 2687 continue; 2688 if (Opc != ISD::BUILD_VECTOR || 2689 !isZeroNode(V1.Val->getOperand(Idx))) 2690 return false; 2691 } else if (Idx >= NumElems) { 2692 unsigned Opc = V2.Val->getOpcode(); 2693 if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val)) 2694 continue; 2695 if (Opc != ISD::BUILD_VECTOR || 2696 !isZeroNode(V2.Val->getOperand(Idx - NumElems))) 2697 return false; 2698 } 2699 } 2700 return true; 2701} 2702 2703/// getZeroVector - Returns a vector of specified type with all zero elements. 
2704/// 2705static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2706 assert(MVT::isVector(VT) && "Expected a vector type"); 2707 2708 // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2709 // type. This ensures they get CSE'd. 2710 SDOperand Cst = DAG.getTargetConstant(0, MVT::i32); 2711 SDOperand Vec; 2712 if (MVT::getSizeInBits(VT) == 64) // MMX 2713 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2714 else // SSE 2715 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2716 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2717} 2718 2719/// getOnesVector - Returns a vector of specified type with all bits set. 2720/// 2721static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) { 2722 assert(MVT::isVector(VT) && "Expected a vector type"); 2723 2724 // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest 2725 // type. This ensures they get CSE'd. 2726 SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32); 2727 SDOperand Vec; 2728 if (MVT::getSizeInBits(VT) == 64) // MMX 2729 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); 2730 else // SSE 2731 Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst); 2732 return DAG.getNode(ISD::BIT_CONVERT, VT, Vec); 2733} 2734 2735 2736/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2737/// that point to V2 points to its first element. 2738static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2739 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2740 2741 bool Changed = false; 2742 SmallVector<SDOperand, 8> MaskVec; 2743 unsigned NumElems = Mask.getNumOperands(); 2744 for (unsigned i = 0; i != NumElems; ++i) { 2745 SDOperand Arg = Mask.getOperand(i); 2746 if (Arg.getOpcode() != ISD::UNDEF) { 2747 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2748 if (Val > NumElems) { 2749 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2750 Changed = true; 2751 } 2752 } 2753 MaskVec.push_back(Arg); 2754 } 2755 2756 if (Changed) 2757 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2758 &MaskVec[0], MaskVec.size()); 2759 return Mask; 2760} 2761 2762/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2763/// operation of specified width. 2764static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2765 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2766 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2767 2768 SmallVector<SDOperand, 8> MaskVec; 2769 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2770 for (unsigned i = 1; i != NumElems; ++i) 2771 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2772 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2773} 2774 2775/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2776/// of specified width. 
2777static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2778 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2779 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2780 SmallVector<SDOperand, 8> MaskVec; 2781 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2782 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2783 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2784 } 2785 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2786} 2787 2788/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2789/// of specified width. 2790static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2791 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2792 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2793 unsigned Half = NumElems/2; 2794 SmallVector<SDOperand, 8> MaskVec; 2795 for (unsigned i = 0; i != Half; ++i) { 2796 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2797 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2798 } 2799 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2800} 2801 2802/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2803/// 2804static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2805 SDOperand V1 = Op.getOperand(0); 2806 SDOperand Mask = Op.getOperand(2); 2807 MVT::ValueType VT = Op.getValueType(); 2808 unsigned NumElems = Mask.getNumOperands(); 2809 Mask = getUnpacklMask(NumElems, DAG); 2810 while (NumElems != 4) { 2811 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2812 NumElems >>= 1; 2813 } 2814 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2815 2816 Mask = getZeroVector(MVT::v4i32, DAG); 2817 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2818 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2819 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2820} 2821 2822/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2823/// vector of zero or undef vector. This produces a shuffle where the low 2824/// element of V2 is swizzled into the zero/undef vector, landing at element 2825/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 2826static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2827 unsigned NumElems, unsigned Idx, 2828 bool isZero, SelectionDAG &DAG) { 2829 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2830 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2831 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2832 SmallVector<SDOperand, 16> MaskVec; 2833 for (unsigned i = 0; i != NumElems; ++i) 2834 if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. 2835 MaskVec.push_back(DAG.getConstant(NumElems, EVT)); 2836 else 2837 MaskVec.push_back(DAG.getConstant(i, EVT)); 2838 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2839 &MaskVec[0], MaskVec.size()); 2840 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2841} 2842 2843/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
2844/// 2845static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2846 unsigned NumNonZero, unsigned NumZero, 2847 SelectionDAG &DAG, TargetLowering &TLI) { 2848 if (NumNonZero > 8) 2849 return SDOperand(); 2850 2851 SDOperand V(0, 0); 2852 bool First = true; 2853 for (unsigned i = 0; i < 16; ++i) { 2854 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2855 if (ThisIsNonZero && First) { 2856 if (NumZero) 2857 V = getZeroVector(MVT::v8i16, DAG); 2858 else 2859 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2860 First = false; 2861 } 2862 2863 if ((i & 1) != 0) { 2864 SDOperand ThisElt(0, 0), LastElt(0, 0); 2865 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2866 if (LastIsNonZero) { 2867 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2868 } 2869 if (ThisIsNonZero) { 2870 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2871 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2872 ThisElt, DAG.getConstant(8, MVT::i8)); 2873 if (LastIsNonZero) 2874 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2875 } else 2876 ThisElt = LastElt; 2877 2878 if (ThisElt.Val) 2879 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2880 DAG.getIntPtrConstant(i/2)); 2881 } 2882 } 2883 2884 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2885} 2886 2887/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 2888/// 2889static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2890 unsigned NumNonZero, unsigned NumZero, 2891 SelectionDAG &DAG, TargetLowering &TLI) { 2892 if (NumNonZero > 4) 2893 return SDOperand(); 2894 2895 SDOperand V(0, 0); 2896 bool First = true; 2897 for (unsigned i = 0; i < 8; ++i) { 2898 bool isNonZero = (NonZeros & (1 << i)) != 0; 2899 if (isNonZero) { 2900 if (First) { 2901 if (NumZero) 2902 V = getZeroVector(MVT::v8i16, DAG); 2903 else 2904 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2905 First = false; 2906 } 2907 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2908 DAG.getIntPtrConstant(i)); 2909 } 2910 } 2911 2912 return V; 2913} 2914 2915SDOperand 2916X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2917 // All zero's are handled with pxor, all one's are handled with pcmpeqd. 2918 if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) { 2919 // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to 2920 // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are 2921 // eliminated on x86-32 hosts. 
2922 if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) 2923 return Op; 2924 2925 if (ISD::isBuildVectorAllOnes(Op.Val)) 2926 return getOnesVector(Op.getValueType(), DAG); 2927 return getZeroVector(Op.getValueType(), DAG); 2928 } 2929 2930 MVT::ValueType VT = Op.getValueType(); 2931 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2932 unsigned EVTBits = MVT::getSizeInBits(EVT); 2933 2934 unsigned NumElems = Op.getNumOperands(); 2935 unsigned NumZero = 0; 2936 unsigned NumNonZero = 0; 2937 unsigned NonZeros = 0; 2938 bool HasNonImms = false; 2939 SmallSet<SDOperand, 8> Values; 2940 for (unsigned i = 0; i < NumElems; ++i) { 2941 SDOperand Elt = Op.getOperand(i); 2942 if (Elt.getOpcode() == ISD::UNDEF) 2943 continue; 2944 Values.insert(Elt); 2945 if (Elt.getOpcode() != ISD::Constant && 2946 Elt.getOpcode() != ISD::ConstantFP) 2947 HasNonImms = true; 2948 if (isZeroNode(Elt)) 2949 NumZero++; 2950 else { 2951 NonZeros |= (1 << i); 2952 NumNonZero++; 2953 } 2954 } 2955 2956 if (NumNonZero == 0) { 2957 // All undef vector. Return an UNDEF. All zero vectors were handled above. 2958 return DAG.getNode(ISD::UNDEF, VT); 2959 } 2960 2961 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2962 if (Values.size() == 1) 2963 return SDOperand(); 2964 2965 // Special case for single non-zero element. 2966 if (NumNonZero == 1 && NumElems <= 4) { 2967 unsigned Idx = CountTrailingZeros_32(NonZeros); 2968 SDOperand Item = Op.getOperand(Idx); 2969 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2970 if (Idx == 0) 2971 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2972 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2973 NumZero > 0, DAG); 2974 else if (!HasNonImms) // Otherwise, it's better to do a constpool load. 2975 return SDOperand(); 2976 2977 if (EVTBits == 32) { 2978 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2979 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2980 DAG); 2981 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2982 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2983 SmallVector<SDOperand, 8> MaskVec; 2984 for (unsigned i = 0; i < NumElems; i++) 2985 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2986 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2987 &MaskVec[0], MaskVec.size()); 2988 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2989 DAG.getNode(ISD::UNDEF, VT), Mask); 2990 } 2991 } 2992 2993 // A vector full of immediates; various special cases are already 2994 // handled, so this is best done with a single constant-pool load. 2995 if (!HasNonImms) 2996 return SDOperand(); 2997 2998 // Let legalizer expand 2-wide build_vectors. 2999 if (EVTBits == 64) 3000 return SDOperand(); 3001 3002 // If element VT is < 32 bits, convert it to inserts into a zero vector. 3003 if (EVTBits == 8 && NumElems == 16) { 3004 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 3005 *this); 3006 if (V.Val) return V; 3007 } 3008 3009 if (EVTBits == 16 && NumElems == 8) { 3010 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 3011 *this); 3012 if (V.Val) return V; 3013 } 3014 3015 // If element VT is == 32 bits, turn it into a number of shuffles. 
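  // For example (assuming NumZero > 0), a v4i32 <a, 0, b, 0> is built as
  // V[0] = movl(scalar_to_vector(a), zero) = <a,0,0,0> and
  // V[1] = movl(scalar_to_vector(b), zero) = <b,0,0,0>, which the final
  // shuffle <0,1,4,5> below recombines into <a, 0, b, 0>.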
3016 SmallVector<SDOperand, 8> V; 3017 V.resize(NumElems); 3018 if (NumElems == 4 && NumZero > 0) { 3019 for (unsigned i = 0; i < 4; ++i) { 3020 bool isZero = !(NonZeros & (1 << i)); 3021 if (isZero) 3022 V[i] = getZeroVector(VT, DAG); 3023 else 3024 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3025 } 3026 3027 for (unsigned i = 0; i < 2; ++i) { 3028 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3029 default: break; 3030 case 0: 3031 V[i] = V[i*2]; // Must be a zero vector. 3032 break; 3033 case 1: 3034 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3035 getMOVLMask(NumElems, DAG)); 3036 break; 3037 case 2: 3038 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3039 getMOVLMask(NumElems, DAG)); 3040 break; 3041 case 3: 3042 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3043 getUnpacklMask(NumElems, DAG)); 3044 break; 3045 } 3046 } 3047 3048 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 3049 // clears the upper bits. 3050 // FIXME: we can do the same for v4f32 case when we know both parts of 3051 // the lower half come from scalar_to_vector (loadf32). We should do 3052 // that in post legalizer dag combiner with target specific hooks. 3053 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 3054 return V[0]; 3055 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3056 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 3057 SmallVector<SDOperand, 8> MaskVec; 3058 bool Reverse = (NonZeros & 0x3) == 2; 3059 for (unsigned i = 0; i < 2; ++i) 3060 if (Reverse) 3061 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3062 else 3063 MaskVec.push_back(DAG.getConstant(i, EVT)); 3064 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3065 for (unsigned i = 0; i < 2; ++i) 3066 if (Reverse) 3067 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3068 else 3069 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3070 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3071 &MaskVec[0], MaskVec.size()); 3072 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3073 } 3074 3075 if (Values.size() > 2) { 3076 // Expand into a number of unpckl*. 3077 // e.g. for v4f32 3078 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3079 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3080 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3081 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 3082 for (unsigned i = 0; i < NumElems; ++i) 3083 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3084 NumElems >>= 1; 3085 while (NumElems != 0) { 3086 for (unsigned i = 0; i < NumElems; ++i) 3087 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3088 UnpckMask); 3089 NumElems >>= 1; 3090 } 3091 return V[0]; 3092 } 3093 3094 return SDOperand(); 3095} 3096 3097static 3098SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2, 3099 SDOperand PermMask, SelectionDAG &DAG, 3100 TargetLowering &TLI) { 3101 SDOperand NewV; 3102 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8); 3103 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 3104 MVT::ValueType PtrVT = TLI.getPointerTy(); 3105 SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(), 3106 PermMask.Val->op_end()); 3107 3108 // First record which half of which vector the low elements come from. 
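  // Mask indices 0-7 refer to V1 and 8-15 to V2, so EltIdx / 4 below names
  // one of four quads: V1 low, V1 high, V2 low, or V2 high.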
3109 SmallVector<unsigned, 4> LowQuad(4); 3110 for (unsigned i = 0; i < 4; ++i) { 3111 SDOperand Elt = MaskElts[i]; 3112 if (Elt.getOpcode() == ISD::UNDEF) 3113 continue; 3114 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3115 int QuadIdx = EltIdx / 4; 3116 ++LowQuad[QuadIdx]; 3117 } 3118 int BestLowQuad = -1; 3119 unsigned MaxQuad = 1; 3120 for (unsigned i = 0; i < 4; ++i) { 3121 if (LowQuad[i] > MaxQuad) { 3122 BestLowQuad = i; 3123 MaxQuad = LowQuad[i]; 3124 } 3125 } 3126 3127 // Record which half of which vector the high elements come from. 3128 SmallVector<unsigned, 4> HighQuad(4); 3129 for (unsigned i = 4; i < 8; ++i) { 3130 SDOperand Elt = MaskElts[i]; 3131 if (Elt.getOpcode() == ISD::UNDEF) 3132 continue; 3133 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3134 int QuadIdx = EltIdx / 4; 3135 ++HighQuad[QuadIdx]; 3136 } 3137 int BestHighQuad = -1; 3138 MaxQuad = 1; 3139 for (unsigned i = 0; i < 4; ++i) { 3140 if (HighQuad[i] > MaxQuad) { 3141 BestHighQuad = i; 3142 MaxQuad = HighQuad[i]; 3143 } 3144 } 3145 3146 // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. 3147 if (BestLowQuad != -1 || BestHighQuad != -1) { 3148 // First sort the 4 chunks in order using shufpd. 3149 SmallVector<SDOperand, 8> MaskVec; 3150 if (BestLowQuad != -1) 3151 MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); 3152 else 3153 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 3154 if (BestHighQuad != -1) 3155 MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); 3156 else 3157 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 3158 SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); 3159 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 3160 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), 3161 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); 3162 NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); 3163 3164 // Now sort high and low parts separately. 3165 BitVector InOrder(8); 3166 if (BestLowQuad != -1) { 3167 // Sort lower half in order using PSHUFLW. 3168 MaskVec.clear(); 3169 bool AnyOutOrder = false; 3170 for (unsigned i = 0; i != 4; ++i) { 3171 SDOperand Elt = MaskElts[i]; 3172 if (Elt.getOpcode() == ISD::UNDEF) { 3173 MaskVec.push_back(Elt); 3174 InOrder.set(i); 3175 } else { 3176 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3177 if (EltIdx != i) 3178 AnyOutOrder = true; 3179 MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); 3180 // If this element is in the right place after this shuffle, then 3181 // remember it. 3182 if ((int)(EltIdx / 4) == BestLowQuad) 3183 InOrder.set(i); 3184 } 3185 } 3186 if (AnyOutOrder) { 3187 for (unsigned i = 4; i != 8; ++i) 3188 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3189 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3190 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3191 } 3192 } 3193 3194 if (BestHighQuad != -1) { 3195 // Sort high half in order using PSHUFHW if possible. 
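      // The mask built below keeps elements 0-3 in place and only permutes
      // within the high half, e.g. <0, 1, 2, 3, 5, 4, 7, 6>.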
      MaskVec.clear();
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, MaskEVT));
      bool AnyOutOrder = false;
      for (unsigned i = 4; i != 8; ++i) {
        SDOperand Elt = MaskElts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(Elt);
          InOrder.set(i);
        } else {
          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
          if (EltIdx != i)
            AnyOutOrder = true;
          MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
          // If this element is in the right place after this shuffle, then
          // remember it.
          if ((int)(EltIdx / 4) == BestHighQuad)
            InOrder.set(i);
        }
      }
      if (AnyOutOrder) {
        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
      }
    }

    // The other elements are put in the right place using pextrw and pinsrw.
    for (unsigned i = 0; i != 8; ++i) {
      if (InOrder[i])
        continue;
      SDOperand Elt = MaskElts[i];
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
      if (EltIdx == i)
        continue;
      SDOperand ExtOp = (EltIdx < 8)
        ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
                      DAG.getConstant(EltIdx, PtrVT))
        : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
                      DAG.getConstant(EltIdx - 8, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }
    return NewV;
  }

  // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
  // as few as possible.
  // First, let's find out how many elements are already in the right order.
  unsigned V1InOrder = 0;
  unsigned V1FromV1 = 0;
  unsigned V2InOrder = 0;
  unsigned V2FromV2 = 0;
  SmallVector<SDOperand, 8> V1Elts;
  SmallVector<SDOperand, 8> V2Elts;
  for (unsigned i = 0; i < 8; ++i) {
    SDOperand Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(Elt);
      ++V1InOrder;
      ++V2InOrder;
      continue;
    }
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
    if (EltIdx == i) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
      ++V1InOrder;
    } else if (EltIdx == i+8) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i, MaskEVT));
      ++V2InOrder;
    } else if (EltIdx < 8) {
      V1Elts.push_back(Elt);
      ++V1FromV1;
    } else {
      V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
      ++V2FromV2;
    }
  }

  if (V2InOrder > V1InOrder) {
    PermMask = CommuteVectorShuffleMask(PermMask, DAG);
    std::swap(V1, V2);
    std::swap(V1Elts, V2Elts);
    std::swap(V1FromV1, V2FromV2);
  }

  if ((V1FromV1 + V1InOrder) != 8) {
    // Some elements are from V2.
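    // e.g. for mask <0, 1, 2, 11, 4, 5, 6, 7> only element 3 comes from V2,
    // so a single pextrw/pinsrw pair is cheaper than any full shuffle.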
3286 if (V1FromV1) { 3287 // If there are elements that are from V1 but out of place, 3288 // then first sort them in place 3289 SmallVector<SDOperand, 8> MaskVec; 3290 for (unsigned i = 0; i < 8; ++i) { 3291 SDOperand Elt = V1Elts[i]; 3292 if (Elt.getOpcode() == ISD::UNDEF) { 3293 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3294 continue; 3295 } 3296 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3297 if (EltIdx >= 8) 3298 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3299 else 3300 MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT)); 3301 } 3302 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3303 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask); 3304 } 3305 3306 NewV = V1; 3307 for (unsigned i = 0; i < 8; ++i) { 3308 SDOperand Elt = V1Elts[i]; 3309 if (Elt.getOpcode() == ISD::UNDEF) 3310 continue; 3311 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3312 if (EltIdx < 8) 3313 continue; 3314 SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, 3315 DAG.getConstant(EltIdx - 8, PtrVT)); 3316 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3317 DAG.getConstant(i, PtrVT)); 3318 } 3319 return NewV; 3320 } else { 3321 // All elements are from V1. 3322 NewV = V1; 3323 for (unsigned i = 0; i < 8; ++i) { 3324 SDOperand Elt = V1Elts[i]; 3325 if (Elt.getOpcode() == ISD::UNDEF) 3326 continue; 3327 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3328 SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, 3329 DAG.getConstant(EltIdx, PtrVT)); 3330 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3331 DAG.getConstant(i, PtrVT)); 3332 } 3333 return NewV; 3334 } 3335} 3336 3337/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide 3338/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be 3339/// done when every pair / quad of shuffle mask elements point to elements in 3340/// the right sequence. e.g. 3341/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> 3342static 3343SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2, 3344 MVT::ValueType VT, 3345 SDOperand PermMask, SelectionDAG &DAG, 3346 TargetLowering &TLI) { 3347 unsigned NumElems = PermMask.getNumOperands(); 3348 unsigned NewWidth = (NumElems == 4) ? 
2 : 4; 3349 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth); 3350 MVT::ValueType NewVT = MaskVT; 3351 switch (VT) { 3352 case MVT::v4f32: NewVT = MVT::v2f64; break; 3353 case MVT::v4i32: NewVT = MVT::v2i64; break; 3354 case MVT::v8i16: NewVT = MVT::v4i32; break; 3355 case MVT::v16i8: NewVT = MVT::v4i32; break; 3356 default: assert(false && "Unexpected!"); 3357 } 3358 3359 if (NewWidth == 2) 3360 if (MVT::isInteger(VT)) 3361 NewVT = MVT::v2i64; 3362 else 3363 NewVT = MVT::v2f64; 3364 unsigned Scale = NumElems / NewWidth; 3365 SmallVector<SDOperand, 8> MaskVec; 3366 for (unsigned i = 0; i < NumElems; i += Scale) { 3367 unsigned StartIdx = ~0U; 3368 for (unsigned j = 0; j < Scale; ++j) { 3369 SDOperand Elt = PermMask.getOperand(i+j); 3370 if (Elt.getOpcode() == ISD::UNDEF) 3371 continue; 3372 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3373 if (StartIdx == ~0U) 3374 StartIdx = EltIdx - (EltIdx % Scale); 3375 if (EltIdx != StartIdx + j) 3376 return SDOperand(); 3377 } 3378 if (StartIdx == ~0U) 3379 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 3380 else 3381 MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32)); 3382 } 3383 3384 V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1); 3385 V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2); 3386 return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2, 3387 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3388 &MaskVec[0], MaskVec.size())); 3389} 3390 3391SDOperand 3392X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 3393 SDOperand V1 = Op.getOperand(0); 3394 SDOperand V2 = Op.getOperand(1); 3395 SDOperand PermMask = Op.getOperand(2); 3396 MVT::ValueType VT = Op.getValueType(); 3397 unsigned NumElems = PermMask.getNumOperands(); 3398 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3399 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3400 bool V1IsSplat = false; 3401 bool V2IsSplat = false; 3402 3403 if (isUndefShuffle(Op.Val)) 3404 return DAG.getNode(ISD::UNDEF, VT); 3405 3406 if (isZeroShuffle(Op.Val)) 3407 return getZeroVector(VT, DAG); 3408 3409 if (isIdentityMask(PermMask.Val)) 3410 return V1; 3411 else if (isIdentityMask(PermMask.Val, true)) 3412 return V2; 3413 3414 if (isSplatMask(PermMask.Val)) { 3415 if (NumElems <= 4) return Op; 3416 // Promote it to a v4i32 splat. 3417 return PromoteSplat(Op, DAG); 3418 } 3419 3420 // If the shuffle can be profitably rewritten as a narrower shuffle, then 3421 // do it! 3422 if (VT == MVT::v8i16 || VT == MVT::v16i8) { 3423 SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this); 3424 if (NewOp.Val) 3425 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); 3426 } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { 3427 // FIXME: Figure out a cleaner way to do this. 3428 // Try to make use of movq to zero out the top part. 
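    // (movq xmm, xmm copies the low quadword and zeroes the upper one, so a
    // narrowed v2i64 MOVL-style shuffle against a zero vector maps onto it.)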
    if (ISD::isBuildVectorAllZeros(V2.Val)) {
      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
      if (NewOp.Val) {
        SDOperand NewV1 = NewOp.getOperand(0);
        SDOperand NewV2 = NewOp.getOperand(1);
        SDOperand NewMask = NewOp.getOperand(2);
        if (isCommutedMOVL(NewMask.Val, true, false)) {
          NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
          NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
                              NewV1, NewV2, getMOVLMask(2, DAG));
          return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
        }
      }
    } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
      if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
        return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
    }
  }

  if (X86::isMOVLMask(PermMask.Val))
    return (V1IsUndef) ? V2 : Op;

  if (X86::isMOVSHDUPMask(PermMask.Val) ||
      X86::isMOVSLDUPMask(PermMask.Val) ||
      X86::isMOVHLPSMask(PermMask.Val) ||
      X86::isMOVHPMask(PermMask.Val) ||
      X86::isMOVLPMask(PermMask.Val))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.Val) ||
      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  bool Commuted = false;
  // FIXME: This should also accept a bitcast of a splat?  Be careful, not
  // 1,1,1,1 -> v8i16 though.
  V1IsSplat = isSplatVector(V1.Val);
  V2IsSplat = isSplatVector(V2.Val);

  // Canonicalize the splat or undef, if present, to be on the RHS.
  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    std::swap(V1IsSplat, V2IsSplat);
    std::swap(V1IsUndef, V2IsUndef);
    Commuted = true;
  }

  // FIXME: Figure out a cleaner way to do this.
  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDOperand NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.Val != PermMask.Val)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKLMask(PermMask.Val) ||
      X86::isUNPCKHMask(PermMask.Val))
    return Op;

  if (V2IsSplat) {
    // Normalize the mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If a match is found,
    // return a new vector_shuffle with the corrected mask.
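    // e.g. with a splat V2, a v4i32 mask <0, 6, 1, 7> normalizes to
    // <0, 4, 1, 4>, which matches the unpckl pattern <0, 4, 1, 5> since
    // every V2 element is identical anyway.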
    SDOperand NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.Val != PermMask.Val) {
      if (X86::isUNPCKLMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed
  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (Commuted) {
    // Commute it back and try unpck* again.
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKLMask(PermMask.Val) ||
        X86::isUNPCKHMask(PermMask.Val))
      return Op;
  }

  // If VT is integer, try PSHUF* first, then SHUFP*.
  if (MVT::isInteger(VT)) {
    // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
    // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
    if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
         X86::isPSHUFDMask(PermMask.Val)) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }

    if (X86::isSHUFPMask(PermMask.Val) &&
        MVT::getSizeInBits(VT) != 64)    // Don't do this for MMX.
      return Op;
  } else {
    // Floating point cases in the other order.
    if (X86::isSHUFPMask(PermMask.Val))
      return Op;
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }
  }

  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
  if (VT == MVT::v8i16) {
    SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
    if (NewOp.Val)
      return NewOp;
  }

  // Handle all 4 wide cases with a number of shuffles.
  if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
    // Don't do this for MMX.
    MVT::ValueType MaskVT = PermMask.getValueType();
    MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
    SmallVector<std::pair<int, int>, 8> Locs;
    Locs.reserve(NumElems);
    SmallVector<SDOperand, 8> Mask1(NumElems,
                                    DAG.getNode(ISD::UNDEF, MaskEVT));
    SmallVector<SDOperand, 8> Mask2(NumElems,
                                    DAG.getNode(ISD::UNDEF, MaskEVT));
    unsigned NumHi = 0;
    unsigned NumLo = 0;
    // If no more than two elements come from either vector, this can be
    // implemented with two shuffles. The first shuffle gathers the elements.
    // The second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
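    // e.g. for mask <0, 4, 1, 5> the first shuffle produces
    // <V1[0], V1[1], V2[0], V2[1]> (gather mask <0, 1, 4, 5>), and the
    // second applies <0, 2, 5, 7> to that result to get the final order.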
3583 for (unsigned i = 0; i != NumElems; ++i) { 3584 SDOperand Elt = PermMask.getOperand(i); 3585 if (Elt.getOpcode() == ISD::UNDEF) { 3586 Locs[i] = std::make_pair(-1, -1); 3587 } else { 3588 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 3589 if (Val < NumElems) { 3590 Locs[i] = std::make_pair(0, NumLo); 3591 Mask1[NumLo] = Elt; 3592 NumLo++; 3593 } else { 3594 Locs[i] = std::make_pair(1, NumHi); 3595 if (2+NumHi < NumElems) 3596 Mask1[2+NumHi] = Elt; 3597 NumHi++; 3598 } 3599 } 3600 } 3601 if (NumLo <= 2 && NumHi <= 2) { 3602 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3603 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3604 &Mask1[0], Mask1.size())); 3605 for (unsigned i = 0; i != NumElems; ++i) { 3606 if (Locs[i].first == -1) 3607 continue; 3608 else { 3609 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 3610 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 3611 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3612 } 3613 } 3614 3615 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3616 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3617 &Mask2[0], Mask2.size())); 3618 } 3619 3620 // Break it into (shuffle shuffle_hi, shuffle_lo). 3621 Locs.clear(); 3622 SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3623 SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3624 SmallVector<SDOperand,8> *MaskPtr = &LoMask; 3625 unsigned MaskIdx = 0; 3626 unsigned LoIdx = 0; 3627 unsigned HiIdx = NumElems/2; 3628 for (unsigned i = 0; i != NumElems; ++i) { 3629 if (i == NumElems/2) { 3630 MaskPtr = &HiMask; 3631 MaskIdx = 1; 3632 LoIdx = 0; 3633 HiIdx = NumElems/2; 3634 } 3635 SDOperand Elt = PermMask.getOperand(i); 3636 if (Elt.getOpcode() == ISD::UNDEF) { 3637 Locs[i] = std::make_pair(-1, -1); 3638 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3639 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3640 (*MaskPtr)[LoIdx] = Elt; 3641 LoIdx++; 3642 } else { 3643 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3644 (*MaskPtr)[HiIdx] = Elt; 3645 HiIdx++; 3646 } 3647 } 3648 3649 SDOperand LoShuffle = 3650 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3651 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3652 &LoMask[0], LoMask.size())); 3653 SDOperand HiShuffle = 3654 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3655 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3656 &HiMask[0], HiMask.size())); 3657 SmallVector<SDOperand, 8> MaskOps; 3658 for (unsigned i = 0; i != NumElems; ++i) { 3659 if (Locs[i].first == -1) { 3660 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3661 } else { 3662 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3663 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3664 } 3665 } 3666 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3667 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3668 &MaskOps[0], MaskOps.size())); 3669 } 3670 3671 return SDOperand(); 3672} 3673 3674SDOperand 3675X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op, 3676 SelectionDAG &DAG) { 3677 MVT::ValueType VT = Op.getValueType(); 3678 if (MVT::getSizeInBits(VT) == 8) { 3679 SDOperand Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32, 3680 Op.getOperand(0), Op.getOperand(1)); 3681 SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract, 3682 DAG.getValueType(VT)); 3683 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3684 } else if (MVT::getSizeInBits(VT) == 16) { 3685 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32, 3686 Op.getOperand(0), Op.getOperand(1)); 3687 SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract, 
                                   DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  }
  return SDOperand();
}


SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  if (Subtarget->hasSSE41())
    return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);

  MVT::ValueType VT = Op.getValueType();
  // TODO: handle v16i8.
  if (MVT::getSizeInBits(VT) == 16) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return DAG.getNode(ISD::TRUNCATE, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                                     DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                                                 Vec),
                                     Op.getOperand(1)));
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                   DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;
    // SHUFPS the element to the lowest double word, then movss.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    SDOperand Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getIntPtrConstant(0));
  } else if (MVT::getSizeInBits(VT) == 64) {
    // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
    // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
    // to match extract_elt for f64.
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    SDOperand Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getIntPtrConstant(0));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorElementType(VT);

  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);

  if ((MVT::getSizeInBits(EVT) == 8) || (MVT::getSizeInBits(EVT) == 16)) {
    unsigned Opc = (MVT::getSizeInBits(EVT) == 8) ? X86ISD::PINSRB
                                                  : X86ISD::PINSRW;
    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its
    // second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
    return DAG.getNode(Opc, VT, N0, N1, N2);
  } else if (EVT == MVT::f32) {
    // Bits [7:6] of the constant are the source select. This will always be
    // zero here. The DAG Combiner may combine an extract_elt index into these
    // bits. For example (insert (extract, 3), 2) could be matched by putting
    // the '3' into bits [7:6] of X86ISD::INSERTPS.
    // Bits [5:4] of the constant are the destination select. This is the
    // value of the incoming immediate.
    // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
    // combine either bitwise AND or insert of float 0.0 to set these bits.
    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue() << 4);
    return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
  }
  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorElementType(VT);

  if (Subtarget->hasSSE41())
    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);

  if (EVT == MVT::i8)
    return SDOperand();

  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);

  if (MVT::getSizeInBits(EVT) == 16) {
    // Transform it so it matches pinsrw, which expects a 16-bit value in a
    // GR32 as its second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  }
  return SDOperand();
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N.
// So the raw TargetGlobalAddress nodes, etc. can only be used to form
// addressing modes. These wrapped nodes will be selected into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                               getPointerTy(),
                                               CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
  // If it's a debug information descriptor, don't mess with it.
  if (DAG.isVerifiedDebugInfoDesc(Op))
    return Result;
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
  // to load the value at address GV, not the value of GV itself. This means
  // that the GlobalAddress must be in the base or index register of the
  // address, not the GV offset field. The platform check is inside the
  // GVRequiresExtraLoad() call. The same applies to external symbols during
  // PIC codegen.
  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
                         PseudoSourceValue::getGOT(), 0);

  return Result;
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
static SDOperand
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                              const MVT::ValueType PtrVT) {
  SDOperand InFlag;
  SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
                                     DAG.getNode(X86ISD::GlobalBaseReg,
                                                 PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  // emit leal symbol@TLSGD(,%ebx,1), %eax
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Ops[] = { Chain, TGA, InFlag };
  SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
  InFlag = Result.getValue(2);
  Chain = Result.getValue(1);

  // call ___tls_get_addr. This function receives its argument in
  // the register EAX.
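  // The overall sequence is the standard IA-32 general-dynamic TLS idiom:
  //   leal sym@TLSGD(,%ebx,1), %eax
  //   call ___tls_get_addr
  // with the variable's address coming back in %eax.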
  Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
  InFlag = Chain.getValue(1);

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops1[] = { Chain,
                       DAG.getTargetExternalSymbol("___tls_get_addr",
                                                   PtrVT),
                       DAG.getRegister(X86::EAX, PtrVT),
                       DAG.getRegister(X86::EBX, PtrVT),
                       InFlag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
  InFlag = Chain.getValue(1);

  return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}

// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDOperand
LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                    const MVT::ValueType PtrVT) {
  // Get the Thread Pointer
  SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
  // (initial exec)
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);

  if (GA->getGlobal()->isDeclaration())  // initial exec TLS model
    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
                         PseudoSourceValue::getGOT(), 0);

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}

SDOperand
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  // TODO: implement the "initial exec" model for PIC executables
  assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF and 64-bit targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS model;
  // otherwise use the "Local Exec" TLS model.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
  else
    return LowerToTLSExecModel(GA, DAG, getPointerTy());
}

SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
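  // ($g here is the value of the PIC base register, i.e. the
  // X86ISD::GlobalBaseReg node added below.)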
3986 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3987 !Subtarget->isPICStyleRIPRel()) { 3988 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3989 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3990 Result); 3991 } 3992 3993 return Result; 3994} 3995 3996/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and 3997/// take a 2 x i32 value to shift plus a shift amount. 3998SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3999 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 4000 "Not an i64 shift!"); 4001 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 4002 SDOperand ShOpLo = Op.getOperand(0); 4003 SDOperand ShOpHi = Op.getOperand(1); 4004 SDOperand ShAmt = Op.getOperand(2); 4005 SDOperand Tmp1 = isSRA ? 4006 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 4007 DAG.getConstant(0, MVT::i32); 4008 4009 SDOperand Tmp2, Tmp3; 4010 if (Op.getOpcode() == ISD::SHL_PARTS) { 4011 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 4012 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 4013 } else { 4014 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 4015 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 4016 } 4017 4018 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4019 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 4020 DAG.getConstant(32, MVT::i8)); 4021 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 4022 AndNode, DAG.getConstant(0, MVT::i8)); 4023 4024 SDOperand Hi, Lo; 4025 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4026 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 4027 SmallVector<SDOperand, 4> Ops; 4028 if (Op.getOpcode() == ISD::SHL_PARTS) { 4029 Ops.push_back(Tmp2); 4030 Ops.push_back(Tmp3); 4031 Ops.push_back(CC); 4032 Ops.push_back(Cond); 4033 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4034 4035 Ops.clear(); 4036 Ops.push_back(Tmp3); 4037 Ops.push_back(Tmp1); 4038 Ops.push_back(CC); 4039 Ops.push_back(Cond); 4040 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4041 } else { 4042 Ops.push_back(Tmp2); 4043 Ops.push_back(Tmp3); 4044 Ops.push_back(CC); 4045 Ops.push_back(Cond); 4046 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4047 4048 Ops.clear(); 4049 Ops.push_back(Tmp3); 4050 Ops.push_back(Tmp1); 4051 Ops.push_back(CC); 4052 Ops.push_back(Cond); 4053 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4054 } 4055 4056 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 4057 Ops.clear(); 4058 Ops.push_back(Lo); 4059 Ops.push_back(Hi); 4060 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 4061} 4062 4063SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 4064 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 4065 Op.getOperand(0).getValueType() >= MVT::i16 && 4066 "Unknown SINT_TO_FP to lower!"); 4067 4068 SDOperand Result; 4069 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 4070 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 4071 MachineFunction &MF = DAG.getMachineFunction(); 4072 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4073 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4074 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 4075 StackSlot, 4076 PseudoSourceValue::getFixedStack(), 4077 SSFI); 4078 4079 // These are really Legal; caller falls through into that case. 
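  // (With SSE these map directly onto cvtsi2ss/cvtsi2sd: i32 sources always,
  // and i64 sources when compiling for 64-bit.)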
4080 if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) 4081 return Result; 4082 if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 && 4083 Subtarget->is64Bit()) 4084 return Result; 4085 4086 // Build the FILD 4087 SDVTList Tys; 4088 bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); 4089 if (useSSE) 4090 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 4091 else 4092 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 4093 SmallVector<SDOperand, 8> Ops; 4094 Ops.push_back(Chain); 4095 Ops.push_back(StackSlot); 4096 Ops.push_back(DAG.getValueType(SrcVT)); 4097 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 4098 Tys, &Ops[0], Ops.size()); 4099 4100 if (useSSE) { 4101 Chain = Result.getValue(1); 4102 SDOperand InFlag = Result.getValue(2); 4103 4104 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 4105 // shouldn't be necessary except that RFP cannot be live across 4106 // multiple blocks. When stackifier is fixed, they can be uncoupled. 4107 MachineFunction &MF = DAG.getMachineFunction(); 4108 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 4109 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4110 Tys = DAG.getVTList(MVT::Other); 4111 SmallVector<SDOperand, 8> Ops; 4112 Ops.push_back(Chain); 4113 Ops.push_back(Result); 4114 Ops.push_back(StackSlot); 4115 Ops.push_back(DAG.getValueType(Op.getValueType())); 4116 Ops.push_back(InFlag); 4117 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 4118 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 4119 PseudoSourceValue::getFixedStack(), SSFI); 4120 } 4121 4122 return Result; 4123} 4124 4125std::pair<SDOperand,SDOperand> X86TargetLowering:: 4126FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) { 4127 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 4128 "Unknown FP_TO_SINT to lower!"); 4129 4130 // These are really Legal. 4131 if (Op.getValueType() == MVT::i32 && 4132 isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) 4133 return std::make_pair(SDOperand(), SDOperand()); 4134 if (Subtarget->is64Bit() && 4135 Op.getValueType() == MVT::i64 && 4136 Op.getOperand(0).getValueType() != MVT::f80) 4137 return std::make_pair(SDOperand(), SDOperand()); 4138 4139 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 4140 // stack slot. 
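  // For a value currently in an SSE register this means: store it, fld it
  // back onto the x87 stack, fistp into a fresh slot, then load the integer.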
4141 MachineFunction &MF = DAG.getMachineFunction(); 4142 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 4143 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4144 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4145 unsigned Opc; 4146 switch (Op.getValueType()) { 4147 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4148 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4149 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4150 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4151 } 4152 4153 SDOperand Chain = DAG.getEntryNode(); 4154 SDOperand Value = Op.getOperand(0); 4155 if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { 4156 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4157 Chain = DAG.getStore(Chain, Value, StackSlot, 4158 PseudoSourceValue::getFixedStack(), SSFI); 4159 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4160 SDOperand Ops[] = { 4161 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4162 }; 4163 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4164 Chain = Value.getValue(1); 4165 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4166 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4167 } 4168 4169 // Build the FP_TO_INT*_IN_MEM 4170 SDOperand Ops[] = { Chain, Value, StackSlot }; 4171 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4172 4173 return std::make_pair(FIST, StackSlot); 4174} 4175 4176SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 4177 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG); 4178 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4179 if (FIST.Val == 0) return SDOperand(); 4180 4181 // Load the result. 4182 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4183} 4184 4185SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 4186 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG); 4187 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4188 if (FIST.Val == 0) return 0; 4189 4190 // Return an i64 load from the stack slot. 4191 SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0); 4192 4193 // Use a MERGE_VALUES node to drop the chain result value. 
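  // (The load has two results, the i64 value and a chain; wrapping the value
  // in a single-operand MERGE_VALUES returns a node carrying just the value.)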
4194 return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val; 4195} 4196 4197SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 4198 MVT::ValueType VT = Op.getValueType(); 4199 MVT::ValueType EltVT = VT; 4200 if (MVT::isVector(VT)) 4201 EltVT = MVT::getVectorElementType(VT); 4202 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4203 std::vector<Constant*> CV; 4204 if (EltVT == MVT::f64) { 4205 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 4206 CV.push_back(C); 4207 CV.push_back(C); 4208 } else { 4209 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 4210 CV.push_back(C); 4211 CV.push_back(C); 4212 CV.push_back(C); 4213 CV.push_back(C); 4214 } 4215 Constant *C = ConstantVector::get(CV); 4216 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4217 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4218 PseudoSourceValue::getConstantPool(), 0, 4219 false, 16); 4220 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4221} 4222 4223SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 4224 MVT::ValueType VT = Op.getValueType(); 4225 MVT::ValueType EltVT = VT; 4226 unsigned EltNum = 1; 4227 if (MVT::isVector(VT)) { 4228 EltVT = MVT::getVectorElementType(VT); 4229 EltNum = MVT::getVectorNumElements(VT); 4230 } 4231 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4232 std::vector<Constant*> CV; 4233 if (EltVT == MVT::f64) { 4234 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 4235 CV.push_back(C); 4236 CV.push_back(C); 4237 } else { 4238 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 4239 CV.push_back(C); 4240 CV.push_back(C); 4241 CV.push_back(C); 4242 CV.push_back(C); 4243 } 4244 Constant *C = ConstantVector::get(CV); 4245 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4246 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4247 PseudoSourceValue::getConstantPool(), 0, 4248 false, 16); 4249 if (MVT::isVector(VT)) { 4250 return DAG.getNode(ISD::BIT_CONVERT, VT, 4251 DAG.getNode(ISD::XOR, MVT::v2i64, 4252 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4253 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4254 } else { 4255 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4256 } 4257} 4258 4259SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 4260 SDOperand Op0 = Op.getOperand(0); 4261 SDOperand Op1 = Op.getOperand(1); 4262 MVT::ValueType VT = Op.getValueType(); 4263 MVT::ValueType SrcVT = Op1.getValueType(); 4264 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 4265 4266 // If second operand is smaller, extend it first. 4267 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 4268 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4269 SrcVT = VT; 4270 SrcTy = MVT::getTypeForValueType(SrcVT); 4271 } 4272 // And if it is bigger, shrink it first. 4273 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4274 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1)); 4275 SrcVT = VT; 4276 SrcTy = MVT::getTypeForValueType(SrcVT); 4277 } 4278 4279 // At this point the operands and the result should have the same 4280 // type, and that won't be f80 since that is not custom lowered. 4281 4282 // First get the sign bit of second operand. 
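  // i.e. AND Op1 with a constant-pool mask of <0x8000000000000000, 0> for
  // f64, or <0x80000000, 0, 0, 0> for f32.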
4283 std::vector<Constant*> CV; 4284 if (SrcVT == MVT::f64) { 4285 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 4286 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4287 } else { 4288 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 4289 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4290 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4291 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4292 } 4293 Constant *C = ConstantVector::get(CV); 4294 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4295 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, 4296 PseudoSourceValue::getConstantPool(), 0, 4297 false, 16); 4298 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4299 4300 // Shift sign bit right or left if the two operands have different types. 4301 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4302 // Op0 is MVT::f32, Op1 is MVT::f64. 4303 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4304 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4305 DAG.getConstant(32, MVT::i32)); 4306 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4307 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4308 DAG.getIntPtrConstant(0)); 4309 } 4310 4311 // Clear first operand sign bit. 4312 CV.clear(); 4313 if (VT == MVT::f64) { 4314 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 4315 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4316 } else { 4317 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 4318 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4319 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4320 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4321 } 4322 C = ConstantVector::get(CV); 4323 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4324 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, 4325 PseudoSourceValue::getConstantPool(), 0, 4326 false, 16); 4327 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4328 4329 // Or the value with the sign bit. 
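  // This completes copysign(x, y) = (x & ~signmask) | (y & signmask).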
4330 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 4331} 4332 4333SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 4334 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 4335 SDOperand Cond; 4336 SDOperand Op0 = Op.getOperand(0); 4337 SDOperand Op1 = Op.getOperand(1); 4338 SDOperand CC = Op.getOperand(2); 4339 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4340 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 4341 unsigned X86CC; 4342 4343 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 4344 Op0, Op1, DAG)) { 4345 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4346 return DAG.getNode(X86ISD::SETCC, MVT::i8, 4347 DAG.getConstant(X86CC, MVT::i8), Cond); 4348 } 4349 4350 assert(isFP && "Illegal integer SetCC!"); 4351 4352 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4353 switch (SetCCOpcode) { 4354 default: assert(false && "Illegal floating point SetCC!"); 4355 case ISD::SETOEQ: { // !PF & ZF 4356 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4357 DAG.getConstant(X86::COND_NP, MVT::i8), Cond); 4358 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4359 DAG.getConstant(X86::COND_E, MVT::i8), Cond); 4360 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 4361 } 4362 case ISD::SETUNE: { // PF | !ZF 4363 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4364 DAG.getConstant(X86::COND_P, MVT::i8), Cond); 4365 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4366 DAG.getConstant(X86::COND_NE, MVT::i8), Cond); 4367 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 4368 } 4369 } 4370} 4371 4372 4373SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 4374 bool addTest = true; 4375 SDOperand Cond = Op.getOperand(0); 4376 SDOperand CC; 4377 4378 if (Cond.getOpcode() == ISD::SETCC) 4379 Cond = LowerSETCC(Cond, DAG); 4380 4381 // If condition flag is set by a X86ISD::CMP, then use it as the condition 4382 // setting operand in place of the X86ISD::SETCC. 4383 if (Cond.getOpcode() == X86ISD::SETCC) { 4384 CC = Cond.getOperand(0); 4385 4386 SDOperand Cmp = Cond.getOperand(1); 4387 unsigned Opc = Cmp.getOpcode(); 4388 MVT::ValueType VT = Op.getValueType(); 4389 4390 bool IllegalFPCMov = false; 4391 if (MVT::isFloatingPoint(VT) && !MVT::isVector(VT) && 4392 !isScalarFPTypeInSSEReg(VT)) // FPStack? 4393 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4394 4395 if ((Opc == X86ISD::CMP || 4396 Opc == X86ISD::COMI || 4397 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 4398 Cond = Cmp; 4399 addTest = false; 4400 } 4401 } 4402 4403 if (addTest) { 4404 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4405 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 4406 } 4407 4408 const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(), 4409 MVT::Flag); 4410 SmallVector<SDOperand, 4> Ops; 4411 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 4412 // condition is true. 
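  // Hence the false value (SELECT operand 2) goes first and the true value
  // (operand 1) second: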
4413 Ops.push_back(Op.getOperand(2));
4414 Ops.push_back(Op.getOperand(1));
4415 Ops.push_back(CC);
4416 Ops.push_back(Cond);
4417 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
4418}
4419
4420SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
4421 bool addTest = true;
4422 SDOperand Chain = Op.getOperand(0);
4423 SDOperand Cond = Op.getOperand(1);
4424 SDOperand Dest = Op.getOperand(2);
4425 SDOperand CC;
4426
4427 if (Cond.getOpcode() == ISD::SETCC)
4428 Cond = LowerSETCC(Cond, DAG);
4429
4430 // If the condition flag is set by an X86ISD::CMP, then use it as the
4431 // condition-setting operand in place of the X86ISD::SETCC.
4432 if (Cond.getOpcode() == X86ISD::SETCC) {
4433 CC = Cond.getOperand(0);
4434
4435 SDOperand Cmp = Cond.getOperand(1);
4436 unsigned Opc = Cmp.getOpcode();
4437 if (Opc == X86ISD::CMP ||
4438 Opc == X86ISD::COMI ||
4439 Opc == X86ISD::UCOMI) {
4440 Cond = Cmp;
4441 addTest = false;
4442 }
4443 }
4444
4445 if (addTest) {
4446 CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4447 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
4448 }
4449 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
4450 Chain, Dest, CC, Cond);
4451}
4452
4453
4454// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
4455// A call to _alloca is needed to probe the stack when allocating more than 4K
4456// bytes in one go. Touching the stack at 4K increments is necessary to ensure
4457// that the guard pages used by the OS virtual memory manager are allocated in
4458// correct sequence.
4459SDOperand
4460X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
4461 SelectionDAG &DAG) {
4462 assert(Subtarget->isTargetCygMing() &&
4463 "This should be used only on Cygwin/Mingw targets");
4464
4465 // Get the inputs.
4466 SDOperand Chain = Op.getOperand(0);
4467 SDOperand Size = Op.getOperand(1);
4468 // FIXME: Ensure alignment here
4469
4470 SDOperand Flag;
4471
4472 MVT::ValueType IntPtr = getPointerTy();
4473 MVT::ValueType SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
4474
4475 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
4476 Flag = Chain.getValue(1);
4477
4478 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
4479 SDOperand Ops[] = { Chain,
4480 DAG.getTargetExternalSymbol("_alloca", IntPtr),
4481 DAG.getRegister(X86::EAX, IntPtr),
4482 Flag };
4483 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
4484 Flag = Chain.getValue(1);
4485
4486 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
4487
4488 std::vector<MVT::ValueType> Tys;
4489 Tys.push_back(SPTy);
4490 Tys.push_back(MVT::Other);
4491 SDOperand Ops1[2] = { Chain.getValue(0), Chain };
4492 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
4493}
4494
4495SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
4496 SDOperand InFlag(0, 0);
4497 SDOperand Chain = Op.getOperand(0);
4498 unsigned Align =
4499 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4500 if (Align == 0) Align = 1;
4501
4502 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4503 // If not DWORD aligned or size is more than the threshold, call memset.
4504 // The libc version is likely to be faster for these cases. It can use the
4505 // address value and runtime information about the CPU.
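  // For example, a misaligned destination or a constant size above
  // getMaxInlineSizeThreshold() is left to libc, while a small DWORD-aligned
  // memset is expanded to rep;stos below.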
4506 if ((Align & 3) != 0 || 4507 (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) { 4508 MVT::ValueType IntPtr = getPointerTy(); 4509 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4510 TargetLowering::ArgListTy Args; 4511 TargetLowering::ArgListEntry Entry; 4512 Entry.Node = Op.getOperand(1); 4513 Entry.Ty = IntPtrTy; 4514 Args.push_back(Entry); 4515 // Extend the unsigned i8 argument to be an int value for the call. 4516 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 4517 Entry.Ty = IntPtrTy; 4518 Args.push_back(Entry); 4519 Entry.Node = Op.getOperand(3); 4520 Args.push_back(Entry); 4521 std::pair<SDOperand,SDOperand> CallResult = 4522 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 4523 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 4524 return CallResult.second; 4525 } 4526 4527 MVT::ValueType AVT; 4528 SDOperand Count; 4529 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4530 unsigned BytesLeft = 0; 4531 bool TwoRepStos = false; 4532 if (ValC) { 4533 unsigned ValReg; 4534 uint64_t Val = ValC->getValue() & 255; 4535 4536 // If the value is a constant, then we can potentially use larger sets. 4537 switch (Align & 3) { 4538 case 2: // WORD aligned 4539 AVT = MVT::i16; 4540 ValReg = X86::AX; 4541 Val = (Val << 8) | Val; 4542 break; 4543 case 0: // DWORD aligned 4544 AVT = MVT::i32; 4545 ValReg = X86::EAX; 4546 Val = (Val << 8) | Val; 4547 Val = (Val << 16) | Val; 4548 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 4549 AVT = MVT::i64; 4550 ValReg = X86::RAX; 4551 Val = (Val << 32) | Val; 4552 } 4553 break; 4554 default: // Byte aligned 4555 AVT = MVT::i8; 4556 ValReg = X86::AL; 4557 Count = Op.getOperand(3); 4558 break; 4559 } 4560 4561 if (AVT > MVT::i8) { 4562 if (I) { 4563 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4564 Count = DAG.getIntPtrConstant(I->getValue() / UBytes); 4565 BytesLeft = I->getValue() % UBytes; 4566 } else { 4567 assert(AVT >= MVT::i32 && 4568 "Do not use rep;stos if not at least DWORD aligned"); 4569 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4570 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4571 TwoRepStos = true; 4572 } 4573 } 4574 4575 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 4576 InFlag); 4577 InFlag = Chain.getValue(1); 4578 } else { 4579 AVT = MVT::i8; 4580 Count = Op.getOperand(3); 4581 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 4582 InFlag = Chain.getValue(1); 4583 } 4584 4585 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4586 Count, InFlag); 4587 InFlag = Chain.getValue(1); 4588 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4589 Op.getOperand(1), InFlag); 4590 InFlag = Chain.getValue(1); 4591 4592 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4593 SmallVector<SDOperand, 8> Ops; 4594 Ops.push_back(Chain); 4595 Ops.push_back(DAG.getValueType(AVT)); 4596 Ops.push_back(InFlag); 4597 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4598 4599 if (TwoRepStos) { 4600 InFlag = Chain.getValue(1); 4601 Count = Op.getOperand(3); 4602 MVT::ValueType CVT = Count.getValueType(); 4603 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4604 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4605 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? 
X86::RCX : X86::ECX, 4606 Left, InFlag); 4607 InFlag = Chain.getValue(1); 4608 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4609 Ops.clear(); 4610 Ops.push_back(Chain); 4611 Ops.push_back(DAG.getValueType(MVT::i8)); 4612 Ops.push_back(InFlag); 4613 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4614 } else if (BytesLeft) { 4615 // Issue stores for the last 1 - 7 bytes. 4616 SDOperand Value; 4617 unsigned Val = ValC->getValue() & 255; 4618 unsigned Offset = I->getValue() - BytesLeft; 4619 SDOperand DstAddr = Op.getOperand(1); 4620 MVT::ValueType AddrVT = DstAddr.getValueType(); 4621 if (BytesLeft >= 4) { 4622 Val = (Val << 8) | Val; 4623 Val = (Val << 16) | Val; 4624 Value = DAG.getConstant(Val, MVT::i32); 4625 Chain = DAG.getStore(Chain, Value, 4626 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4627 DAG.getConstant(Offset, AddrVT)), 4628 NULL, 0); 4629 BytesLeft -= 4; 4630 Offset += 4; 4631 } 4632 if (BytesLeft >= 2) { 4633 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4634 Chain = DAG.getStore(Chain, Value, 4635 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4636 DAG.getConstant(Offset, AddrVT)), 4637 NULL, 0); 4638 BytesLeft -= 2; 4639 Offset += 2; 4640 } 4641 if (BytesLeft == 1) { 4642 Value = DAG.getConstant(Val, MVT::i8); 4643 Chain = DAG.getStore(Chain, Value, 4644 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4645 DAG.getConstant(Offset, AddrVT)), 4646 NULL, 0); 4647 } 4648 } 4649 4650 return Chain; 4651} 4652 4653SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4654 SDOperand Dest, 4655 SDOperand Source, 4656 unsigned Size, 4657 unsigned Align, 4658 SelectionDAG &DAG) { 4659 MVT::ValueType AVT; 4660 unsigned BytesLeft = 0; 4661 switch (Align & 3) { 4662 case 2: // WORD aligned 4663 AVT = MVT::i16; 4664 break; 4665 case 0: // DWORD aligned 4666 AVT = MVT::i32; 4667 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4668 AVT = MVT::i64; 4669 break; 4670 default: // Byte aligned 4671 AVT = MVT::i8; 4672 break; 4673 } 4674 4675 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4676 SDOperand Count = DAG.getIntPtrConstant(Size / UBytes); 4677 BytesLeft = Size % UBytes; 4678 4679 SDOperand InFlag(0, 0); 4680 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4681 Count, InFlag); 4682 InFlag = Chain.getValue(1); 4683 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4684 Dest, InFlag); 4685 InFlag = Chain.getValue(1); 4686 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4687 Source, InFlag); 4688 InFlag = Chain.getValue(1); 4689 4690 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4691 SmallVector<SDOperand, 8> Ops; 4692 Ops.push_back(Chain); 4693 Ops.push_back(DAG.getValueType(AVT)); 4694 Ops.push_back(InFlag); 4695 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4696 4697 if (BytesLeft) { 4698 // Issue loads and stores for the last 1 - 7 bytes. 
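    // For example, a 15-byte DWORD-aligned copy does rep;movs with a count of
    // 3 dwords above, then one i16 and one i8 load/store pair here for the
    // 3-byte tail.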
4699 unsigned Offset = Size - BytesLeft; 4700 SDOperand DstAddr = Dest; 4701 MVT::ValueType DstVT = DstAddr.getValueType(); 4702 SDOperand SrcAddr = Source; 4703 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4704 SDOperand Value; 4705 if (BytesLeft >= 4) { 4706 Value = DAG.getLoad(MVT::i32, Chain, 4707 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4708 DAG.getConstant(Offset, SrcVT)), 4709 NULL, 0); 4710 Chain = Value.getValue(1); 4711 Chain = DAG.getStore(Chain, Value, 4712 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4713 DAG.getConstant(Offset, DstVT)), 4714 NULL, 0); 4715 BytesLeft -= 4; 4716 Offset += 4; 4717 } 4718 if (BytesLeft >= 2) { 4719 Value = DAG.getLoad(MVT::i16, Chain, 4720 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4721 DAG.getConstant(Offset, SrcVT)), 4722 NULL, 0); 4723 Chain = Value.getValue(1); 4724 Chain = DAG.getStore(Chain, Value, 4725 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4726 DAG.getConstant(Offset, DstVT)), 4727 NULL, 0); 4728 BytesLeft -= 2; 4729 Offset += 2; 4730 } 4731 4732 if (BytesLeft == 1) { 4733 Value = DAG.getLoad(MVT::i8, Chain, 4734 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4735 DAG.getConstant(Offset, SrcVT)), 4736 NULL, 0); 4737 Chain = Value.getValue(1); 4738 Chain = DAG.getStore(Chain, Value, 4739 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4740 DAG.getConstant(Offset, DstVT)), 4741 NULL, 0); 4742 } 4743 } 4744 4745 return Chain; 4746} 4747 4748/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain 4749SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){ 4750 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4751 SDOperand TheChain = N->getOperand(0); 4752 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); 4753 if (Subtarget->is64Bit()) { 4754 SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4755 SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, 4756 MVT::i64, rax.getValue(2)); 4757 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, 4758 DAG.getConstant(32, MVT::i8)); 4759 SDOperand Ops[] = { 4760 DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) 4761 }; 4762 4763 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4764 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 4765 } 4766 4767 SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4768 SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, 4769 MVT::i32, eax.getValue(2)); 4770 // Use a buildpair to merge the two 32-bit values into a 64-bit one. 4771 SDOperand Ops[] = { eax, edx }; 4772 Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2); 4773 4774 // Use a MERGE_VALUES to return the value and chain. 4775 Ops[1] = edx.getValue(1); 4776 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4777 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 4778} 4779 4780SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4781 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 4782 4783 if (!Subtarget->is64Bit()) { 4784 // vastart just stores the address of the VarArgsFrameIndex slot into the 4785 // memory location argument. 4786 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4787 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV, 0); 4788 } 4789 4790 // __va_list_tag: 4791 // gp_offset (0 - 6 * 8) 4792 // fp_offset (48 - 48 + 8 * 16) 4793 // overflow_arg_area (point to parameters coming in memory). 
4794 // reg_save_area 4795 SmallVector<SDOperand, 8> MemOps; 4796 SDOperand FIN = Op.getOperand(1); 4797 // Store gp_offset 4798 SDOperand Store = DAG.getStore(Op.getOperand(0), 4799 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4800 FIN, SV, 0); 4801 MemOps.push_back(Store); 4802 4803 // Store fp_offset 4804 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 4805 Store = DAG.getStore(Op.getOperand(0), 4806 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4807 FIN, SV, 0); 4808 MemOps.push_back(Store); 4809 4810 // Store ptr to overflow_arg_area 4811 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); 4812 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4813 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV, 0); 4814 MemOps.push_back(Store); 4815 4816 // Store ptr to reg_save_area. 4817 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); 4818 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4819 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV, 0); 4820 MemOps.push_back(Store); 4821 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4822} 4823 4824SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4825 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4826 SDOperand Chain = Op.getOperand(0); 4827 SDOperand DstPtr = Op.getOperand(1); 4828 SDOperand SrcPtr = Op.getOperand(2); 4829 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 4830 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 4831 4832 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, SrcSV, 0); 4833 Chain = SrcPtr.getValue(1); 4834 for (unsigned i = 0; i < 3; ++i) { 4835 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, SrcSV, 0); 4836 Chain = Val.getValue(1); 4837 Chain = DAG.getStore(Chain, Val, DstPtr, DstSV, 0); 4838 if (i == 2) 4839 break; 4840 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4841 DAG.getIntPtrConstant(8)); 4842 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4843 DAG.getIntPtrConstant(8)); 4844 } 4845 return Chain; 4846} 4847 4848SDOperand 4849X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4850 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4851 switch (IntNo) { 4852 default: return SDOperand(); // Don't custom lower most intrinsics. 4853 // Comparison intrinsics. 
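  // Each of the comi/ucomi intrinsics below lowers to a single X86ISD::COMI
  // or X86ISD::UCOMI compare followed by an X86ISD::SETCC on the appropriate
  // flag; only the compare opcode and condition code vary per intrinsic.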
4854 case Intrinsic::x86_sse_comieq_ss: 4855 case Intrinsic::x86_sse_comilt_ss: 4856 case Intrinsic::x86_sse_comile_ss: 4857 case Intrinsic::x86_sse_comigt_ss: 4858 case Intrinsic::x86_sse_comige_ss: 4859 case Intrinsic::x86_sse_comineq_ss: 4860 case Intrinsic::x86_sse_ucomieq_ss: 4861 case Intrinsic::x86_sse_ucomilt_ss: 4862 case Intrinsic::x86_sse_ucomile_ss: 4863 case Intrinsic::x86_sse_ucomigt_ss: 4864 case Intrinsic::x86_sse_ucomige_ss: 4865 case Intrinsic::x86_sse_ucomineq_ss: 4866 case Intrinsic::x86_sse2_comieq_sd: 4867 case Intrinsic::x86_sse2_comilt_sd: 4868 case Intrinsic::x86_sse2_comile_sd: 4869 case Intrinsic::x86_sse2_comigt_sd: 4870 case Intrinsic::x86_sse2_comige_sd: 4871 case Intrinsic::x86_sse2_comineq_sd: 4872 case Intrinsic::x86_sse2_ucomieq_sd: 4873 case Intrinsic::x86_sse2_ucomilt_sd: 4874 case Intrinsic::x86_sse2_ucomile_sd: 4875 case Intrinsic::x86_sse2_ucomigt_sd: 4876 case Intrinsic::x86_sse2_ucomige_sd: 4877 case Intrinsic::x86_sse2_ucomineq_sd: { 4878 unsigned Opc = 0; 4879 ISD::CondCode CC = ISD::SETCC_INVALID; 4880 switch (IntNo) { 4881 default: break; 4882 case Intrinsic::x86_sse_comieq_ss: 4883 case Intrinsic::x86_sse2_comieq_sd: 4884 Opc = X86ISD::COMI; 4885 CC = ISD::SETEQ; 4886 break; 4887 case Intrinsic::x86_sse_comilt_ss: 4888 case Intrinsic::x86_sse2_comilt_sd: 4889 Opc = X86ISD::COMI; 4890 CC = ISD::SETLT; 4891 break; 4892 case Intrinsic::x86_sse_comile_ss: 4893 case Intrinsic::x86_sse2_comile_sd: 4894 Opc = X86ISD::COMI; 4895 CC = ISD::SETLE; 4896 break; 4897 case Intrinsic::x86_sse_comigt_ss: 4898 case Intrinsic::x86_sse2_comigt_sd: 4899 Opc = X86ISD::COMI; 4900 CC = ISD::SETGT; 4901 break; 4902 case Intrinsic::x86_sse_comige_ss: 4903 case Intrinsic::x86_sse2_comige_sd: 4904 Opc = X86ISD::COMI; 4905 CC = ISD::SETGE; 4906 break; 4907 case Intrinsic::x86_sse_comineq_ss: 4908 case Intrinsic::x86_sse2_comineq_sd: 4909 Opc = X86ISD::COMI; 4910 CC = ISD::SETNE; 4911 break; 4912 case Intrinsic::x86_sse_ucomieq_ss: 4913 case Intrinsic::x86_sse2_ucomieq_sd: 4914 Opc = X86ISD::UCOMI; 4915 CC = ISD::SETEQ; 4916 break; 4917 case Intrinsic::x86_sse_ucomilt_ss: 4918 case Intrinsic::x86_sse2_ucomilt_sd: 4919 Opc = X86ISD::UCOMI; 4920 CC = ISD::SETLT; 4921 break; 4922 case Intrinsic::x86_sse_ucomile_ss: 4923 case Intrinsic::x86_sse2_ucomile_sd: 4924 Opc = X86ISD::UCOMI; 4925 CC = ISD::SETLE; 4926 break; 4927 case Intrinsic::x86_sse_ucomigt_ss: 4928 case Intrinsic::x86_sse2_ucomigt_sd: 4929 Opc = X86ISD::UCOMI; 4930 CC = ISD::SETGT; 4931 break; 4932 case Intrinsic::x86_sse_ucomige_ss: 4933 case Intrinsic::x86_sse2_ucomige_sd: 4934 Opc = X86ISD::UCOMI; 4935 CC = ISD::SETGE; 4936 break; 4937 case Intrinsic::x86_sse_ucomineq_ss: 4938 case Intrinsic::x86_sse2_ucomineq_sd: 4939 Opc = X86ISD::UCOMI; 4940 CC = ISD::SETNE; 4941 break; 4942 } 4943 4944 unsigned X86CC; 4945 SDOperand LHS = Op.getOperand(1); 4946 SDOperand RHS = Op.getOperand(2); 4947 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4948 4949 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 4950 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 4951 DAG.getConstant(X86CC, MVT::i8), Cond); 4952 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4953 } 4954 } 4955} 4956 4957SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4958 // Depths > 0 not supported yet! 
4959 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4960 return SDOperand(); 4961 4962 // Just load the return address 4963 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4964 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4965} 4966 4967SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4968 // Depths > 0 not supported yet! 4969 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4970 return SDOperand(); 4971 4972 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4973 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4974 DAG.getIntPtrConstant(4)); 4975} 4976 4977SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4978 SelectionDAG &DAG) { 4979 // Is not yet supported on x86-64 4980 if (Subtarget->is64Bit()) 4981 return SDOperand(); 4982 4983 return DAG.getIntPtrConstant(8); 4984} 4985 4986SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4987{ 4988 assert(!Subtarget->is64Bit() && 4989 "Lowering of eh_return builtin is not supported yet on x86-64"); 4990 4991 MachineFunction &MF = DAG.getMachineFunction(); 4992 SDOperand Chain = Op.getOperand(0); 4993 SDOperand Offset = Op.getOperand(1); 4994 SDOperand Handler = Op.getOperand(2); 4995 4996 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4997 getPointerTy()); 4998 4999 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 5000 DAG.getIntPtrConstant(-4UL)); 5001 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 5002 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 5003 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 5004 MF.getRegInfo().addLiveOut(X86::ECX); 5005 5006 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 5007 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 5008} 5009 5010SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 5011 SelectionDAG &DAG) { 5012 SDOperand Root = Op.getOperand(0); 5013 SDOperand Trmp = Op.getOperand(1); // trampoline 5014 SDOperand FPtr = Op.getOperand(2); // nested function 5015 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 5016 5017 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 5018 5019 const X86InstrInfo *TII = 5020 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 5021 5022 if (Subtarget->is64Bit()) { 5023 SDOperand OutChains[6]; 5024 5025 // Large code-model. 5026 5027 const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); 5028 const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); 5029 5030 const unsigned char N86R10 = 5031 ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10); 5032 const unsigned char N86R11 = 5033 ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11); 5034 5035 const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix 5036 5037 // Load the pointer to the nested function into R11. 5038 unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 5039 SDOperand Addr = Trmp; 5040 OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5041 TrmpAddr, 0); 5042 5043 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); 5044 OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpAddr, 2, false, 2); 5045 5046 // Load the 'nest' parameter value into R10. 
5047 // R10 is specified in X86CallingConv.td 5048 OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 5049 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); 5050 OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5051 TrmpAddr, 10); 5052 5053 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); 5054 OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 12, false, 2); 5055 5056 // Jump to the nested function. 5057 OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 5058 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); 5059 OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr, 5060 TrmpAddr, 20); 5061 5062 unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 5063 Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); 5064 OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr, 5065 TrmpAddr, 22); 5066 5067 SDOperand Ops[] = 5068 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) }; 5069 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2); 5070 } else { 5071 const Function *Func = 5072 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 5073 unsigned CC = Func->getCallingConv(); 5074 unsigned NestReg; 5075 5076 switch (CC) { 5077 default: 5078 assert(0 && "Unsupported calling convention"); 5079 case CallingConv::C: 5080 case CallingConv::X86_StdCall: { 5081 // Pass 'nest' parameter in ECX. 5082 // Must be kept in sync with X86CallingConv.td 5083 NestReg = X86::ECX; 5084 5085 // Check that ECX wasn't needed by an 'inreg' parameter. 5086 const FunctionType *FTy = Func->getFunctionType(); 5087 const ParamAttrsList *Attrs = Func->getParamAttrs(); 5088 5089 if (Attrs && !Func->isVarArg()) { 5090 unsigned InRegCount = 0; 5091 unsigned Idx = 1; 5092 5093 for (FunctionType::param_iterator I = FTy->param_begin(), 5094 E = FTy->param_end(); I != E; ++I, ++Idx) 5095 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 5096 // FIXME: should only count parameters that are lowered to integers. 5097 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 5098 5099 if (InRegCount > 2) { 5100 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 5101 abort(); 5102 } 5103 } 5104 break; 5105 } 5106 case CallingConv::X86_FastCall: 5107 // Pass 'nest' parameter in EAX. 
5108 // Must be kept in sync with X86CallingConv.td
5109 NestReg = X86::EAX;
5110 break;
5111 }
5112
5113 SDOperand OutChains[4];
5114 SDOperand Addr, Disp;
5115
5116 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
5117 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
5118
5119 const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
5120 const unsigned char N86Reg =
5121 ((const X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
5122 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
5123 Trmp, TrmpAddr, 0);
5124
5125 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
5126 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 1, false, 1);
5127
5128 const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
5129 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
5130 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
5131 TrmpAddr, 5, false, 1);
5132
5133 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
5134 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpAddr, 6, false, 1);
5135
5136 SDOperand Ops[] =
5137 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
5138 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
5139 }
5140}
5141
5142SDOperand X86TargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
5143 /*
5144 The rounding mode is in bits 11:10 of the x87 FP control word (the value
5145 that FNSTCW stores below), and has the following settings:
5146 00 Round to nearest
5147 01 Round to -inf
5148 10 Round to +inf
5149 11 Round to 0
5150
5151 FLT_ROUNDS, on the other hand, expects the following:
5152 -1 Undefined
5153 0 Round to 0
5154 1 Round to nearest
5155 2 Round to +inf
5156 3 Round to -inf
5157
5158 To perform the conversion, we do:
5159 (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
5160 */
5161
5162 MachineFunction &MF = DAG.getMachineFunction();
5163 const TargetMachine &TM = MF.getTarget();
5164 const TargetFrameInfo &TFI = *TM.getFrameInfo();
5165 unsigned StackAlignment = TFI.getStackAlignment();
5166 MVT::ValueType VT = Op.getValueType();
5167
5168 // Save FP Control Word to stack slot
5169 int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
5170 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
5171
5172 SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
5173 DAG.getEntryNode(), StackSlot);
5174
5175 // Load FP Control Word from stack slot
5176 SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
5177
5178 // Extract the two rounding-control bits and remap them per the tables above.
5179 SDOperand CWD1 =
5180 DAG.getNode(ISD::SRL, MVT::i16,
5181 DAG.getNode(ISD::AND, MVT::i16,
5182 CWD, DAG.getConstant(0x800, MVT::i16)),
5183 DAG.getConstant(11, MVT::i8));
5184 SDOperand CWD2 =
5185 DAG.getNode(ISD::SRL, MVT::i16,
5186 DAG.getNode(ISD::AND, MVT::i16,
5187 CWD, DAG.getConstant(0x400, MVT::i16)),
5188 DAG.getConstant(9, MVT::i8));
5189
5190 SDOperand RetVal =
5191 DAG.getNode(ISD::AND, MVT::i16,
5192 DAG.getNode(ISD::ADD, MVT::i16,
5193 DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
5194 DAG.getConstant(1, MVT::i16)),
5195 DAG.getConstant(3, MVT::i16));
5196
5197
5198 return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
5199 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
5200}
5201
5202SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
5203 MVT::ValueType VT = Op.getValueType();
5204 MVT::ValueType OpVT = VT;
5205 unsigned NumBits = MVT::getSizeInBits(VT);
5206
5207 Op = Op.getOperand(0);
5208 if (VT == MVT::i8) {
5209 // Zero extend to i32 since there is not an i8 bsr.
5210 OpVT = MVT::i32;
5211 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
5212 }
5213
5214 // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
5215 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
5216 Op = DAG.getNode(X86ISD::BSR, VTs, Op);
5217
5218 // If src is zero (i.e. bsr sets ZF), select 2*NumBits-1; the xor below maps that to NumBits.
5219 SmallVector<SDOperand, 4> Ops;
5220 Ops.push_back(Op);
5221 Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
5222 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
5223 Ops.push_back(Op.getValue(1));
5224 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
5225
5226 // Finally xor with NumBits-1; bsr yields the index of the highest set bit, so this computes NumBits-1-index, the leading zero count.
5227 Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
5228
5229 if (VT == MVT::i8)
5230 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
5231 return Op;
5232}
5233
5234SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
5235 MVT::ValueType VT = Op.getValueType();
5236 MVT::ValueType OpVT = VT;
5237 unsigned NumBits = MVT::getSizeInBits(VT);
5238
5239 Op = Op.getOperand(0);
5240 if (VT == MVT::i8) {
5241 OpVT = MVT::i32;
5242 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
5243 }
5244
5245 // Issue a bsf (scan bits forward) which also sets EFLAGS.
5246 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
5247 Op = DAG.getNode(X86ISD::BSF, VTs, Op);
5248
5249 // If src is zero (i.e. bsf sets ZF), select NumBits.
5250 SmallVector<SDOperand, 4> Ops;
5251 Ops.push_back(Op);
5252 Ops.push_back(DAG.getConstant(NumBits, OpVT));
5253 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
5254 Ops.push_back(Op.getValue(1));
5255 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
5256
5257 if (VT == MVT::i8)
5258 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
5259 return Op;
5260}
5261
5262/// LowerOperation - Provide custom lowering hooks for some operations.
5263/// 5264SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 5265 switch (Op.getOpcode()) { 5266 default: assert(0 && "Should not custom lower this!"); 5267 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5268 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5269 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5270 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5271 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5272 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5273 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5274 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5275 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 5276 case ISD::SHL_PARTS: 5277 case ISD::SRA_PARTS: 5278 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 5279 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 5280 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 5281 case ISD::FABS: return LowerFABS(Op, DAG); 5282 case ISD::FNEG: return LowerFNEG(Op, DAG); 5283 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5284 case ISD::SETCC: return LowerSETCC(Op, DAG); 5285 case ISD::SELECT: return LowerSELECT(Op, DAG); 5286 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 5287 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5288 case ISD::CALL: return LowerCALL(Op, DAG); 5289 case ISD::RET: return LowerRET(Op, DAG); 5290 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 5291 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 5292 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 5293 case ISD::VASTART: return LowerVASTART(Op, DAG); 5294 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 5295 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5296 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5297 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5298 case ISD::FRAME_TO_ARGS_OFFSET: 5299 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 5300 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 5301 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 5302 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 5303 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5304 case ISD::CTLZ: return LowerCTLZ(Op, DAG); 5305 case ISD::CTTZ: return LowerCTTZ(Op, DAG); 5306 5307 // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands. 5308 case ISD::READCYCLECOUNTER: 5309 return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0); 5310 } 5311} 5312 5313/// ExpandOperation - Provide custom lowering hooks for expanding operations. 
5314SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { 5315 switch (N->getOpcode()) { 5316 default: assert(0 && "Should not custom lower this!"); 5317 case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); 5318 case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); 5319 } 5320} 5321 5322const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 5323 switch (Opcode) { 5324 default: return NULL; 5325 case X86ISD::BSF: return "X86ISD::BSF"; 5326 case X86ISD::BSR: return "X86ISD::BSR"; 5327 case X86ISD::SHLD: return "X86ISD::SHLD"; 5328 case X86ISD::SHRD: return "X86ISD::SHRD"; 5329 case X86ISD::FAND: return "X86ISD::FAND"; 5330 case X86ISD::FOR: return "X86ISD::FOR"; 5331 case X86ISD::FXOR: return "X86ISD::FXOR"; 5332 case X86ISD::FSRL: return "X86ISD::FSRL"; 5333 case X86ISD::FILD: return "X86ISD::FILD"; 5334 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 5335 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 5336 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 5337 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 5338 case X86ISD::FLD: return "X86ISD::FLD"; 5339 case X86ISD::FST: return "X86ISD::FST"; 5340 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 5341 case X86ISD::FP_GET_RESULT2: return "X86ISD::FP_GET_RESULT2"; 5342 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 5343 case X86ISD::CALL: return "X86ISD::CALL"; 5344 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 5345 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 5346 case X86ISD::CMP: return "X86ISD::CMP"; 5347 case X86ISD::COMI: return "X86ISD::COMI"; 5348 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 5349 case X86ISD::SETCC: return "X86ISD::SETCC"; 5350 case X86ISD::CMOV: return "X86ISD::CMOV"; 5351 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 5352 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 5353 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 5354 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 5355 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 5356 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 5357 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 5358 case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; 5359 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 5360 case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; 5361 case X86ISD::PINSRB: return "X86ISD::PINSRB"; 5362 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 5363 case X86ISD::FMAX: return "X86ISD::FMAX"; 5364 case X86ISD::FMIN: return "X86ISD::FMIN"; 5365 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 5366 case X86ISD::FRCP: return "X86ISD::FRCP"; 5367 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 5368 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 5369 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 5370 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 5371 case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; 5372 } 5373} 5374 5375// isLegalAddressingMode - Return true if the addressing mode represented 5376// by AM is legal for this target, for a load/store of the specified type. 5377bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 5378 const Type *Ty) const { 5379 // X86 supports extremely general addressing modes. 5380 5381 // X86 allows a sign-extended 32-bit immediate field as a displacement. 5382 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 5383 return false; 5384 5385 if (AM.BaseGV) { 5386 // We can only fold this if we don't need an extra load. 
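    // (A global whose address must itself be loaded first, e.g. through the
    // GOT in PIC code, cannot be folded into an addressing mode.)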
5387 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 5388 return false; 5389 5390 // X86-64 only supports addr of globals in small code model. 5391 if (Subtarget->is64Bit()) { 5392 if (getTargetMachine().getCodeModel() != CodeModel::Small) 5393 return false; 5394 // If lower 4G is not available, then we must use rip-relative addressing. 5395 if (AM.BaseOffs || AM.Scale > 1) 5396 return false; 5397 } 5398 } 5399 5400 switch (AM.Scale) { 5401 case 0: 5402 case 1: 5403 case 2: 5404 case 4: 5405 case 8: 5406 // These scales always work. 5407 break; 5408 case 3: 5409 case 5: 5410 case 9: 5411 // These scales are formed with basereg+scalereg. Only accept if there is 5412 // no basereg yet. 5413 if (AM.HasBaseReg) 5414 return false; 5415 break; 5416 default: // Other stuff never works. 5417 return false; 5418 } 5419 5420 return true; 5421} 5422 5423 5424bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { 5425 if (!Ty1->isInteger() || !Ty2->isInteger()) 5426 return false; 5427 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 5428 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 5429 if (NumBits1 <= NumBits2) 5430 return false; 5431 return Subtarget->is64Bit() || NumBits1 < 64; 5432} 5433 5434bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1, 5435 MVT::ValueType VT2) const { 5436 if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2)) 5437 return false; 5438 unsigned NumBits1 = MVT::getSizeInBits(VT1); 5439 unsigned NumBits2 = MVT::getSizeInBits(VT2); 5440 if (NumBits1 <= NumBits2) 5441 return false; 5442 return Subtarget->is64Bit() || NumBits1 < 64; 5443} 5444 5445/// isShuffleMaskLegal - Targets can use this to indicate that they only 5446/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5447/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5448/// are assumed to be legal. 5449bool 5450X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 5451 // Only do shuffles on 128-bit vector types for now. 5452 if (MVT::getSizeInBits(VT) == 64) return false; 5453 return (Mask.Val->getNumOperands() <= 4 || 5454 isIdentityMask(Mask.Val) || 5455 isIdentityMask(Mask.Val, true) || 5456 isSplatMask(Mask.Val) || 5457 isPSHUFHW_PSHUFLWMask(Mask.Val) || 5458 X86::isUNPCKLMask(Mask.Val) || 5459 X86::isUNPCKHMask(Mask.Val) || 5460 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 5461 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 5462} 5463 5464bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 5465 MVT::ValueType EVT, 5466 SelectionDAG &DAG) const { 5467 unsigned NumElts = BVOps.size(); 5468 // Only do shuffles on 128-bit vector types for now. 
5469 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 5470 if (NumElts == 2) return true; 5471 if (NumElts == 4) { 5472 return (isMOVLMask(&BVOps[0], 4) || 5473 isCommutedMOVL(&BVOps[0], 4, true) || 5474 isSHUFPMask(&BVOps[0], 4) || 5475 isCommutedSHUFP(&BVOps[0], 4)); 5476 } 5477 return false; 5478} 5479 5480//===----------------------------------------------------------------------===// 5481// X86 Scheduler Hooks 5482//===----------------------------------------------------------------------===// 5483 5484MachineBasicBlock * 5485X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 5486 MachineBasicBlock *BB) { 5487 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5488 switch (MI->getOpcode()) { 5489 default: assert(false && "Unexpected instr type to insert"); 5490 case X86::CMOV_FR32: 5491 case X86::CMOV_FR64: 5492 case X86::CMOV_V4F32: 5493 case X86::CMOV_V2F64: 5494 case X86::CMOV_V2I64: { 5495 // To "insert" a SELECT_CC instruction, we actually have to insert the 5496 // diamond control-flow pattern. The incoming instruction knows the 5497 // destination vreg to set, the condition code register to branch on, the 5498 // true/false values to select between, and a branch opcode to use. 5499 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5500 ilist<MachineBasicBlock>::iterator It = BB; 5501 ++It; 5502 5503 // thisMBB: 5504 // ... 5505 // TrueVal = ... 5506 // cmpTY ccX, r1, r2 5507 // bCC copy1MBB 5508 // fallthrough --> copy0MBB 5509 MachineBasicBlock *thisMBB = BB; 5510 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 5511 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 5512 unsigned Opc = 5513 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 5514 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 5515 MachineFunction *F = BB->getParent(); 5516 F->getBasicBlockList().insert(It, copy0MBB); 5517 F->getBasicBlockList().insert(It, sinkMBB); 5518 // Update machine-CFG edges by first adding all successors of the current 5519 // block to the new block which will contain the Phi node for the select. 5520 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 5521 e = BB->succ_end(); i != e; ++i) 5522 sinkMBB->addSuccessor(*i); 5523 // Next, remove all successors of the current block, and add the true 5524 // and fallthrough blocks as its successors. 5525 while(!BB->succ_empty()) 5526 BB->removeSuccessor(BB->succ_begin()); 5527 BB->addSuccessor(copy0MBB); 5528 BB->addSuccessor(sinkMBB); 5529 5530 // copy0MBB: 5531 // %FalseValue = ... 5532 // # fallthrough to sinkMBB 5533 BB = copy0MBB; 5534 5535 // Update machine-CFG edges 5536 BB->addSuccessor(sinkMBB); 5537 5538 // sinkMBB: 5539 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 5540 // ... 5541 BB = sinkMBB; 5542 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 5543 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5544 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5545 5546 delete MI; // The pseudo instruction is gone now. 5547 return BB; 5548 } 5549 5550 case X86::FP32_TO_INT16_IN_MEM: 5551 case X86::FP32_TO_INT32_IN_MEM: 5552 case X86::FP32_TO_INT64_IN_MEM: 5553 case X86::FP64_TO_INT16_IN_MEM: 5554 case X86::FP64_TO_INT32_IN_MEM: 5555 case X86::FP64_TO_INT64_IN_MEM: 5556 case X86::FP80_TO_INT16_IN_MEM: 5557 case X86::FP80_TO_INT32_IN_MEM: 5558 case X86::FP80_TO_INT64_IN_MEM: { 5559 // Change the floating point control register to use "round towards zero" 5560 // mode when truncating to an integer value. 
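    // C-style FP-to-integer conversion truncates, but the x87 default is
    // round-to-nearest, so the control word's rounding-control bits (11:10)
    // are temporarily forced to 11b (round toward zero) around the store.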
5561 MachineFunction *F = BB->getParent();
5562 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
5563 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
5564
5565 // Load the old value of the control word from the stack slot...
5566 unsigned OldCW =
5567 F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
5568 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
5569
5570 // Store a control word with the rounding-control bits set to round to zero...
5571 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
5572 .addImm(0xC7F);
5573
5574 // Reload the modified control word now...
5575 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
5576
5577 // Restore the memory image of the control word to the original value
5578 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
5579 .addReg(OldCW);
5580
5581 // Get the X86 opcode to use.
5582 unsigned Opc;
5583 switch (MI->getOpcode()) {
5584 default: assert(0 && "illegal opcode!");
5585 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
5586 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
5587 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
5588 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
5589 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
5590 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
5591 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
5592 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
5593 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
5594 }
5595
5596 X86AddressMode AM;
5597 MachineOperand &Op = MI->getOperand(0);
5598 if (Op.isRegister()) {
5599 AM.BaseType = X86AddressMode::RegBase;
5600 AM.Base.Reg = Op.getReg();
5601 } else {
5602 AM.BaseType = X86AddressMode::FrameIndexBase;
5603 AM.Base.FrameIndex = Op.getIndex();
5604 }
5605 Op = MI->getOperand(1);
5606 if (Op.isImmediate())
5607 AM.Scale = Op.getImm();
5608 Op = MI->getOperand(2);
5609 if (Op.isImmediate())
5610 AM.IndexReg = Op.getImm();
5611 Op = MI->getOperand(3);
5612 if (Op.isGlobalAddress()) {
5613 AM.GV = Op.getGlobal();
5614 } else {
5615 AM.Disp = Op.getImm();
5616 }
5617 addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
5618 .addReg(MI->getOperand(4).getReg());
5619
5620 // Reload the original control word now.
5621 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
5622
5623 delete MI; // The pseudo instruction is gone now.
5624 return BB;
5625 }
5626 }
5627}
5628
5629//===----------------------------------------------------------------------===//
5630// X86 Optimization Hooks
5631//===----------------------------------------------------------------------===//
5632
5633void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
5634 const APInt &Mask,
5635 APInt &KnownZero,
5636 APInt &KnownOne,
5637 const SelectionDAG &DAG,
5638 unsigned Depth) const {
5639 unsigned Opc = Op.getOpcode();
5640 assert((Opc >= ISD::BUILTIN_OP_END ||
5641 Opc == ISD::INTRINSIC_WO_CHAIN ||
5642 Opc == ISD::INTRINSIC_W_CHAIN ||
5643 Opc == ISD::INTRINSIC_VOID) &&
5644 "Should use MaskedValueIsZero if you don't know whether Op"
5645 " is a target node!");
5646
5647 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
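  // The only fact recorded below: X86ISD::SETCC materializes 0 or 1, so every
  // bit above the low bit is known to be zero.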
5648 switch (Opc) { 5649 default: break; 5650 case X86ISD::SETCC: 5651 KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), 5652 Mask.getBitWidth() - 1); 5653 break; 5654 } 5655} 5656 5657/// getShuffleScalarElt - Returns the scalar element that will make up the ith 5658/// element of the result of the vector shuffle. 5659static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 5660 MVT::ValueType VT = N->getValueType(0); 5661 SDOperand PermMask = N->getOperand(2); 5662 unsigned NumElems = PermMask.getNumOperands(); 5663 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 5664 i %= NumElems; 5665 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5666 return (i == 0) 5667 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5668 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 5669 SDOperand Idx = PermMask.getOperand(i); 5670 if (Idx.getOpcode() == ISD::UNDEF) 5671 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5672 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 5673 } 5674 return SDOperand(); 5675} 5676 5677/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 5678/// node is a GlobalAddress + an offset. 5679static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 5680 unsigned Opc = N->getOpcode(); 5681 if (Opc == X86ISD::Wrapper) { 5682 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 5683 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 5684 return true; 5685 } 5686 } else if (Opc == ISD::ADD) { 5687 SDOperand N1 = N->getOperand(0); 5688 SDOperand N2 = N->getOperand(1); 5689 if (isGAPlusOffset(N1.Val, GA, Offset)) { 5690 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 5691 if (V) { 5692 Offset += V->getSignExtended(); 5693 return true; 5694 } 5695 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 5696 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 5697 if (V) { 5698 Offset += V->getSignExtended(); 5699 return true; 5700 } 5701 } 5702 } 5703 return false; 5704} 5705 5706/// isConsecutiveLoad - Returns true if N is loading from an address of Base 5707/// + Dist * Size. 
5708static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 5709 MachineFrameInfo *MFI) { 5710 if (N->getOperand(0).Val != Base->getOperand(0).Val) 5711 return false; 5712 5713 SDOperand Loc = N->getOperand(1); 5714 SDOperand BaseLoc = Base->getOperand(1); 5715 if (Loc.getOpcode() == ISD::FrameIndex) { 5716 if (BaseLoc.getOpcode() != ISD::FrameIndex) 5717 return false; 5718 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 5719 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 5720 int FS = MFI->getObjectSize(FI); 5721 int BFS = MFI->getObjectSize(BFI); 5722 if (FS != BFS || FS != Size) return false; 5723 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 5724 } else { 5725 GlobalValue *GV1 = NULL; 5726 GlobalValue *GV2 = NULL; 5727 int64_t Offset1 = 0; 5728 int64_t Offset2 = 0; 5729 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 5730 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 5731 if (isGA1 && isGA2 && GV1 == GV2) 5732 return Offset1 == (Offset2 + Dist*Size); 5733 } 5734 5735 return false; 5736} 5737 5738static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 5739 const X86Subtarget *Subtarget) { 5740 GlobalValue *GV; 5741 int64_t Offset = 0; 5742 if (isGAPlusOffset(Base, GV, Offset)) 5743 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 5744 // DAG combine handles the stack object case. 5745 return false; 5746} 5747 5748 5749/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 5750/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 5751/// if the load addresses are consecutive, non-overlapping, and in the right 5752/// order. 5753static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 5754 const X86Subtarget *Subtarget) { 5755 MachineFunction &MF = DAG.getMachineFunction(); 5756 MachineFrameInfo *MFI = MF.getFrameInfo(); 5757 MVT::ValueType VT = N->getValueType(0); 5758 MVT::ValueType EVT = MVT::getVectorElementType(VT); 5759 SDOperand PermMask = N->getOperand(2); 5760 int NumElems = (int)PermMask.getNumOperands(); 5761 SDNode *Base = NULL; 5762 for (int i = 0; i < NumElems; ++i) { 5763 SDOperand Idx = PermMask.getOperand(i); 5764 if (Idx.getOpcode() == ISD::UNDEF) { 5765 if (!Base) return SDOperand(); 5766 } else { 5767 SDOperand Arg = 5768 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 5769 if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val)) 5770 return SDOperand(); 5771 if (!Base) 5772 Base = Arg.Val; 5773 else if (!isConsecutiveLoad(Arg.Val, Base, 5774 i, MVT::getSizeInBits(EVT)/8,MFI)) 5775 return SDOperand(); 5776 } 5777 } 5778 5779 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 5780 LoadSDNode *LD = cast<LoadSDNode>(Base); 5781 if (isAlign16) { 5782 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5783 LD->getSrcValueOffset(), LD->isVolatile()); 5784 } else { 5785 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5786 LD->getSrcValueOffset(), LD->isVolatile(), 5787 LD->getAlignment()); 5788 } 5789} 5790 5791/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 5792static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 5793 const X86Subtarget *Subtarget) { 5794 SDOperand Cond = N->getOperand(0); 5795 5796 // If we have SSE[12] support, try to form min/max nodes. 
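  // (The guard below requires SSE2 so that both f32 and f64 can be handled;
  // e.g. select(setolt(a, b), a, b) on f32 then becomes a single X86ISD::FMIN,
  // which selects as minss.)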
5797 if (Subtarget->hasSSE2() && 5798 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 5799 if (Cond.getOpcode() == ISD::SETCC) { 5800 // Get the LHS/RHS of the select. 5801 SDOperand LHS = N->getOperand(1); 5802 SDOperand RHS = N->getOperand(2); 5803 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 5804 5805 unsigned Opcode = 0; 5806 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 5807 switch (CC) { 5808 default: break; 5809 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 5810 case ISD::SETULE: 5811 case ISD::SETLE: 5812 if (!UnsafeFPMath) break; 5813 // FALL THROUGH. 5814 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 5815 case ISD::SETLT: 5816 Opcode = X86ISD::FMIN; 5817 break; 5818 5819 case ISD::SETOGT: // (X > Y) ? X : Y -> max 5820 case ISD::SETUGT: 5821 case ISD::SETGT: 5822 if (!UnsafeFPMath) break; 5823 // FALL THROUGH. 5824 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 5825 case ISD::SETGE: 5826 Opcode = X86ISD::FMAX; 5827 break; 5828 } 5829 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 5830 switch (CC) { 5831 default: break; 5832 case ISD::SETOGT: // (X > Y) ? Y : X -> min 5833 case ISD::SETUGT: 5834 case ISD::SETGT: 5835 if (!UnsafeFPMath) break; 5836 // FALL THROUGH. 5837 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 5838 case ISD::SETGE: 5839 Opcode = X86ISD::FMIN; 5840 break; 5841 5842 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 5843 case ISD::SETULE: 5844 case ISD::SETLE: 5845 if (!UnsafeFPMath) break; 5846 // FALL THROUGH. 5847 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 5848 case ISD::SETLT: 5849 Opcode = X86ISD::FMAX; 5850 break; 5851 } 5852 } 5853 5854 if (Opcode) 5855 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 5856 } 5857 5858 } 5859 5860 return SDOperand(); 5861} 5862 5863/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and 5864/// X86ISD::FXOR nodes. 5865static SDOperand PerformFORCombine(SDNode *N, SelectionDAG &DAG) { 5866 assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); 5867 // F[X]OR(0.0, x) -> x 5868 // F[X]OR(x, 0.0) -> x 5869 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 5870 if (C->getValueAPF().isPosZero()) 5871 return N->getOperand(1); 5872 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1))) 5873 if (C->getValueAPF().isPosZero()) 5874 return N->getOperand(0); 5875 return SDOperand(); 5876} 5877 5878/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. 
5879static SDOperand PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { 5880 // FAND(0.0, x) -> 0.0 5881 // FAND(x, 0.0) -> 0.0 5882 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 5883 if (C->getValueAPF().isPosZero()) 5884 return N->getOperand(0); 5885 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1))) 5886 if (C->getValueAPF().isPosZero()) 5887 return N->getOperand(1); 5888 return SDOperand(); 5889} 5890 5891 5892SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 5893 DAGCombinerInfo &DCI) const { 5894 SelectionDAG &DAG = DCI.DAG; 5895 switch (N->getOpcode()) { 5896 default: break; 5897 case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget); 5898 case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); 5899 case X86ISD::FXOR: 5900 case X86ISD::FOR: return PerformFORCombine(N, DAG); 5901 case X86ISD::FAND: return PerformFANDCombine(N, DAG); 5902 } 5903 5904 return SDOperand(); 5905} 5906 5907//===----------------------------------------------------------------------===// 5908// X86 Inline Assembly Support 5909//===----------------------------------------------------------------------===// 5910 5911/// getConstraintType - Given a constraint letter, return the type of 5912/// constraint it is for this target. 5913X86TargetLowering::ConstraintType 5914X86TargetLowering::getConstraintType(const std::string &Constraint) const { 5915 if (Constraint.size() == 1) { 5916 switch (Constraint[0]) { 5917 case 'A': 5918 case 'r': 5919 case 'R': 5920 case 'l': 5921 case 'q': 5922 case 'Q': 5923 case 'x': 5924 case 'Y': 5925 return C_RegisterClass; 5926 default: 5927 break; 5928 } 5929 } 5930 return TargetLowering::getConstraintType(Constraint); 5931} 5932 5933/// LowerXConstraint - try to replace an X constraint, which matches anything, 5934/// with another that has more specific requirements based on the type of the 5935/// corresponding operand. 5936void X86TargetLowering::lowerXConstraint(MVT::ValueType ConstraintVT, 5937 std::string& s) const { 5938 if (MVT::isFloatingPoint(ConstraintVT)) { 5939 if (Subtarget->hasSSE2()) 5940 s = "Y"; 5941 else if (Subtarget->hasSSE1()) 5942 s = "x"; 5943 else 5944 s = "f"; 5945 } else 5946 return TargetLowering::lowerXConstraint(ConstraintVT, s); 5947} 5948 5949/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5950/// vector. If it is invalid, don't add anything to Ops. 5951void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op, 5952 char Constraint, 5953 std::vector<SDOperand>&Ops, 5954 SelectionDAG &DAG) { 5955 SDOperand Result(0, 0); 5956 5957 switch (Constraint) { 5958 default: break; 5959 case 'I': 5960 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5961 if (C->getValue() <= 31) { 5962 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5963 break; 5964 } 5965 } 5966 return; 5967 case 'N': 5968 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5969 if (C->getValue() <= 255) { 5970 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5971 break; 5972 } 5973 } 5974 return; 5975 case 'i': { 5976 // Literal immediates are always ok. 5977 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { 5978 Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType()); 5979 break; 5980 } 5981 5982 // If we are in non-pic codegen mode, we allow the address of a global (with 5983 // an optional displacement) to be used with 'i'. 
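  // For example, "i"(&some_global) or "i"(&some_global + 4) in static codegen
  // would both match below; some_global here stands for any global that needs
  // no extra load to address.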
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                     char Constraint,
                                                     std::vector<SDOperand> &Ops,
                                                     SelectionDAG &DAG) {
  SDOperand Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':   // Constant in [0, 31], e.g. 32-bit shift amounts.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':   // Unsigned 8-bit constant.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // In non-pic codegen mode, we allow the address of a global (with an
    // optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA), (GA+C) or (C+GA).
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the operands in the other order: (C+GA).
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we can't
      // match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;               // Unknown constraint letter.
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}
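// Illustrative example (added commentary, not part of the original revision):
// the 'A' constraint above names the EAX/EDX pair, so 32-bit code such as
//   unsigned long long t;
//   asm volatile("rdtsc" : "=A"(t));
// requests an i64 result split across EDX:EAX, which is exactly where the
// rdtsc instruction leaves the timestamp.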
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax}",i32 -> {eax}, not have it turn
  // into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}
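// Illustrative example (added commentary, not part of the original revision):
// the remapping above is what makes
//   int x;
//   asm("cpuid" : "=a"(x) : "0"(0) : "ebx", "ecx", "edx");
// come out right: TargetLowering resolves "{ax}" to the 16-bit AX register,
// and the i32 operand type then redirects it to EAX in GR32RegisterClass.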