X86ISelLowering.cpp revision 4ee451de366474b9c228b4e5fa573795a715216d
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }
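
  // A note on the action kinds used throughout this constructor: Legal means
  // the target supports the node natively, Promote widens the operation to a
  // larger type, Expand has the legalizer rewrite the node in terms of other
  // operations, and Custom routes the node to this target's LowerOperation
  // hook.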

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT    , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT    , MVT::i32  , Expand);
  }

  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part
  // is not needed.
  setOperationAction(ISD::MUL             , MVT::i8    , Expand);
  setOperationAction(ISD::MULHS           , MVT::i8    , Expand);
  setOperationAction(ISD::MULHU           , MVT::i8    , Expand);
  setOperationAction(ISD::SDIV            , MVT::i8    , Expand);
  setOperationAction(ISD::UDIV            , MVT::i8    , Expand);
  setOperationAction(ISD::SREM            , MVT::i8    , Expand);
  setOperationAction(ISD::UREM            , MVT::i8    , Expand);
  setOperationAction(ISD::MUL             , MVT::i16   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i16   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i16   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i16   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i16   , Expand);
  setOperationAction(ISD::SREM            , MVT::i16   , Expand);
  setOperationAction(ISD::UREM            , MVT::i16   , Expand);
  setOperationAction(ISD::MUL             , MVT::i32   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i32   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i32   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i32   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i32   , Expand);
  setOperationAction(ISD::SREM            , MVT::i32   , Expand);
  setOperationAction(ISD::UREM            , MVT::i32   , Expand);
  setOperationAction(ISD::MUL             , MVT::i64   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i64   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i64   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i64   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i64   , Expand);
  setOperationAction(ISD::SREM            , MVT::i64   , Expand);
  setOperationAction(ISD::UREM            , MVT::i64   , Expand);

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::FLT_ROUNDS       , MVT::i32  , Custom);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);
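
  // Note: CTTZ/CTLZ are custom lowered above so that they can map onto
  // BSF/BSR, whose destination is undefined for a zero input; i16 BSWAP is
  // expanded since the bswap instruction only operates on 32-bit and 64-bit
  // registers.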

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f80  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f80  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN      , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool   , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable      , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress  , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol , MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);
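
  // The custom MEMSET/MEMCPY lowering can emit rep stos / rep movs sequences
  // (or a library call) instead of the generic store-by-store expansion.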

  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY          , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0)); // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
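    // There is no register-to-register form of these conversions: SSE values
    // live in XMM registers while f80 values live on the x87 stack, so a
    // store/load through memory is required.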
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

    // SSE->x87 conversions go through memory.
    setConvertAction(MVT::f32, MVT::f64, Expand);
    setConvertAction(MVT::f32, MVT::f80, Expand);

    // x87->SSE truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    // And x87->x87 truncations also.
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN , MVT::f80 , Expand);
    setOperationAction(ISD::FCOS , MVT::f80 , Expand);
  }
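
  // FSIN/FCOS are expanded to libcalls above unless unsafe FP math is
  // enabled: the x87 fsin/fcos instructions are only accurate over a limited
  // argument range, so they are not used by default.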

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW , MVT::f32 , Expand);
  setOperationAction(ISD::FPOW , MVT::f64 , Expand);
  setOperationAction(ISD::FPOW , MVT::f80 , Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD, MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,   MVT::v4i16, Legal);
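
    // Promote bitwise ops and loads on the narrower MMX types to v1i64 below,
    // so that a single set of 64-bit pand/por/pxor/movq patterns covers all
    // of them.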

    setOperationAction(ISD::AND, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR,  MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,  MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,  MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,  MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
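    // All of the 128-bit vector types share the VR128 register class; the
    // element type only affects which instructions are selected, not where
    // the values live.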

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
        continue;
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }
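
    // Note: both loops above walk VT from v16i8 up to (but not including)
    // v2i64, which relies on the 128-bit integer vector types being
    // consecutive in the MVT enumeration.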

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
                                                      SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node if
/// it exists, skipping a possible ISD::TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i = 3; i < TailCall.getNumOperands() - 1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't really
    // a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered.  It returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      SDOperand StoreLoc;
      const Value *SrcVal = 0;
      int SrcValOffset = 0;
      MVT::ValueType RetStoreVT = RVLocs[0].getValVT();

      // Determine where to store the value.  If the call result is directly
      // used by a store, see if we can store directly into the location.  In
      // this case, we'll end up producing a fst + movss[load] + movss[store] to
      // the same location, and the two movss's will be nuked as dead.  This
      // optimizes common things like "*D = atof(..)" to not need an
      // intermediate stack slot.
      if (SDOperand(TheCall, 0).hasOneUse() &&
          SDOperand(TheCall, 1).hasOneUse()) {
        // In addition to direct uses, we also support a FP_ROUND that uses the
        // value, if it is directly stored somewhere.
        SDNode *User = *TheCall->use_begin();
        if (User->getOpcode() == ISD::FP_ROUND && User->hasOneUse())
          User = *User->use_begin();

        // Ok, we have one use of the value and one use of the chain.  See if
        // they are the same node: a store.
        if (StoreSDNode *N = dyn_cast<StoreSDNode>(User)) {
          // Verify that the value being stored is either the call or a
          // truncation of the call.
          SDNode *StoreVal = N->getValue().Val;
          if (StoreVal == TheCall)
            ; // ok.
          else if (StoreVal->getOpcode() == ISD::FP_ROUND &&
                   StoreVal->hasOneUse() &&
                   StoreVal->getOperand(0).Val == TheCall)
            ; // ok.
          else
            N = 0;  // not ok.

          if (N && N->getChain().Val == TheCall &&
              !N->isVolatile() && !N->isTruncatingStore() &&
              N->getAddressingMode() == ISD::UNINDEXED) {
            StoreLoc = N->getBasePtr();
            SrcVal = N->getSrcValue();
            SrcValOffset = N->getSrcValueOffset();
            RetStoreVT = N->getValue().getValueType();
          }
        }
      }

      // If we weren't able to optimize the result, just create a temporary
      // stack slot.
      if (StoreLoc.Val == 0) {
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
      }

      // FIXME: Currently the FST is flagged to the FP_GET_RESULT.  This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks (which could happen if a select gets lowered into
      // multiple blocks and scheduled in between them).  When stackifier is
      // fixed, they can be uncoupled.
      SDOperand Ops[] = {
        Chain, RetVal, StoreLoc, DAG.getValueType(RetStoreVT), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RetStoreVT, Chain,
                           StoreLoc, SrcVal, SrcValOffset);
      Chain = RetVal.getValue(1);

      // If we optimized a truncate, then extend the result back to its desired
      // type.
      if (RVLocs[0].getValVT() != RetStoreVT)
        RetVal = DAG.getNode(ISD::FP_EXTEND, RVLocs[0].getValVT(), RetVal);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention only slightly: the callee cleans
//  up the stack rather than the caller, and symbols are decorated in a
//  distinctive way.  It does not support any vector arguments.
//  For info on the fast calling convention see the Fast Calling Convention
//  (tail call) implementation, LowerX86_32FastCCCallTo.
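//  For example, a stdcall function that takes 12 bytes of arguments returns
//  with "ret 12", popping its own arguments off the stack.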

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// align stack arguments according to platform alignment needed for tail calls
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg,
                 getTargetMachine(), ArgLocs);
  // Check for possible tail call calling convention.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // align stack specially for tail calls
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  ArgValues.push_back(Root);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  // Tail call calling convention (CallingConv::Fast) does not support varargs.
  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "CallingConv::Fast does not support varargs.");

  if (isStdCall && !isVarArg &&
      ((CC == CallingConv::Fast && PerformTailCallOpt) ||
       CC != CallingConv::Fast)) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0;          // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;
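  // StackPtr is materialized lazily below, only if some argument actually
  // lands in a stack slot.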

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF/PIC requires the GOT pointer to be in the EBX register before function
  // calls made via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall ||
      (CC == CallingConv::Fast && PerformTailCallOpt)) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
    assert(!(isVarArg && CC == CallingConv::Fast) &&
           "CallingConv::Fast does not support varargs.");
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, getPointerTy()),
                             DAG.getConstant(NumBytesForCalleeToPush,
                                             getPointerTy()),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the argument area takes 8n+4 bytes so that the arguments are
    // still aligned after the return address has been pushed.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }
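
  // Example: 8 bytes of arguments become 12; together with the 4-byte return
  // address the call then takes 16 bytes of stack, preserving 8-byte
  // alignment.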

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);
    SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);

    return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
                         AlwaysInline);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the argument area takes 8n+4 bytes so that the arguments are
    // still aligned after the return address has been pushed.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is) turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls made via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  assert(!isTailCall && "no tail call here");
  Chain = DAG.getNode(X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);
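  // Note: in CALLSEQ_END below, the first constant operand is the number of
  // bytes allocated by the matching CALLSEQ_START and the second is the
  // number of bytes the callee pops on return (compare LowerX86_64CCCCallTo,
  // which passes NumBytesForCalleeToPush there).  fastcall is a callee-pop
  // convention, so both operands are NumBytes here.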
  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}

//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

// Like the stdcall convention (the callee cleans up its arguments), except
// that ECX is reserved for storing the address of the tail-called function.
// Only 2 registers are free for argument passing (inreg).  Tail call
// optimization is performed provided:
//  * tailcallopt is enabled
//  * caller/callee are fastcc
//  * elf/pic is disabled OR
//  * elf/pic is enabled + the callee is in the module + the callee has
//    protected or hidden visibility
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment.  (Dynamic linkers need this - darwin's
// dyld for example)
// If a tail-called function (callee) has more arguments than the caller, the
// caller needs to make sure that there is room to move the RETADDR to.  This
// is achieved by reserving an area the size of the argument delta right
// after the original RETADDR, but before the saved frame pointer or the
// spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3,
// arg4).  Stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round the stack size up so that, together
/// with the pushed return address, it satisfies the stack alignment, e.g.
/// 16n+12 for a 16-byte alignment requirement.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // The remainder is small enough; just add the difference.
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the lower bits, then add the stack alignment once plus
      // StackAlignment - SlotSize bytes.
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment - SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}
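// For example, on x86-32 with a 16-byte stack alignment (SlotSize == 4):
//   StackSize 8  -> 8 & 15 == 8  <= 12, so Offset becomes 8 + (12 - 8) = 12
//   StackSize 14 -> 14 & 15 == 14 > 12, so Offset becomes 0 + 16 + 12  = 28
// In both cases pushing the 4-byte return address brings the total to a
// multiple of 16.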
/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return.  A function is eligible if
/// the caller/callee calling conventions match, currently only fastcc
/// supports tail calls, and the function CALL is immediately followed by a
/// RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                          SDOperand Ret,
                                                          SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  // Check whether the CALL node immediately precedes the RET node and
  // whether the return uses the result of the node or is a void return.
  unsigned NumOps = Ret.getNumOperands();
  if ((NumOps == 1 &&
       (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
        Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
      (NumOps > 1 &&
       Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
       Ret.getOperand(1) == SDOperand(Call.Val,0))) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDOperand Callee = Call.getOperand(4);
      // On elf/pic %ebx needs to be livein.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT())
        return true;

      // Can only do local tail calls with PIC.
      GlobalValue *GV = 0;
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G != 0 &&
          (GV = G->getGlobal()) &&
          (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
        return true;
    }
  }

  return false;
}

SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
                                                 SelectionDAG &DAG,
                                                 unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  bool is64Bit = Subtarget->is64Bit();

  assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (is64Bit)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);

  // Lower arguments at fp - stackoffset + fpdiff.
  MachineFunction &MF = DAG.getMachineFunction();

  unsigned NumBytesToBePushed =
    GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);

  unsigned NumBytesCallerPushed =
    MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
  int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;

  // Record how far the return address stack slot moves, but only update the
  // recorded delta if this call moves it further than any previous one.
  if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
    MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytesToBePushed,
                                               getPointerTy()));

  // Adjust the return address stack slot.
  SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
  if (FPDiff) {
    MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
    RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
    // Load the "old" return address.
    RetAddrFrIdx = DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
    // Calculate the new stack slot for the return address.
    int SlotSize = is64Bit ? 8 : 4;
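    // For example, if the caller popped 16 bytes of arguments but this tail
    // call pushes 32, FPDiff is -16 and the return address must be re-stored
    // in the new slot created below at offset FPDiff - SlotSize.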
    int NewReturnAddrFI =
      MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
    NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
    Chain = SDOperand(RetAddrFrIdx.Val, 1);
  }

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;
  SmallVector<SDOperand, 8> MemOpChains2;
  SDOperand FramePtr, StackPtr;
  SDOperand PtrOff;
  SDOperand FIN;
  int FI = 0;

  // Walk the register/memloc assignments, inserting copies/loads.  Lower
  // the arguments first to the stack slots they would occupy in a normal
  // (non-tail) function call.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  InFlag = SDOperand();

  // Copy the stack-slot arguments from their temporary slots to the stack
  // slots of the tail-called function.  This is needed because lowering the
  // arguments directly to their final stack slots could overwrite arguments
  // that have not been copied yet.
  // TODO: To make this more efficient (sometimes saving a store/load) we
  // could analyse the arguments and emit this store/load/store sequence
  // only for arguments which would be overwritten otherwise.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (!VA.isRegLoc()) {
      SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
      unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();

      // Get the source stack slot.
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      // Create the frame index.
      int32_t Offset = VA.getLocMemOffset()+FPDiff;
      uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
      FIN = DAG.getFrameIndex(FI, MVT::i32);
      if (Flags & ISD::ParamFlags::ByVal) {
        // Copy relative to framepointer.
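        // The by-value size and alignment are decoded from the packed
        // parameter-flags word; e.g. a 64-byte struct with 16-byte alignment
        // is copied here with an inline memcpy of Size 64 and Align 16.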
        unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                               ISD::ParamFlags::ByValAlignOffs);

        unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                        ISD::ParamFlags::ByValSizeOffs;

        SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
        SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
        SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);

        MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
                                             AlignNode, AlwaysInline));
      } else {
        SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff,
                                          NULL, 0);
        // Store relative to framepointer.
        MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
      }
    }
  }

  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains2[0], MemOpChains2.size());

  // Store the return address to the appropriate stack slot.
  if (FPDiff)
    Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);

  // ELF / PIC normally requires the GOT pointer in the EBX register before
  // function calls made via the PLT.  That does not work with tail calls,
  // since EBX is not restored correctly by the tail caller.
  // TODO: at least for x86 - verify for x86-64.

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is) turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  else {
    assert(Callee.getOpcode() == ISD::LOAD &&
           "Function destination must be loaded into virtual register");
    unsigned Opc = is64Bit ? X86::R9 : X86::ECX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add the register as live out.
    DAG.getMachineFunction().addLiveOut(Opc);
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;

  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a chain & a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  assert(InFlag.Val &&
         "Flag must be set.  Depend on flag being set in LowerRET");
  Chain = DAG.getNode(X86ISD::TAILCALL,
                      Op.Val->getVTList(), &Ops[0], Ops.size());

  return SDOperand(Chain.Val, Op.ResNo);
}

//===----------------------------------------------------------------------===//
// X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip
    // later places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass;  // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
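      // For example, a v8i8 argument arrives in a 64-bit GPR as an i64 and
      // is bit-converted back to its vector type here.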
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    assert(CC != CallingConv::Fast &&
           "Var arg not supported with calling convention fastcc");
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so
    // they may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset,
                                                getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);
  // The tail call convention (fastcc) needs the callee to pop its arguments.
  if (CC == CallingConv::Fast && PerformTailCallOpt) {
    BytesToPopOnReturn = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0;  // Callee pops nothing.
    BytesCallerReserves = StackSize;
  }
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
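  // Note: the register save area created above is 6*8 + 8*16 == 176 bytes:
  // the six GPRs are spilled at offsets 0-40 and the eight XMM registers at
  // 48-160, matching the gp_offset/fp_offset fields that va_arg consumes.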
  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    assert(CallingConv::Fast != CC &&
           "Var args not supported with calling convention fastcc");

    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...)
    // in the declaration) %al is used as a hidden argument to specify the
    // number of SSE registers used.  The contents of %al do not need to
    // match exactly the number of registers, but must be an upper bound on
    // the number of SSE registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
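    // For example, a call that passes a single double in XMM0 sets AL to 1
    // below; a fully prototyped integer-only call would set it to 0.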
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is) turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (getTargetMachine().getCodeModel() != CodeModel::Large
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    if (getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);
  int NumBytesForCalleeToPush = 0;
  if (CC == CallingConv::Fast && PerformTailCallOpt) {
    NumBytesForCalleeToPush = NumBytes;  // Callee pops everything.
  } else {
    NumBytesForCalleeToPush = 0;  // Callee pops nothing.
  }
  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
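    // On entry the return address sits just below the frame, so the fixed
    // objects created below live at offset -8 (x86-64) or -4 (x86-32).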
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}



/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code.  It returns false if it cannot do a direct
/// translation.  X86CC is the translated CondCode.  LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true;  // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true;  // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true;  // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true;  // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}
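// For example, lowering (setolt %a, %b) swaps the operands and uses COND_A:
// after the swap the comparison computes %b vs. %a, and "above" (CF == 0,
// ZF == 0) holds exactly when %b > %a with both operands ordered, which per
// the flags table above is the 'olt' predicate on the original operands.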
/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code?  The current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value falls within the specified range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // The lower quadword must be copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // The upper quadword may be shuffled, but only within itself.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // The upper quadword must be copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // The lower quadword may be shuffled, but only within itself.
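  // e.g. <2,1,0,3, 4,5,6,7> is a valid PSHUFLW mask, while
  // <0,1,2,3, 5,4,7,6> is not (its upper quadword is reordered, so it
  // belongs to PSHUFHW instead).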
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly the reverse
/// of what x86 shuffles want.  x86 shuffles require the lower half elements
/// to come from vector 1 (which would equal the dest.) and the upper half to
/// come from vector 2.
static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for the canonical
/// form of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}
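// For example, shuffling <4 x float> V1, V2 with mask <6,7,2,3> takes the
// high half of V2 into the low half of the result and keeps the high half
// of V1, which is exactly what MOVHLPS V1, V2 computes.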
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}
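// For example, with 4 elements the canonical UNPCKL mask is <0,4,1,5> and
// the canonical UNPCKH mask is <2,6,3,7>.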
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for the canonical
/// form of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for the canonical
/// form of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants.  x86 movs requires the lowest element to be the
/// lowest element of vector 2, and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}
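// For example, with 4 elements <4,1,2,3> is the canonical MOVL mask:
// element 0 of V2 moves into lane 0 and lanes 1-3 come from V1 unchanged.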
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same,
  // and if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities with a single
  // instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}
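// For example, the 4-element mask <3,2,1,0> is encoded (starting from the
// last operand and working backwards) as 0b00011011 == 0x1B, the PSHUFD
// immediate that reverses all four elements.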
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // The lower quadword may be shuffled, but only within itself.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // The upper quadword may be shuffled, but only within itself.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap the vector_shuffle operands as well as the
/// values in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                      SDOperand &V2, SDOperand &Mask,
                                      SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// CommuteVectorShuffleMask - Change the values in a shuffle permute mask
/// assuming the two vector operands have swapped position.
static
SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
}


/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps.  The lower half elements should come from the upper half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return ISD::isNON_EXTLoad(N);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed
/// to match movlp{s|d}.  The lower half elements should come from the lower
/// half of V1 (and in order), and the upper half elements should come from
/// the upper half of V2 (and in order).  And since V1 will become the source
/// of the MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation.  We will try to
  // use load folding with the shufps op instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements
/// are all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be
/// resolved to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be
/// resolved to a zero vector.
static bool isZeroShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF)
      continue;

    unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
    if (Idx < NumElems) {
      unsigned Opc = V1.Val->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V1.Val->getOperand(Idx)))
        return false;
    } else if (Idx >= NumElems) {
      unsigned Opc = V2.Val->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
        return false;
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");

  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
  // type. This ensures they get CSE'd.
  SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
  SDOperand Vec;
  if (MVT::getSizeInBits(VT) == 64)  // MMX
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  else                               // SSE
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}

/// getOnesVector - Returns a vector of specified type with all bits set.
///
static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");

  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
  // type. This ensures they get CSE'd.
  SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
  SDOperand Vec;
  if (MVT::getSizeInBits(VT) == 64)  // MMX
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  else                               // SSE
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}


/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  SmallVector<SDOperand, 8> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
                       &MaskVec[0], MaskVec.size());
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
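/// For NumElems == 4 the mask is <4, 1, 2, 3>: the low element is taken from
/// V2 and the remaining elements are passed through from V1.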
3046static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 3047 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3048 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 3049 3050 SmallVector<SDOperand, 8> MaskVec; 3051 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 3052 for (unsigned i = 1; i != NumElems; ++i) 3053 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3054 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3055} 3056 3057/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 3058/// of specified width. 3059static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 3060 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3061 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 3062 SmallVector<SDOperand, 8> MaskVec; 3063 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 3064 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3065 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 3066 } 3067 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3068} 3069 3070/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 3071/// of specified width. 3072static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 3073 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3074 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 3075 unsigned Half = NumElems/2; 3076 SmallVector<SDOperand, 8> MaskVec; 3077 for (unsigned i = 0; i != Half; ++i) { 3078 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 3079 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 3080 } 3081 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 3082} 3083 3084/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 3085/// 3086static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 3087 SDOperand V1 = Op.getOperand(0); 3088 SDOperand Mask = Op.getOperand(2); 3089 MVT::ValueType VT = Op.getValueType(); 3090 unsigned NumElems = Mask.getNumOperands(); 3091 Mask = getUnpacklMask(NumElems, DAG); 3092 while (NumElems != 4) { 3093 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 3094 NumElems >>= 1; 3095 } 3096 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 3097 3098 Mask = getZeroVector(MVT::v4i32, DAG); 3099 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 3100 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 3101 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 3102} 3103 3104/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 3105/// vector of zero or undef vector. This produces a shuffle where the low 3106/// element of V2 is swizzled into the zero/undef vector, landing at element 3107/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 3108static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 3109 unsigned NumElems, unsigned Idx, 3110 bool isZero, SelectionDAG &DAG) { 3111 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 3112 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3113 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 3114 SmallVector<SDOperand, 16> MaskVec; 3115 for (unsigned i = 0; i != NumElems; ++i) 3116 if (i == Idx) // If this is the insertion idx, put the low elt of V2 here. 
      MaskVec.push_back(DAG.getConstant(NumElems, EVT));
    else
      MaskVec.push_back(DAG.getConstant(i, EVT));
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDOperand ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.Val)
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getConstant(i/2, TLI.getPointerTy()));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, DAG);
        else
          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
                      DAG.getConstant(i, TLI.getPointerTy()));
    }
  }

  return V;
}

SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor; all ones are handled with pcmpeqd.
  if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
    // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
    // eliminated on x86-32 hosts.
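    // For example, an all-ones v2i64 falls through to getOnesVector below,
    // which materializes it as a <4 x i32> of ~0 and bitcasts it back.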
3204 if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) 3205 return Op; 3206 3207 if (ISD::isBuildVectorAllOnes(Op.Val)) 3208 return getOnesVector(Op.getValueType(), DAG); 3209 return getZeroVector(Op.getValueType(), DAG); 3210 } 3211 3212 MVT::ValueType VT = Op.getValueType(); 3213 MVT::ValueType EVT = MVT::getVectorElementType(VT); 3214 unsigned EVTBits = MVT::getSizeInBits(EVT); 3215 3216 unsigned NumElems = Op.getNumOperands(); 3217 unsigned NumZero = 0; 3218 unsigned NumNonZero = 0; 3219 unsigned NonZeros = 0; 3220 bool HasNonImms = false; 3221 SmallSet<SDOperand, 8> Values; 3222 for (unsigned i = 0; i < NumElems; ++i) { 3223 SDOperand Elt = Op.getOperand(i); 3224 if (Elt.getOpcode() == ISD::UNDEF) 3225 continue; 3226 Values.insert(Elt); 3227 if (Elt.getOpcode() != ISD::Constant && 3228 Elt.getOpcode() != ISD::ConstantFP) 3229 HasNonImms = true; 3230 if (isZeroNode(Elt)) 3231 NumZero++; 3232 else { 3233 NonZeros |= (1 << i); 3234 NumNonZero++; 3235 } 3236 } 3237 3238 if (NumNonZero == 0) { 3239 // All undef vector. Return an UNDEF. All zero vectors were handled above. 3240 return DAG.getNode(ISD::UNDEF, VT); 3241 } 3242 3243 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3244 if (Values.size() == 1) 3245 return SDOperand(); 3246 3247 // Special case for single non-zero element. 3248 if (NumNonZero == 1 && NumElems <= 4) { 3249 unsigned Idx = CountTrailingZeros_32(NonZeros); 3250 SDOperand Item = Op.getOperand(Idx); 3251 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3252 if (Idx == 0) 3253 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 3254 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 3255 NumZero > 0, DAG); 3256 else if (!HasNonImms) // Otherwise, it's better to do a constpool load. 3257 return SDOperand(); 3258 3259 if (EVTBits == 32) { 3260 // Turn it into a shuffle of zero and zero-extended scalar to vector. 3261 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 3262 DAG); 3263 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3264 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 3265 SmallVector<SDOperand, 8> MaskVec; 3266 for (unsigned i = 0; i < NumElems; i++) 3267 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3268 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3269 &MaskVec[0], MaskVec.size()); 3270 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3271 DAG.getNode(ISD::UNDEF, VT), Mask); 3272 } 3273 } 3274 3275 // A vector full of immediates; various special cases are already 3276 // handled, so this is best done with a single constant-pool load. 3277 if (!HasNonImms) 3278 return SDOperand(); 3279 3280 // Let legalizer expand 2-wide build_vectors. 3281 if (EVTBits == 64) 3282 return SDOperand(); 3283 3284 // If element VT is < 32 bits, convert it to inserts into a zero vector. 3285 if (EVTBits == 8 && NumElems == 16) { 3286 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 3287 *this); 3288 if (V.Val) return V; 3289 } 3290 3291 if (EVTBits == 16 && NumElems == 8) { 3292 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 3293 *this); 3294 if (V.Val) return V; 3295 } 3296 3297 // If element VT is == 32 bits, turn it into a number of shuffles. 
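  // For example, v4f32 <x, 0, y, 0> becomes two scalar_to_vector nodes that
  // are each combined with a zero vector (giving <x,0,0,0> and <y,0,0,0>),
  // which a final <0,1,4,5> shuffle then merges into <x,0,y,0>.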
3298 SmallVector<SDOperand, 8> V; 3299 V.resize(NumElems); 3300 if (NumElems == 4 && NumZero > 0) { 3301 for (unsigned i = 0; i < 4; ++i) { 3302 bool isZero = !(NonZeros & (1 << i)); 3303 if (isZero) 3304 V[i] = getZeroVector(VT, DAG); 3305 else 3306 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3307 } 3308 3309 for (unsigned i = 0; i < 2; ++i) { 3310 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 3311 default: break; 3312 case 0: 3313 V[i] = V[i*2]; // Must be a zero vector. 3314 break; 3315 case 1: 3316 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 3317 getMOVLMask(NumElems, DAG)); 3318 break; 3319 case 2: 3320 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3321 getMOVLMask(NumElems, DAG)); 3322 break; 3323 case 3: 3324 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 3325 getUnpacklMask(NumElems, DAG)); 3326 break; 3327 } 3328 } 3329 3330 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 3331 // clears the upper bits. 3332 // FIXME: we can do the same for v4f32 case when we know both parts of 3333 // the lower half come from scalar_to_vector (loadf32). We should do 3334 // that in post legalizer dag combiner with target specific hooks. 3335 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 3336 return V[0]; 3337 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3338 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 3339 SmallVector<SDOperand, 8> MaskVec; 3340 bool Reverse = (NonZeros & 0x3) == 2; 3341 for (unsigned i = 0; i < 2; ++i) 3342 if (Reverse) 3343 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 3344 else 3345 MaskVec.push_back(DAG.getConstant(i, EVT)); 3346 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 3347 for (unsigned i = 0; i < 2; ++i) 3348 if (Reverse) 3349 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 3350 else 3351 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 3352 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3353 &MaskVec[0], MaskVec.size()); 3354 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 3355 } 3356 3357 if (Values.size() > 2) { 3358 // Expand into a number of unpckl*. 3359 // e.g. for v4f32 3360 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 3361 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 3362 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 3363 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 3364 for (unsigned i = 0; i < NumElems; ++i) 3365 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 3366 NumElems >>= 1; 3367 while (NumElems != 0) { 3368 for (unsigned i = 0; i < NumElems; ++i) 3369 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 3370 UnpckMask); 3371 NumElems >>= 1; 3372 } 3373 return V[0]; 3374 } 3375 3376 return SDOperand(); 3377} 3378 3379static 3380SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2, 3381 SDOperand PermMask, SelectionDAG &DAG, 3382 TargetLowering &TLI) { 3383 SDOperand NewV; 3384 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8); 3385 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 3386 MVT::ValueType PtrVT = TLI.getPointerTy(); 3387 SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(), 3388 PermMask.Val->op_end()); 3389 3390 // First record which half of which vector the low elements come from. 
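  // e.g. for mask <0, 2, 11, 1, ...> three of the four low elements come from
  // quad 0 (elements 0-3 of V1), so BestLowQuad below ends up being 0.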
3391 SmallVector<unsigned, 4> LowQuad(4); 3392 for (unsigned i = 0; i < 4; ++i) { 3393 SDOperand Elt = MaskElts[i]; 3394 if (Elt.getOpcode() == ISD::UNDEF) 3395 continue; 3396 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3397 int QuadIdx = EltIdx / 4; 3398 ++LowQuad[QuadIdx]; 3399 } 3400 int BestLowQuad = -1; 3401 unsigned MaxQuad = 1; 3402 for (unsigned i = 0; i < 4; ++i) { 3403 if (LowQuad[i] > MaxQuad) { 3404 BestLowQuad = i; 3405 MaxQuad = LowQuad[i]; 3406 } 3407 } 3408 3409 // Record which half of which vector the high elements come from. 3410 SmallVector<unsigned, 4> HighQuad(4); 3411 for (unsigned i = 4; i < 8; ++i) { 3412 SDOperand Elt = MaskElts[i]; 3413 if (Elt.getOpcode() == ISD::UNDEF) 3414 continue; 3415 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3416 int QuadIdx = EltIdx / 4; 3417 ++HighQuad[QuadIdx]; 3418 } 3419 int BestHighQuad = -1; 3420 MaxQuad = 1; 3421 for (unsigned i = 0; i < 4; ++i) { 3422 if (HighQuad[i] > MaxQuad) { 3423 BestHighQuad = i; 3424 MaxQuad = HighQuad[i]; 3425 } 3426 } 3427 3428 // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. 3429 if (BestLowQuad != -1 || BestHighQuad != -1) { 3430 // First sort the 4 chunks in order using shufpd. 3431 SmallVector<SDOperand, 8> MaskVec; 3432 if (BestLowQuad != -1) 3433 MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); 3434 else 3435 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 3436 if (BestHighQuad != -1) 3437 MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); 3438 else 3439 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 3440 SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); 3441 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 3442 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), 3443 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); 3444 NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); 3445 3446 // Now sort high and low parts separately. 3447 BitVector InOrder(8); 3448 if (BestLowQuad != -1) { 3449 // Sort lower half in order using PSHUFLW. 3450 MaskVec.clear(); 3451 bool AnyOutOrder = false; 3452 for (unsigned i = 0; i != 4; ++i) { 3453 SDOperand Elt = MaskElts[i]; 3454 if (Elt.getOpcode() == ISD::UNDEF) { 3455 MaskVec.push_back(Elt); 3456 InOrder.set(i); 3457 } else { 3458 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3459 if (EltIdx != i) 3460 AnyOutOrder = true; 3461 MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); 3462 // If this element is in the right place after this shuffle, then 3463 // remember it. 3464 if ((int)(EltIdx / 4) == BestLowQuad) 3465 InOrder.set(i); 3466 } 3467 } 3468 if (AnyOutOrder) { 3469 for (unsigned i = 4; i != 8; ++i) 3470 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3471 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3472 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3473 } 3474 } 3475 3476 if (BestHighQuad != -1) { 3477 // Sort high half in order using PSHUFHW if possible. 
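      // The first four mask entries below are the identity <0,1,2,3>, so the
      // PSHUFHW leaves the already-sorted low half untouched.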
      MaskVec.clear();
      for (unsigned i = 0; i != 4; ++i)
        MaskVec.push_back(DAG.getConstant(i, MaskEVT));
      bool AnyOutOrder = false;
      for (unsigned i = 4; i != 8; ++i) {
        SDOperand Elt = MaskElts[i];
        if (Elt.getOpcode() == ISD::UNDEF) {
          MaskVec.push_back(Elt);
          InOrder.set(i);
        } else {
          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
          if (EltIdx != i)
            AnyOutOrder = true;
          MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
          // If this element is in the right place after this shuffle, then
          // remember it.
          if ((int)(EltIdx / 4) == BestHighQuad)
            InOrder.set(i);
        }
      }
      if (AnyOutOrder) {
        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
      }
    }

    // The other elements are put in the right place using pextrw and pinsrw.
    for (unsigned i = 0; i != 8; ++i) {
      if (InOrder[i])
        continue;
      SDOperand Elt = MaskElts[i];
      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
      if (EltIdx == i)
        continue;
      SDOperand ExtOp = (EltIdx < 8)
        ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
                      DAG.getConstant(EltIdx, PtrVT))
        : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
                      DAG.getConstant(EltIdx - 8, PtrVT));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
                         DAG.getConstant(i, PtrVT));
    }
    return NewV;
  }

  // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
  // as few as possible.
  // First, let's find out how many elements are already in the right order.
  unsigned V1InOrder = 0;
  unsigned V1FromV1 = 0;
  unsigned V2InOrder = 0;
  unsigned V2FromV2 = 0;
  SmallVector<SDOperand, 8> V1Elts;
  SmallVector<SDOperand, 8> V2Elts;
  for (unsigned i = 0; i < 8; ++i) {
    SDOperand Elt = MaskElts[i];
    if (Elt.getOpcode() == ISD::UNDEF) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(Elt);
      ++V1InOrder;
      ++V2InOrder;
      continue;
    }
    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
    if (EltIdx == i) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
      ++V1InOrder;
    } else if (EltIdx == i+8) {
      V1Elts.push_back(Elt);
      V2Elts.push_back(DAG.getConstant(i, MaskEVT));
      ++V2InOrder;
    } else if (EltIdx < 8) {
      V1Elts.push_back(Elt);
      ++V1FromV1;
    } else {
      V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
      ++V2FromV2;
    }
  }

  if (V2InOrder > V1InOrder) {
    PermMask = CommuteVectorShuffleMask(PermMask, DAG);
    std::swap(V1, V2);
    std::swap(V1Elts, V2Elts);
    std::swap(V1FromV1, V2FromV2);
  }

  if ((V1FromV1 + V1InOrder) != 8) {
    // Some elements are from V2.
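    // First fix up the elements that come from V1 with a single in-place
    // shuffle, then pextrw/pinsrw the V2 elements into position.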
3568 if (V1FromV1) { 3569 // If there are elements that are from V1 but out of place, 3570 // then first sort them in place 3571 SmallVector<SDOperand, 8> MaskVec; 3572 for (unsigned i = 0; i < 8; ++i) { 3573 SDOperand Elt = V1Elts[i]; 3574 if (Elt.getOpcode() == ISD::UNDEF) { 3575 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3576 continue; 3577 } 3578 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3579 if (EltIdx >= 8) 3580 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3581 else 3582 MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT)); 3583 } 3584 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3585 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask); 3586 } 3587 3588 NewV = V1; 3589 for (unsigned i = 0; i < 8; ++i) { 3590 SDOperand Elt = V1Elts[i]; 3591 if (Elt.getOpcode() == ISD::UNDEF) 3592 continue; 3593 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3594 if (EltIdx < 8) 3595 continue; 3596 SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, 3597 DAG.getConstant(EltIdx - 8, PtrVT)); 3598 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3599 DAG.getConstant(i, PtrVT)); 3600 } 3601 return NewV; 3602 } else { 3603 // All elements are from V1. 3604 NewV = V1; 3605 for (unsigned i = 0; i < 8; ++i) { 3606 SDOperand Elt = V1Elts[i]; 3607 if (Elt.getOpcode() == ISD::UNDEF) 3608 continue; 3609 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3610 SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, 3611 DAG.getConstant(EltIdx, PtrVT)); 3612 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, 3613 DAG.getConstant(i, PtrVT)); 3614 } 3615 return NewV; 3616 } 3617} 3618 3619/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide 3620/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be 3621/// done when every pair / quad of shuffle mask elements point to elements in 3622/// the right sequence. e.g. 3623/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> 3624static 3625SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2, 3626 MVT::ValueType VT, 3627 SDOperand PermMask, SelectionDAG &DAG, 3628 TargetLowering &TLI) { 3629 unsigned NumElems = PermMask.getNumOperands(); 3630 unsigned NewWidth = (NumElems == 4) ? 
2 : 4; 3631 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth); 3632 MVT::ValueType NewVT = MaskVT; 3633 switch (VT) { 3634 case MVT::v4f32: NewVT = MVT::v2f64; break; 3635 case MVT::v4i32: NewVT = MVT::v2i64; break; 3636 case MVT::v8i16: NewVT = MVT::v4i32; break; 3637 case MVT::v16i8: NewVT = MVT::v4i32; break; 3638 default: assert(false && "Unexpected!"); 3639 } 3640 3641 if (NewWidth == 2) 3642 if (MVT::isInteger(VT)) 3643 NewVT = MVT::v2i64; 3644 else 3645 NewVT = MVT::v2f64; 3646 unsigned Scale = NumElems / NewWidth; 3647 SmallVector<SDOperand, 8> MaskVec; 3648 for (unsigned i = 0; i < NumElems; i += Scale) { 3649 unsigned StartIdx = ~0U; 3650 for (unsigned j = 0; j < Scale; ++j) { 3651 SDOperand Elt = PermMask.getOperand(i+j); 3652 if (Elt.getOpcode() == ISD::UNDEF) 3653 continue; 3654 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3655 if (StartIdx == ~0U) 3656 StartIdx = EltIdx - (EltIdx % Scale); 3657 if (EltIdx != StartIdx + j) 3658 return SDOperand(); 3659 } 3660 if (StartIdx == ~0U) 3661 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 3662 else 3663 MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32)); 3664 } 3665 3666 V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1); 3667 V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2); 3668 return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2, 3669 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3670 &MaskVec[0], MaskVec.size())); 3671} 3672 3673SDOperand 3674X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 3675 SDOperand V1 = Op.getOperand(0); 3676 SDOperand V2 = Op.getOperand(1); 3677 SDOperand PermMask = Op.getOperand(2); 3678 MVT::ValueType VT = Op.getValueType(); 3679 unsigned NumElems = PermMask.getNumOperands(); 3680 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3681 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3682 bool V1IsSplat = false; 3683 bool V2IsSplat = false; 3684 3685 if (isUndefShuffle(Op.Val)) 3686 return DAG.getNode(ISD::UNDEF, VT); 3687 3688 if (isZeroShuffle(Op.Val)) 3689 return getZeroVector(VT, DAG); 3690 3691 if (isIdentityMask(PermMask.Val)) 3692 return V1; 3693 else if (isIdentityMask(PermMask.Val, true)) 3694 return V2; 3695 3696 if (isSplatMask(PermMask.Val)) { 3697 if (NumElems <= 4) return Op; 3698 // Promote it to a v4i32 splat. 3699 return PromoteSplat(Op, DAG); 3700 } 3701 3702 // If the shuffle can be profitably rewritten as a narrower shuffle, then 3703 // do it! 3704 if (VT == MVT::v8i16 || VT == MVT::v16i8) { 3705 SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this); 3706 if (NewOp.Val) 3707 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); 3708 } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { 3709 // FIXME: Figure out a cleaner way to do this. 3710 // Try to make use of movq to zero out the top part. 
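    // e.g. a v4i32 shuffle taking <0,1> from V1 and <4,5> from an all-zeros
    // V2 narrows to the v2i64 shuffle <0,2>; once commuted into MOVL form it
    // selects to a movq that clears the upper 64 bits.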
    if (ISD::isBuildVectorAllZeros(V2.Val)) {
      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
      if (NewOp.Val) {
        SDOperand NewV1 = NewOp.getOperand(0);
        SDOperand NewV2 = NewOp.getOperand(1);
        SDOperand NewMask = NewOp.getOperand(2);
        if (isCommutedMOVL(NewMask.Val, true, false)) {
          NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
          NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
                              NewV1, NewV2, getMOVLMask(2, DAG));
          return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
        }
      }
    } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
      if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
        return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
    }
  }

  if (X86::isMOVLMask(PermMask.Val))
    return (V1IsUndef) ? V2 : Op;

  if (X86::isMOVSHDUPMask(PermMask.Val) ||
      X86::isMOVSLDUPMask(PermMask.Val) ||
      X86::isMOVHLPSMask(PermMask.Val) ||
      X86::isMOVHPMask(PermMask.Val) ||
      X86::isMOVLPMask(PermMask.Val))
    return Op;

  if (ShouldXformToMOVHLPS(PermMask.Val) ||
      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  bool Commuted = false;
  // FIXME: This should also accept a bitcast of a splat? Be careful, not
  // 1,1,1,1 -> v8i16 though.
  V1IsSplat = isSplatVector(V1.Val);
  V2IsSplat = isSplatVector(V2.Val);

  // Canonicalize the splat or undef, if present, to be on the RHS.
  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    std::swap(V1IsSplat, V2IsSplat);
    std::swap(V1IsUndef, V2IsUndef);
    Commuted = true;
  }

  // FIXME: Figure out a cleaner way to do this.
  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
    if (V2IsUndef) return V1;
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute to form a proper MOVS{S|D}.
      SDOperand NewMask = getMOVLMask(NumElems, DAG);
      if (NewMask.Val != PermMask.Val)
        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
    }
    return Op;
  }

  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
      X86::isUNPCKLMask(PermMask.Val) ||
      X86::isUNPCKHMask(PermMask.Val))
    return Op;

  if (V2IsSplat) {
    // Normalize mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If it matches, return a
    // new vector_shuffle with the corrected mask.
    SDOperand NewMask = NormalizeMask(PermMask, DAG);
    if (NewMask.Val != PermMask.Val) {
      if (X86::isUNPCKLMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
      }
    }
  }

  // Normalize the node to match x86 shuffle ops if needed.
  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);

  if (Commuted) {
    // Commute it back and try unpck* again.
    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKLMask(PermMask.Val) ||
        X86::isUNPCKHMask(PermMask.Val))
      return Op;
  }

  // If VT is integer, try PSHUF* first, then SHUFP*.
  if (MVT::isInteger(VT)) {
    // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
    // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
    if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
         X86::isPSHUFDMask(PermMask.Val)) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }

    if (X86::isSHUFPMask(PermMask.Val) &&
        MVT::getSizeInBits(VT) != 64)  // Don't do this for MMX.
      return Op;
  } else {
    // Floating point cases in the other order.
    if (X86::isSHUFPMask(PermMask.Val))
      return Op;
    if (X86::isPSHUFDMask(PermMask.Val) ||
        X86::isPSHUFHWMask(PermMask.Val) ||
        X86::isPSHUFLWMask(PermMask.Val)) {
      if (V2.getOpcode() != ISD::UNDEF)
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                           DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
      return Op;
    }
  }

  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
  if (VT == MVT::v8i16) {
    SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
    if (NewOp.Val)
      return NewOp;
  }

  // Handle all 4 wide cases with a number of shuffles.
  if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
    // Don't do this for MMX.
    MVT::ValueType MaskVT = PermMask.getValueType();
    MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
    SmallVector<std::pair<int, int>, 8> Locs;
    Locs.reserve(NumElems);
    SmallVector<SDOperand, 8> Mask1(NumElems,
                                    DAG.getNode(ISD::UNDEF, MaskEVT));
    SmallVector<SDOperand, 8> Mask2(NumElems,
                                    DAG.getNode(ISD::UNDEF, MaskEVT));
    unsigned NumHi = 0;
    unsigned NumLo = 0;
    // If no more than two elements come from either vector, this can be
    // implemented with two shuffles. The first shuffle gathers the elements;
    // the second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
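    // e.g. mask <0,4,1,5>: the first shuffle gathers <V1[0],V1[1],V2[0],V2[1]>
    // using <0,1,4,5>; the second, taking that result as both operands,
    // applies <0,2,5,7> to produce <V1[0],V2[0],V1[1],V2[1]>.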
    for (unsigned i = 0; i != NumElems; ++i) {
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else {
        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
        if (Val < NumElems) {
          Locs[i] = std::make_pair(0, NumLo);
          Mask1[NumLo] = Elt;
          NumLo++;
        } else {
          Locs[i] = std::make_pair(1, NumHi);
          if (2+NumHi < NumElems)
            Mask1[2+NumHi] = Elt;
          NumHi++;
        }
      }
    }
    if (NumLo <= 2 && NumHi <= 2) {
      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &Mask1[0], Mask1.size()));
      for (unsigned i = 0; i != NumElems; ++i) {
        if (Locs[i].first == -1)
          continue;
        else {
          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
        }
      }

      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &Mask2[0], Mask2.size()));
    }

    // Break it into (shuffle shuffle_hi, shuffle_lo).
    Locs.clear();
    SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
    SmallVector<SDOperand,8> *MaskPtr = &LoMask;
    unsigned MaskIdx = 0;
    unsigned LoIdx = 0;
    unsigned HiIdx = NumElems/2;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (i == NumElems/2) {
        MaskPtr = &HiMask;
        MaskIdx = 1;
        LoIdx = 0;
        HiIdx = NumElems/2;
      }
      SDOperand Elt = PermMask.getOperand(i);
      if (Elt.getOpcode() == ISD::UNDEF) {
        Locs[i] = std::make_pair(-1, -1);
      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
        Locs[i] = std::make_pair(MaskIdx, LoIdx);
        (*MaskPtr)[LoIdx] = Elt;
        LoIdx++;
      } else {
        Locs[i] = std::make_pair(MaskIdx, HiIdx);
        (*MaskPtr)[HiIdx] = Elt;
        HiIdx++;
      }
    }

    SDOperand LoShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                              &LoMask[0], LoMask.size()));
    SDOperand HiShuffle =
      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                              &HiMask[0], HiMask.size()));
    SmallVector<SDOperand, 8> MaskOps;
    for (unsigned i = 0; i != NumElems; ++i) {
      if (Locs[i].first == -1) {
        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
      } else {
        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
      }
    }
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskOps[0], MaskOps.size()));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDOperand();

  MVT::ValueType VT = Op.getValueType();
  // TODO: handle v16i8.
  if (MVT::getSizeInBits(VT) == 16) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return DAG.getNode(ISD::TRUNCATE, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
                                     DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
                                     Op.getOperand(1)));
    // Transform it so it matches pextrw, which produces a 32-bit result.
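    // The extract is widened to a 32-bit PEXTRW and the AssertZext records
    // that the upper 16 bits are zero before truncating back down.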
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                   DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;
    // SHUFPS the element to the lowest double word, then movss.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    SDOperand Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  } else if (MVT::getSizeInBits(VT) == 64) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
    // to an f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    SDOperand Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  if (EVT == MVT::i8)
    return SDOperand();

  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);

  if (MVT::getSizeInBits(EVT) == 16) {
    // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
    // as its second argument.
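    // e.g. (insert_vector_elt v8i16 V, i16 X, 3) becomes
    // (X86ISD::PINSRW V, (any_extend X), 3).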
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), getPointerTy());
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  }

  N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
  unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
  SmallVector<SDOperand, 4> MaskVec;
  for (unsigned i = 0; i < 4; ++i)
    MaskVec.push_back(DAG.getConstant((i == Idx) ? i+4 : i, MaskEVT));
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskVec[0], MaskVec.size()));
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                               getPointerTy(),
                                               CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
  // to load the value at address GV, not the value of GV itself. This means
  // that the GlobalAddress must be in the base or index register of the
  // address, not the GV offset field. (The platform check is inside the
  // GVRequiresExtraLoad() call.) The same applies for external symbols during
  // PIC codegen.
  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);

  return Result;
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model.
static SDOperand
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                              const MVT::ValueType PtrVT) {
  SDOperand InFlag;
  SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
                                     DAG.getNode(X86ISD::GlobalBaseReg,
                                                 PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  // emit leal symbol@TLSGD(,%ebx,1), %eax
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Ops[] = { Chain, TGA, InFlag };
  SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
  InFlag = Result.getValue(2);
  Chain = Result.getValue(1);

  // call ___tls_get_addr. This function receives its argument in
  // the register EAX.
  Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
  InFlag = Chain.getValue(1);

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops1[] = { Chain,
                       DAG.getTargetExternalSymbol("___tls_get_addr",
                                                   PtrVT),
                       DAG.getRegister(X86::EAX, PtrVT),
                       DAG.getRegister(X86::EBX, PtrVT),
                       InFlag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
  InFlag = Chain.getValue(1);

  return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}

// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDOperand
LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                    const MVT::ValueType PtrVT) {
  // Get the Thread Pointer.
  SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
  // exec).
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);

  if (GA->getGlobal()->isDeclaration())  // initial exec TLS model
    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);

  // The address of the thread-local variable is the add of the thread
  // pointer with the offset of the variable.
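  // For local exec the offset is the link-time constant x@ntpoff; for initial
  // exec it is first loaded from the GOT entry x@indntpoff above.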
  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}

SDOperand
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  // TODO: implement the "initial exec" model for PIC executables
  assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF and 64-bit targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS model;
  // otherwise use the "Local Exec" TLS model.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
  else
    return LowerToTLSExecModel(GA, DAG, getPointerTy());
}

SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
         "Not an i64 shift!");
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDOperand ShOpLo = Op.getOperand(0);
  SDOperand ShOpHi = Op.getOperand(1);
  SDOperand ShAmt = Op.getOperand(2);
  SDOperand Tmp1 = isSRA ?
    DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
    DAG.getConstant(0, MVT::i32);

  SDOperand Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 4238 } 4239 4240 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4241 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 4242 DAG.getConstant(32, MVT::i8)); 4243 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 4244 AndNode, DAG.getConstant(0, MVT::i8)); 4245 4246 SDOperand Hi, Lo; 4247 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4248 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 4249 SmallVector<SDOperand, 4> Ops; 4250 if (Op.getOpcode() == ISD::SHL_PARTS) { 4251 Ops.push_back(Tmp2); 4252 Ops.push_back(Tmp3); 4253 Ops.push_back(CC); 4254 Ops.push_back(Cond); 4255 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4256 4257 Ops.clear(); 4258 Ops.push_back(Tmp3); 4259 Ops.push_back(Tmp1); 4260 Ops.push_back(CC); 4261 Ops.push_back(Cond); 4262 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4263 } else { 4264 Ops.push_back(Tmp2); 4265 Ops.push_back(Tmp3); 4266 Ops.push_back(CC); 4267 Ops.push_back(Cond); 4268 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4269 4270 Ops.clear(); 4271 Ops.push_back(Tmp3); 4272 Ops.push_back(Tmp1); 4273 Ops.push_back(CC); 4274 Ops.push_back(Cond); 4275 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4276 } 4277 4278 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 4279 Ops.clear(); 4280 Ops.push_back(Lo); 4281 Ops.push_back(Hi); 4282 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 4283} 4284 4285SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 4286 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 4287 Op.getOperand(0).getValueType() >= MVT::i16 && 4288 "Unknown SINT_TO_FP to lower!"); 4289 4290 SDOperand Result; 4291 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 4292 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 4293 MachineFunction &MF = DAG.getMachineFunction(); 4294 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4295 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4296 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 4297 StackSlot, NULL, 0); 4298 4299 // These are really Legal; caller falls through into that case. 4300 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32) 4301 return Result; 4302 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64) 4303 return Result; 4304 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 4305 Subtarget->is64Bit()) 4306 return Result; 4307 4308 // Build the FILD 4309 SDVTList Tys; 4310 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) || 4311 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64); 4312 if (useSSE) 4313 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 4314 else 4315 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 4316 SmallVector<SDOperand, 8> Ops; 4317 Ops.push_back(Chain); 4318 Ops.push_back(StackSlot); 4319 Ops.push_back(DAG.getValueType(SrcVT)); 4320 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 4321 Tys, &Ops[0], Ops.size()); 4322 4323 if (useSSE) { 4324 Chain = Result.getValue(1); 4325 SDOperand InFlag = Result.getValue(2); 4326 4327 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 4328 // shouldn't be necessary except that RFP cannot be live across 4329 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
4330 MachineFunction &MF = DAG.getMachineFunction(); 4331 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 4332 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4333 Tys = DAG.getVTList(MVT::Other); 4334 SmallVector<SDOperand, 8> Ops; 4335 Ops.push_back(Chain); 4336 Ops.push_back(Result); 4337 Ops.push_back(StackSlot); 4338 Ops.push_back(DAG.getValueType(Op.getValueType())); 4339 Ops.push_back(InFlag); 4340 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 4341 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 4342 } 4343 4344 return Result; 4345} 4346 4347std::pair<SDOperand,SDOperand> X86TargetLowering:: 4348FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) { 4349 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 4350 "Unknown FP_TO_SINT to lower!"); 4351 4352 // These are really Legal. 4353 if (Op.getValueType() == MVT::i32 && 4354 X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) 4355 return std::make_pair(SDOperand(), SDOperand()); 4356 if (Op.getValueType() == MVT::i32 && 4357 X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64) 4358 return std::make_pair(SDOperand(), SDOperand()); 4359 if (Subtarget->is64Bit() && 4360 Op.getValueType() == MVT::i64 && 4361 Op.getOperand(0).getValueType() != MVT::f80) 4362 return std::make_pair(SDOperand(), SDOperand()); 4363 4364 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 4365 // stack slot. 4366 MachineFunction &MF = DAG.getMachineFunction(); 4367 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 4368 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4369 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4370 unsigned Opc; 4371 switch (Op.getValueType()) { 4372 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4373 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4374 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4375 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4376 } 4377 4378 SDOperand Chain = DAG.getEntryNode(); 4379 SDOperand Value = Op.getOperand(0); 4380 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) || 4381 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) { 4382 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4383 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 4384 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4385 SDOperand Ops[] = { 4386 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4387 }; 4388 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4389 Chain = Value.getValue(1); 4390 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4391 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4392 } 4393 4394 // Build the FP_TO_INT*_IN_MEM 4395 SDOperand Ops[] = { Chain, Value, StackSlot }; 4396 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4397 4398 return std::make_pair(FIST, StackSlot); 4399} 4400 4401SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 4402 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG); 4403 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4404 if (FIST.Val == 0) return SDOperand(); 4405 4406 // Load the result. 
4407 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4408} 4409 4410SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 4411 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG); 4412 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4413 if (FIST.Val == 0) return 0; 4414 4415 // Return an i64 load from the stack slot. 4416 SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0); 4417 4418 // Use a MERGE_VALUES node to drop the chain result value. 4419 return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val; 4420} 4421 4422SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 4423 MVT::ValueType VT = Op.getValueType(); 4424 MVT::ValueType EltVT = VT; 4425 if (MVT::isVector(VT)) 4426 EltVT = MVT::getVectorElementType(VT); 4427 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4428 std::vector<Constant*> CV; 4429 if (EltVT == MVT::f64) { 4430 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 4431 CV.push_back(C); 4432 CV.push_back(C); 4433 } else { 4434 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 4435 CV.push_back(C); 4436 CV.push_back(C); 4437 CV.push_back(C); 4438 CV.push_back(C); 4439 } 4440 Constant *C = ConstantVector::get(CV); 4441 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4442 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4443 false, 16); 4444 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4445} 4446 4447SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 4448 MVT::ValueType VT = Op.getValueType(); 4449 MVT::ValueType EltVT = VT; 4450 unsigned EltNum = 1; 4451 if (MVT::isVector(VT)) { 4452 EltVT = MVT::getVectorElementType(VT); 4453 EltNum = MVT::getVectorNumElements(VT); 4454 } 4455 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4456 std::vector<Constant*> CV; 4457 if (EltVT == MVT::f64) { 4458 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 4459 CV.push_back(C); 4460 CV.push_back(C); 4461 } else { 4462 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 4463 CV.push_back(C); 4464 CV.push_back(C); 4465 CV.push_back(C); 4466 CV.push_back(C); 4467 } 4468 Constant *C = ConstantVector::get(CV); 4469 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4470 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4471 false, 16); 4472 if (MVT::isVector(VT)) { 4473 return DAG.getNode(ISD::BIT_CONVERT, VT, 4474 DAG.getNode(ISD::XOR, MVT::v2i64, 4475 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4476 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4477 } else { 4478 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4479 } 4480} 4481 4482SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 4483 SDOperand Op0 = Op.getOperand(0); 4484 SDOperand Op1 = Op.getOperand(1); 4485 MVT::ValueType VT = Op.getValueType(); 4486 MVT::ValueType SrcVT = Op1.getValueType(); 4487 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 4488 4489 // If second operand is smaller, extend it first. 4490 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 4491 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4492 SrcVT = VT; 4493 SrcTy = MVT::getTypeForValueType(SrcVT); 4494 } 4495 // And if it is bigger, shrink it first. 
4496 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4497 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1); 4498 SrcVT = VT; 4499 SrcTy = MVT::getTypeForValueType(SrcVT); 4500 } 4501 4502 // At this point the operands and the result should have the same 4503 // type, and that won't be f80 since that is not custom lowered. 4504 4505 // First get the sign bit of second operand. 4506 std::vector<Constant*> CV; 4507 if (SrcVT == MVT::f64) { 4508 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 4509 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4510 } else { 4511 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 4512 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4513 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4514 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4515 } 4516 Constant *C = ConstantVector::get(CV); 4517 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4518 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 4519 false, 16); 4520 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4521 4522 // Shift sign bit right or left if the two operands have different types. 4523 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4524 // Op0 is MVT::f32, Op1 is MVT::f64. 4525 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4526 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4527 DAG.getConstant(32, MVT::i32)); 4528 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4529 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4530 DAG.getConstant(0, getPointerTy())); 4531 } 4532 4533 // Clear first operand sign bit. 4534 CV.clear(); 4535 if (VT == MVT::f64) { 4536 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 4537 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4538 } else { 4539 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 4540 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4541 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4542 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4543 } 4544 C = ConstantVector::get(CV); 4545 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4546 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4547 false, 16); 4548 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4549 4550 // Or the value with the sign bit. 
4551 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 4552} 4553 4554SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 4555 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 4556 SDOperand Cond; 4557 SDOperand Op0 = Op.getOperand(0); 4558 SDOperand Op1 = Op.getOperand(1); 4559 SDOperand CC = Op.getOperand(2); 4560 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4561 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 4562 unsigned X86CC; 4563 4564 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 4565 Op0, Op1, DAG)) { 4566 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4567 return DAG.getNode(X86ISD::SETCC, MVT::i8, 4568 DAG.getConstant(X86CC, MVT::i8), Cond); 4569 } 4570 4571 assert(isFP && "Illegal integer SetCC!"); 4572 4573 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4574 switch (SetCCOpcode) { 4575 default: assert(false && "Illegal floating point SetCC!"); 4576 case ISD::SETOEQ: { // !PF & ZF 4577 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4578 DAG.getConstant(X86::COND_NP, MVT::i8), Cond); 4579 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4580 DAG.getConstant(X86::COND_E, MVT::i8), Cond); 4581 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 4582 } 4583 case ISD::SETUNE: { // PF | !ZF 4584 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4585 DAG.getConstant(X86::COND_P, MVT::i8), Cond); 4586 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4587 DAG.getConstant(X86::COND_NE, MVT::i8), Cond); 4588 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 4589 } 4590 } 4591} 4592 4593 4594SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 4595 bool addTest = true; 4596 SDOperand Cond = Op.getOperand(0); 4597 SDOperand CC; 4598 4599 if (Cond.getOpcode() == ISD::SETCC) 4600 Cond = LowerSETCC(Cond, DAG); 4601 4602 // If condition flag is set by a X86ISD::CMP, then use it as the condition 4603 // setting operand in place of the X86ISD::SETCC. 4604 if (Cond.getOpcode() == X86ISD::SETCC) { 4605 CC = Cond.getOperand(0); 4606 4607 SDOperand Cmp = Cond.getOperand(1); 4608 unsigned Opc = Cmp.getOpcode(); 4609 MVT::ValueType VT = Op.getValueType(); 4610 bool IllegalFPCMov = false; 4611 if (VT == MVT::f32 && !X86ScalarSSEf32) 4612 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4613 else if (VT == MVT::f64 && !X86ScalarSSEf64) 4614 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4615 else if (VT == MVT::f80) 4616 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4617 if ((Opc == X86ISD::CMP || 4618 Opc == X86ISD::COMI || 4619 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 4620 Cond = Cmp; 4621 addTest = false; 4622 } 4623 } 4624 4625 if (addTest) { 4626 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4627 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 4628 } 4629 4630 const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(), 4631 MVT::Flag); 4632 SmallVector<SDOperand, 4> Ops; 4633 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 4634 // condition is true. 
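  // For (select Cond, TrueV, FalseV) the operands are therefore pushed as
  // (FalseV, TrueV, CC, Cond): when CC holds, the CMOV produces its second
  // value operand (TrueV), otherwise the first (FalseV).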
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond  = Op.getOperand(1);
  SDOperand Dest  = Op.getOperand(2);
  SDOperand CC;

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG);

  // If condition flag is set by an X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP ||
        Opc == X86ISD::COMI ||
        Opc == X86ISD::UCOMI) {
      Cond = Cmp;
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond,
                       DAG.getConstant(0, MVT::i8));
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Chain, Dest, CC, Cond);
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;

  if (Subtarget->is64Bit()) {
    if (CallingConv == CallingConv::Fast && isTailCall && PerformTailCallOpt)
      return LowerX86_TailCallTo(Op, DAG, CallingConv);
    else
      return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
  } else {
    switch (CallingConv) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      if (isTailCall && PerformTailCallOpt)
        return LowerX86_TailCallTo(Op, DAG, CallingConv);
      else
        return LowerCCCCallTo(Op, DAG, CallingConv);
    case CallingConv::C:
    case CallingConv::X86_StdCall:
      return LowerCCCCallTo(Op, DAG, CallingConv);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, CallingConv);
    }
  }
}


// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
SDOperand
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                           SelectionDAG &DAG) {
  assert(Subtarget->isTargetCygMing() &&
         "This should be used only on Cygwin/Mingw targets");

  // Get the inputs.
  SDOperand Chain = Op.getOperand(0);
  SDOperand Size  = Op.getOperand(1);
  // FIXME: Ensure alignment here

  SDOperand Flag;

  MVT::ValueType IntPtr = getPointerTy();
  MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);

  Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
  Flag = Chain.getValue(1);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops[] = { Chain,
                      DAG.getTargetExternalSymbol("_alloca", IntPtr),
                      DAG.getRegister(X86::EAX, IntPtr),
                      Flag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
  Flag = Chain.getValue(1);

  Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(SPTy);
  Tys.push_back(MVT::Other);
  SDOperand Ops1[2] = { Chain.getValue(0), Chain };
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);

  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (Subtarget->is64Bit())
    return LowerX86_64CCCArguments(Op, DAG);
  else
    switch (CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      return LowerCCCArguments(Op, DAG, true);
    case CallingConv::C:
      return LowerCCCArguments(Op, DAG);
    case CallingConv::X86_StdCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
      return LowerCCCArguments(Op, DAG, true);
    case CallingConv::X86_FastCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
      return LowerFastCCArguments(Op, DAG);
    }
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned or size is more than the threshold, call memset.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 ||
      (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Op.getOperand(1);
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    // Extend the unsigned i8 argument to be an int value for the call.
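    // (libc declares memset as void *memset(void *s, int c, size_t n), so the
    // i8 value is widened here to match the int parameter.)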
    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    Entry.Node = Op.getOperand(3);
    Args.push_back(Entry);
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = Op.getOperand(3);
      break;
    }

    if (AVT > MVT::i8) {
      if (I) {
        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
        BytesLeft = I->getValue() % UBytes;
      } else {
        assert(AVT >= MVT::i32 &&
               "Do not use rep;stos if not at least DWORD aligned");
        // Divide the count by the element size: shift by 3 for QWORD stores,
        // by 2 for DWORD stores; the remainder is handled by the second
        // rep;stos below.
        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
                            Op.getOperand(3),
                            DAG.getConstant((AVT == MVT::i64) ? 3 : 2,
                                            MVT::i8));
        TwoRepStos = true;
      }
    }

    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = Op.getOperand(3);
    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant((AVT == MVT::i64) ? 7 : 3,
                                                 CVT));
    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
                              Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 7 bytes.
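    // For example, a 15-byte DWORD-aligned constant memset does rep;stos with
    // ECX = 3 (writing 12 bytes), leaving BytesLeft = 3 to be finished below
    // with one 2-byte store and one 1-byte store (illustrative sizes).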
4894 SDOperand Value; 4895 unsigned Val = ValC->getValue() & 255; 4896 unsigned Offset = I->getValue() - BytesLeft; 4897 SDOperand DstAddr = Op.getOperand(1); 4898 MVT::ValueType AddrVT = DstAddr.getValueType(); 4899 if (BytesLeft >= 4) { 4900 Val = (Val << 8) | Val; 4901 Val = (Val << 16) | Val; 4902 Value = DAG.getConstant(Val, MVT::i32); 4903 Chain = DAG.getStore(Chain, Value, 4904 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4905 DAG.getConstant(Offset, AddrVT)), 4906 NULL, 0); 4907 BytesLeft -= 4; 4908 Offset += 4; 4909 } 4910 if (BytesLeft >= 2) { 4911 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4912 Chain = DAG.getStore(Chain, Value, 4913 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4914 DAG.getConstant(Offset, AddrVT)), 4915 NULL, 0); 4916 BytesLeft -= 2; 4917 Offset += 2; 4918 } 4919 if (BytesLeft == 1) { 4920 Value = DAG.getConstant(Val, MVT::i8); 4921 Chain = DAG.getStore(Chain, Value, 4922 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4923 DAG.getConstant(Offset, AddrVT)), 4924 NULL, 0); 4925 } 4926 } 4927 4928 return Chain; 4929} 4930 4931SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4932 SDOperand Dest, 4933 SDOperand Source, 4934 unsigned Size, 4935 unsigned Align, 4936 SelectionDAG &DAG) { 4937 MVT::ValueType AVT; 4938 unsigned BytesLeft = 0; 4939 switch (Align & 3) { 4940 case 2: // WORD aligned 4941 AVT = MVT::i16; 4942 break; 4943 case 0: // DWORD aligned 4944 AVT = MVT::i32; 4945 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4946 AVT = MVT::i64; 4947 break; 4948 default: // Byte aligned 4949 AVT = MVT::i8; 4950 break; 4951 } 4952 4953 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4954 SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy()); 4955 BytesLeft = Size % UBytes; 4956 4957 SDOperand InFlag(0, 0); 4958 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4959 Count, InFlag); 4960 InFlag = Chain.getValue(1); 4961 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4962 Dest, InFlag); 4963 InFlag = Chain.getValue(1); 4964 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4965 Source, InFlag); 4966 InFlag = Chain.getValue(1); 4967 4968 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4969 SmallVector<SDOperand, 8> Ops; 4970 Ops.push_back(Chain); 4971 Ops.push_back(DAG.getValueType(AVT)); 4972 Ops.push_back(InFlag); 4973 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4974 4975 if (BytesLeft) { 4976 // Issue loads and stores for the last 1 - 7 bytes. 
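    // E.g. a 10-byte DWORD-aligned copy uses rep;movs with ECX = 2 and then
    // moves the trailing 2 bytes with a single i16 load/store pair
    // (illustrative sizes).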
4977 unsigned Offset = Size - BytesLeft; 4978 SDOperand DstAddr = Dest; 4979 MVT::ValueType DstVT = DstAddr.getValueType(); 4980 SDOperand SrcAddr = Source; 4981 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4982 SDOperand Value; 4983 if (BytesLeft >= 4) { 4984 Value = DAG.getLoad(MVT::i32, Chain, 4985 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4986 DAG.getConstant(Offset, SrcVT)), 4987 NULL, 0); 4988 Chain = Value.getValue(1); 4989 Chain = DAG.getStore(Chain, Value, 4990 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4991 DAG.getConstant(Offset, DstVT)), 4992 NULL, 0); 4993 BytesLeft -= 4; 4994 Offset += 4; 4995 } 4996 if (BytesLeft >= 2) { 4997 Value = DAG.getLoad(MVT::i16, Chain, 4998 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4999 DAG.getConstant(Offset, SrcVT)), 5000 NULL, 0); 5001 Chain = Value.getValue(1); 5002 Chain = DAG.getStore(Chain, Value, 5003 DAG.getNode(ISD::ADD, DstVT, DstAddr, 5004 DAG.getConstant(Offset, DstVT)), 5005 NULL, 0); 5006 BytesLeft -= 2; 5007 Offset += 2; 5008 } 5009 5010 if (BytesLeft == 1) { 5011 Value = DAG.getLoad(MVT::i8, Chain, 5012 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 5013 DAG.getConstant(Offset, SrcVT)), 5014 NULL, 0); 5015 Chain = Value.getValue(1); 5016 Chain = DAG.getStore(Chain, Value, 5017 DAG.getNode(ISD::ADD, DstVT, DstAddr, 5018 DAG.getConstant(Offset, DstVT)), 5019 NULL, 0); 5020 } 5021 } 5022 5023 return Chain; 5024} 5025 5026/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain 5027SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){ 5028 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 5029 SDOperand TheChain = N->getOperand(0); 5030 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); 5031 if (Subtarget->is64Bit()) { 5032 SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 5033 SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, 5034 MVT::i64, rax.getValue(2)); 5035 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, 5036 DAG.getConstant(32, MVT::i8)); 5037 SDOperand Ops[] = { 5038 DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) 5039 }; 5040 5041 Tys = DAG.getVTList(MVT::i64, MVT::Other); 5042 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 5043 } 5044 5045 SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 5046 SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, 5047 MVT::i32, eax.getValue(2)); 5048 // Use a buildpair to merge the two 32-bit values into a 64-bit one. 5049 SDOperand Ops[] = { eax, edx }; 5050 Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2); 5051 5052 // Use a MERGE_VALUES to return the value and chain. 5053 Ops[1] = edx.getValue(1); 5054 Tys = DAG.getVTList(MVT::i64, MVT::Other); 5055 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 5056} 5057 5058SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 5059 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 5060 5061 if (!Subtarget->is64Bit()) { 5062 // vastart just stores the address of the VarArgsFrameIndex slot into the 5063 // memory location argument. 5064 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5065 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 5066 SV->getOffset()); 5067 } 5068 5069 // __va_list_tag: 5070 // gp_offset (0 - 6 * 8) 5071 // fp_offset (48 - 48 + 8 * 16) 5072 // overflow_arg_area (point to parameters coming in memory). 
5073 // reg_save_area 5074 SmallVector<SDOperand, 8> MemOps; 5075 SDOperand FIN = Op.getOperand(1); 5076 // Store gp_offset 5077 SDOperand Store = DAG.getStore(Op.getOperand(0), 5078 DAG.getConstant(VarArgsGPOffset, MVT::i32), 5079 FIN, SV->getValue(), SV->getOffset()); 5080 MemOps.push_back(Store); 5081 5082 // Store fp_offset 5083 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 5084 DAG.getConstant(4, getPointerTy())); 5085 Store = DAG.getStore(Op.getOperand(0), 5086 DAG.getConstant(VarArgsFPOffset, MVT::i32), 5087 FIN, SV->getValue(), SV->getOffset()); 5088 MemOps.push_back(Store); 5089 5090 // Store ptr to overflow_arg_area 5091 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 5092 DAG.getConstant(4, getPointerTy())); 5093 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 5094 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 5095 SV->getOffset()); 5096 MemOps.push_back(Store); 5097 5098 // Store ptr to reg_save_area. 5099 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 5100 DAG.getConstant(8, getPointerTy())); 5101 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 5102 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 5103 SV->getOffset()); 5104 MemOps.push_back(Store); 5105 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 5106} 5107 5108SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 5109 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5110 SDOperand Chain = Op.getOperand(0); 5111 SDOperand DstPtr = Op.getOperand(1); 5112 SDOperand SrcPtr = Op.getOperand(2); 5113 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 5114 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 5115 5116 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 5117 SrcSV->getValue(), SrcSV->getOffset()); 5118 Chain = SrcPtr.getValue(1); 5119 for (unsigned i = 0; i < 3; ++i) { 5120 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 5121 SrcSV->getValue(), SrcSV->getOffset()); 5122 Chain = Val.getValue(1); 5123 Chain = DAG.getStore(Chain, Val, DstPtr, 5124 DstSV->getValue(), DstSV->getOffset()); 5125 if (i == 2) 5126 break; 5127 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 5128 DAG.getConstant(8, getPointerTy())); 5129 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 5130 DAG.getConstant(8, getPointerTy())); 5131 } 5132 return Chain; 5133} 5134 5135SDOperand 5136X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 5137 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 5138 switch (IntNo) { 5139 default: return SDOperand(); // Don't custom lower most intrinsics. 5140 // Comparison intrinsics. 
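  // Each of these lowers to an X86ISD::COMI or X86ISD::UCOMI compare of the
  // two scalar operands; the requested predicate is then read out of EFLAGS
  // with an X86ISD::SETCC and extended to the i32 result (see below).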
5141 case Intrinsic::x86_sse_comieq_ss: 5142 case Intrinsic::x86_sse_comilt_ss: 5143 case Intrinsic::x86_sse_comile_ss: 5144 case Intrinsic::x86_sse_comigt_ss: 5145 case Intrinsic::x86_sse_comige_ss: 5146 case Intrinsic::x86_sse_comineq_ss: 5147 case Intrinsic::x86_sse_ucomieq_ss: 5148 case Intrinsic::x86_sse_ucomilt_ss: 5149 case Intrinsic::x86_sse_ucomile_ss: 5150 case Intrinsic::x86_sse_ucomigt_ss: 5151 case Intrinsic::x86_sse_ucomige_ss: 5152 case Intrinsic::x86_sse_ucomineq_ss: 5153 case Intrinsic::x86_sse2_comieq_sd: 5154 case Intrinsic::x86_sse2_comilt_sd: 5155 case Intrinsic::x86_sse2_comile_sd: 5156 case Intrinsic::x86_sse2_comigt_sd: 5157 case Intrinsic::x86_sse2_comige_sd: 5158 case Intrinsic::x86_sse2_comineq_sd: 5159 case Intrinsic::x86_sse2_ucomieq_sd: 5160 case Intrinsic::x86_sse2_ucomilt_sd: 5161 case Intrinsic::x86_sse2_ucomile_sd: 5162 case Intrinsic::x86_sse2_ucomigt_sd: 5163 case Intrinsic::x86_sse2_ucomige_sd: 5164 case Intrinsic::x86_sse2_ucomineq_sd: { 5165 unsigned Opc = 0; 5166 ISD::CondCode CC = ISD::SETCC_INVALID; 5167 switch (IntNo) { 5168 default: break; 5169 case Intrinsic::x86_sse_comieq_ss: 5170 case Intrinsic::x86_sse2_comieq_sd: 5171 Opc = X86ISD::COMI; 5172 CC = ISD::SETEQ; 5173 break; 5174 case Intrinsic::x86_sse_comilt_ss: 5175 case Intrinsic::x86_sse2_comilt_sd: 5176 Opc = X86ISD::COMI; 5177 CC = ISD::SETLT; 5178 break; 5179 case Intrinsic::x86_sse_comile_ss: 5180 case Intrinsic::x86_sse2_comile_sd: 5181 Opc = X86ISD::COMI; 5182 CC = ISD::SETLE; 5183 break; 5184 case Intrinsic::x86_sse_comigt_ss: 5185 case Intrinsic::x86_sse2_comigt_sd: 5186 Opc = X86ISD::COMI; 5187 CC = ISD::SETGT; 5188 break; 5189 case Intrinsic::x86_sse_comige_ss: 5190 case Intrinsic::x86_sse2_comige_sd: 5191 Opc = X86ISD::COMI; 5192 CC = ISD::SETGE; 5193 break; 5194 case Intrinsic::x86_sse_comineq_ss: 5195 case Intrinsic::x86_sse2_comineq_sd: 5196 Opc = X86ISD::COMI; 5197 CC = ISD::SETNE; 5198 break; 5199 case Intrinsic::x86_sse_ucomieq_ss: 5200 case Intrinsic::x86_sse2_ucomieq_sd: 5201 Opc = X86ISD::UCOMI; 5202 CC = ISD::SETEQ; 5203 break; 5204 case Intrinsic::x86_sse_ucomilt_ss: 5205 case Intrinsic::x86_sse2_ucomilt_sd: 5206 Opc = X86ISD::UCOMI; 5207 CC = ISD::SETLT; 5208 break; 5209 case Intrinsic::x86_sse_ucomile_ss: 5210 case Intrinsic::x86_sse2_ucomile_sd: 5211 Opc = X86ISD::UCOMI; 5212 CC = ISD::SETLE; 5213 break; 5214 case Intrinsic::x86_sse_ucomigt_ss: 5215 case Intrinsic::x86_sse2_ucomigt_sd: 5216 Opc = X86ISD::UCOMI; 5217 CC = ISD::SETGT; 5218 break; 5219 case Intrinsic::x86_sse_ucomige_ss: 5220 case Intrinsic::x86_sse2_ucomige_sd: 5221 Opc = X86ISD::UCOMI; 5222 CC = ISD::SETGE; 5223 break; 5224 case Intrinsic::x86_sse_ucomineq_ss: 5225 case Intrinsic::x86_sse2_ucomineq_sd: 5226 Opc = X86ISD::UCOMI; 5227 CC = ISD::SETNE; 5228 break; 5229 } 5230 5231 unsigned X86CC; 5232 SDOperand LHS = Op.getOperand(1); 5233 SDOperand RHS = Op.getOperand(2); 5234 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 5235 5236 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 5237 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 5238 DAG.getConstant(X86CC, MVT::i8), Cond); 5239 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 5240 } 5241 } 5242} 5243 5244SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 5245 // Depths > 0 not supported yet! 
5246 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 5247 return SDOperand(); 5248 5249 // Just load the return address 5250 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 5251 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 5252} 5253 5254SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 5255 // Depths > 0 not supported yet! 5256 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 5257 return SDOperand(); 5258 5259 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 5260 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 5261 DAG.getConstant(4, getPointerTy())); 5262} 5263 5264SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 5265 SelectionDAG &DAG) { 5266 // Is not yet supported on x86-64 5267 if (Subtarget->is64Bit()) 5268 return SDOperand(); 5269 5270 return DAG.getConstant(8, getPointerTy()); 5271} 5272 5273SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 5274{ 5275 assert(!Subtarget->is64Bit() && 5276 "Lowering of eh_return builtin is not supported yet on x86-64"); 5277 5278 MachineFunction &MF = DAG.getMachineFunction(); 5279 SDOperand Chain = Op.getOperand(0); 5280 SDOperand Offset = Op.getOperand(1); 5281 SDOperand Handler = Op.getOperand(2); 5282 5283 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 5284 getPointerTy()); 5285 5286 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 5287 DAG.getConstant(-4UL, getPointerTy())); 5288 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 5289 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 5290 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 5291 MF.addLiveOut(X86::ECX); 5292 5293 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 5294 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 5295} 5296 5297SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 5298 SelectionDAG &DAG) { 5299 SDOperand Root = Op.getOperand(0); 5300 SDOperand Trmp = Op.getOperand(1); // trampoline 5301 SDOperand FPtr = Op.getOperand(2); // nested function 5302 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 5303 5304 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 5305 5306 if (Subtarget->is64Bit()) { 5307 return SDOperand(); // not yet supported 5308 } else { 5309 Function *Func = (Function *) 5310 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 5311 unsigned CC = Func->getCallingConv(); 5312 unsigned NestReg; 5313 5314 switch (CC) { 5315 default: 5316 assert(0 && "Unsupported calling convention"); 5317 case CallingConv::C: 5318 case CallingConv::X86_StdCall: { 5319 // Pass 'nest' parameter in ECX. 5320 // Must be kept in sync with X86CallingConv.td 5321 NestReg = X86::ECX; 5322 5323 // Check that ECX wasn't needed by an 'inreg' parameter. 5324 const FunctionType *FTy = Func->getFunctionType(); 5325 const ParamAttrsList *Attrs = Func->getParamAttrs(); 5326 5327 if (Attrs && !Func->isVarArg()) { 5328 unsigned InRegCount = 0; 5329 unsigned Idx = 1; 5330 5331 for (FunctionType::param_iterator I = FTy->param_begin(), 5332 E = FTy->param_end(); I != E; ++I, ++Idx) 5333 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 5334 // FIXME: should only count parameters that are lowered to integers. 
            InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;

        if (InRegCount > 2) {
          cerr << "Nest register in use - reduce number of inreg parameters!\n";
          abort();
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
      // Pass 'nest' parameter in EAX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::EAX;
      break;
    }

    const X86InstrInfo *TII =
      ((X86TargetMachine&)getTargetMachine()).getInstrInfo();

    SDOperand OutChains[4];
    SDOperand Addr, Disp;

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);

    unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
    unsigned char N86Reg  = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
    OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                Trmp, TrmpSV->getValue(), TrmpSV->getOffset());

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
    OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 1, false, 1);

    unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
    OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
                                TrmpSV->getValue(), TrmpSV->getOffset() + 5);

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
    OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 6, false, 1);

    SDOperand Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
    return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
  }
}

SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 11:10 of the FP control word (FPCW), and has
   the following settings:
     00 Round to nearest
     01 Round to -inf
     10 Round to +inf
     11 Round to 0

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
  */

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  MVT::ValueType VT = Op.getValueType();

  // Save FP Control Word to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
                                DAG.getEntryNode(), StackSlot);

  // Load FP Control Word from stack slot
  SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);

  // Transform the rounding-control bits (11:10) into the FLT_ROUNDS encoding
  // given above.
  SDOperand CWD1 =
    DAG.getNode(ISD::SRL, MVT::i16,
                DAG.getNode(ISD::AND, MVT::i16,
                            CWD, DAG.getConstant(0x800, MVT::i16)),
                DAG.getConstant(11, MVT::i8));
  SDOperand CWD2 =
    DAG.getNode(ISD::SRL, MVT::i16,
                DAG.getNode(ISD::AND, MVT::i16,
                            CWD, DAG.getConstant(0x400, MVT::i16)),
                DAG.getConstant(9, MVT::i8));

  SDOperand RetVal =
    DAG.getNode(ISD::AND, MVT::i16,
                DAG.getNode(ISD::ADD,
MVT::i16, 5435 DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2), 5436 DAG.getConstant(1, MVT::i16)), 5437 DAG.getConstant(3, MVT::i16)); 5438 5439 5440 return DAG.getNode((MVT::getSizeInBits(VT) < 16 ? 5441 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); 5442} 5443 5444SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) { 5445 MVT::ValueType VT = Op.getValueType(); 5446 MVT::ValueType OpVT = VT; 5447 unsigned NumBits = MVT::getSizeInBits(VT); 5448 5449 Op = Op.getOperand(0); 5450 if (VT == MVT::i8) { 5451 // Zero extend to i32 since there is not an i8 bsr. 5452 OpVT = MVT::i32; 5453 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5454 } 5455 5456 // Issue a bsr (scan bits in reverse) which also sets EFLAGS. 5457 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5458 Op = DAG.getNode(X86ISD::BSR, VTs, Op); 5459 5460 // If src is zero (i.e. bsr sets ZF), returns NumBits. 5461 SmallVector<SDOperand, 4> Ops; 5462 Ops.push_back(Op); 5463 Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT)); 5464 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5465 Ops.push_back(Op.getValue(1)); 5466 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5467 5468 // Finally xor with NumBits-1. 5469 Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); 5470 5471 if (VT == MVT::i8) 5472 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5473 return Op; 5474} 5475 5476SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) { 5477 MVT::ValueType VT = Op.getValueType(); 5478 MVT::ValueType OpVT = VT; 5479 unsigned NumBits = MVT::getSizeInBits(VT); 5480 5481 Op = Op.getOperand(0); 5482 if (VT == MVT::i8) { 5483 OpVT = MVT::i32; 5484 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5485 } 5486 5487 // Issue a bsf (scan bits forward) which also sets EFLAGS. 5488 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5489 Op = DAG.getNode(X86ISD::BSF, VTs, Op); 5490 5491 // If src is zero (i.e. bsf sets ZF), returns NumBits. 5492 SmallVector<SDOperand, 4> Ops; 5493 Ops.push_back(Op); 5494 Ops.push_back(DAG.getConstant(NumBits, OpVT)); 5495 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5496 Ops.push_back(Op.getValue(1)); 5497 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5498 5499 if (VT == MVT::i8) 5500 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5501 return Op; 5502} 5503 5504/// LowerOperation - Provide custom lowering hooks for some operations. 
5505/// 5506SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 5507 switch (Op.getOpcode()) { 5508 default: assert(0 && "Should not custom lower this!"); 5509 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5510 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5511 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5512 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5513 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5514 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5515 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5516 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5517 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 5518 case ISD::SHL_PARTS: 5519 case ISD::SRA_PARTS: 5520 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 5521 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 5522 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 5523 case ISD::FABS: return LowerFABS(Op, DAG); 5524 case ISD::FNEG: return LowerFNEG(Op, DAG); 5525 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5526 case ISD::SETCC: return LowerSETCC(Op, DAG); 5527 case ISD::SELECT: return LowerSELECT(Op, DAG); 5528 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 5529 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5530 case ISD::CALL: return LowerCALL(Op, DAG); 5531 case ISD::RET: return LowerRET(Op, DAG); 5532 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 5533 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 5534 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 5535 case ISD::VASTART: return LowerVASTART(Op, DAG); 5536 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 5537 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5538 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5539 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5540 case ISD::FRAME_TO_ARGS_OFFSET: 5541 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 5542 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 5543 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 5544 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 5545 case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG); 5546 case ISD::CTLZ: return LowerCTLZ(Op, DAG); 5547 case ISD::CTTZ: return LowerCTTZ(Op, DAG); 5548 5549 // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands. 5550 case ISD::READCYCLECOUNTER: 5551 return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0); 5552 } 5553} 5554 5555/// ExpandOperation - Provide custom lowering hooks for expanding operations. 
5556SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { 5557 switch (N->getOpcode()) { 5558 default: assert(0 && "Should not custom lower this!"); 5559 case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); 5560 case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); 5561 } 5562} 5563 5564const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 5565 switch (Opcode) { 5566 default: return NULL; 5567 case X86ISD::BSF: return "X86ISD::BSF"; 5568 case X86ISD::BSR: return "X86ISD::BSR"; 5569 case X86ISD::SHLD: return "X86ISD::SHLD"; 5570 case X86ISD::SHRD: return "X86ISD::SHRD"; 5571 case X86ISD::FAND: return "X86ISD::FAND"; 5572 case X86ISD::FOR: return "X86ISD::FOR"; 5573 case X86ISD::FXOR: return "X86ISD::FXOR"; 5574 case X86ISD::FSRL: return "X86ISD::FSRL"; 5575 case X86ISD::FILD: return "X86ISD::FILD"; 5576 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 5577 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 5578 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 5579 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 5580 case X86ISD::FLD: return "X86ISD::FLD"; 5581 case X86ISD::FST: return "X86ISD::FST"; 5582 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 5583 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 5584 case X86ISD::CALL: return "X86ISD::CALL"; 5585 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 5586 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 5587 case X86ISD::CMP: return "X86ISD::CMP"; 5588 case X86ISD::COMI: return "X86ISD::COMI"; 5589 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 5590 case X86ISD::SETCC: return "X86ISD::SETCC"; 5591 case X86ISD::CMOV: return "X86ISD::CMOV"; 5592 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 5593 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 5594 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 5595 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 5596 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 5597 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 5598 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 5599 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 5600 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 5601 case X86ISD::FMAX: return "X86ISD::FMAX"; 5602 case X86ISD::FMIN: return "X86ISD::FMIN"; 5603 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 5604 case X86ISD::FRCP: return "X86ISD::FRCP"; 5605 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 5606 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 5607 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 5608 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 5609 case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; 5610 } 5611} 5612 5613// isLegalAddressingMode - Return true if the addressing mode represented 5614// by AM is legal for this target, for a load/store of the specified type. 5615bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 5616 const Type *Ty) const { 5617 // X86 supports extremely general addressing modes. 5618 5619 // X86 allows a sign-extended 32-bit immediate field as a displacement. 5620 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 5621 return false; 5622 5623 if (AM.BaseGV) { 5624 // We can only fold this if we don't need an extra load. 5625 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 5626 return false; 5627 5628 // X86-64 only supports addr of globals in small code model. 
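    // (Roughly: a small-code-model global can be folded as a 32-bit
    // displacement, but a RIP-relative reference cannot also carry an extra
    // offset or a scaled index, which is what is rejected below.)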
5629 if (Subtarget->is64Bit()) { 5630 if (getTargetMachine().getCodeModel() != CodeModel::Small) 5631 return false; 5632 // If lower 4G is not available, then we must use rip-relative addressing. 5633 if (AM.BaseOffs || AM.Scale > 1) 5634 return false; 5635 } 5636 } 5637 5638 switch (AM.Scale) { 5639 case 0: 5640 case 1: 5641 case 2: 5642 case 4: 5643 case 8: 5644 // These scales always work. 5645 break; 5646 case 3: 5647 case 5: 5648 case 9: 5649 // These scales are formed with basereg+scalereg. Only accept if there is 5650 // no basereg yet. 5651 if (AM.HasBaseReg) 5652 return false; 5653 break; 5654 default: // Other stuff never works. 5655 return false; 5656 } 5657 5658 return true; 5659} 5660 5661 5662bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { 5663 if (!Ty1->isInteger() || !Ty2->isInteger()) 5664 return false; 5665 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 5666 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 5667 if (NumBits1 <= NumBits2) 5668 return false; 5669 return Subtarget->is64Bit() || NumBits1 < 64; 5670} 5671 5672bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1, 5673 MVT::ValueType VT2) const { 5674 if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2)) 5675 return false; 5676 unsigned NumBits1 = MVT::getSizeInBits(VT1); 5677 unsigned NumBits2 = MVT::getSizeInBits(VT2); 5678 if (NumBits1 <= NumBits2) 5679 return false; 5680 return Subtarget->is64Bit() || NumBits1 < 64; 5681} 5682 5683/// isShuffleMaskLegal - Targets can use this to indicate that they only 5684/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5685/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5686/// are assumed to be legal. 5687bool 5688X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 5689 // Only do shuffles on 128-bit vector types for now. 5690 if (MVT::getSizeInBits(VT) == 64) return false; 5691 return (Mask.Val->getNumOperands() <= 4 || 5692 isIdentityMask(Mask.Val) || 5693 isIdentityMask(Mask.Val, true) || 5694 isSplatMask(Mask.Val) || 5695 isPSHUFHW_PSHUFLWMask(Mask.Val) || 5696 X86::isUNPCKLMask(Mask.Val) || 5697 X86::isUNPCKHMask(Mask.Val) || 5698 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 5699 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 5700} 5701 5702bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 5703 MVT::ValueType EVT, 5704 SelectionDAG &DAG) const { 5705 unsigned NumElts = BVOps.size(); 5706 // Only do shuffles on 128-bit vector types for now. 
5707 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 5708 if (NumElts == 2) return true; 5709 if (NumElts == 4) { 5710 return (isMOVLMask(&BVOps[0], 4) || 5711 isCommutedMOVL(&BVOps[0], 4, true) || 5712 isSHUFPMask(&BVOps[0], 4) || 5713 isCommutedSHUFP(&BVOps[0], 4)); 5714 } 5715 return false; 5716} 5717 5718//===----------------------------------------------------------------------===// 5719// X86 Scheduler Hooks 5720//===----------------------------------------------------------------------===// 5721 5722MachineBasicBlock * 5723X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 5724 MachineBasicBlock *BB) { 5725 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5726 switch (MI->getOpcode()) { 5727 default: assert(false && "Unexpected instr type to insert"); 5728 case X86::CMOV_FR32: 5729 case X86::CMOV_FR64: 5730 case X86::CMOV_V4F32: 5731 case X86::CMOV_V2F64: 5732 case X86::CMOV_V2I64: { 5733 // To "insert" a SELECT_CC instruction, we actually have to insert the 5734 // diamond control-flow pattern. The incoming instruction knows the 5735 // destination vreg to set, the condition code register to branch on, the 5736 // true/false values to select between, and a branch opcode to use. 5737 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5738 ilist<MachineBasicBlock>::iterator It = BB; 5739 ++It; 5740 5741 // thisMBB: 5742 // ... 5743 // TrueVal = ... 5744 // cmpTY ccX, r1, r2 5745 // bCC copy1MBB 5746 // fallthrough --> copy0MBB 5747 MachineBasicBlock *thisMBB = BB; 5748 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 5749 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 5750 unsigned Opc = 5751 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 5752 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 5753 MachineFunction *F = BB->getParent(); 5754 F->getBasicBlockList().insert(It, copy0MBB); 5755 F->getBasicBlockList().insert(It, sinkMBB); 5756 // Update machine-CFG edges by first adding all successors of the current 5757 // block to the new block which will contain the Phi node for the select. 5758 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 5759 e = BB->succ_end(); i != e; ++i) 5760 sinkMBB->addSuccessor(*i); 5761 // Next, remove all successors of the current block, and add the true 5762 // and fallthrough blocks as its successors. 5763 while(!BB->succ_empty()) 5764 BB->removeSuccessor(BB->succ_begin()); 5765 BB->addSuccessor(copy0MBB); 5766 BB->addSuccessor(sinkMBB); 5767 5768 // copy0MBB: 5769 // %FalseValue = ... 5770 // # fallthrough to sinkMBB 5771 BB = copy0MBB; 5772 5773 // Update machine-CFG edges 5774 BB->addSuccessor(sinkMBB); 5775 5776 // sinkMBB: 5777 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 5778 // ... 5779 BB = sinkMBB; 5780 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 5781 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5782 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5783 5784 delete MI; // The pseudo instruction is gone now. 5785 return BB; 5786 } 5787 5788 case X86::FP32_TO_INT16_IN_MEM: 5789 case X86::FP32_TO_INT32_IN_MEM: 5790 case X86::FP32_TO_INT64_IN_MEM: 5791 case X86::FP64_TO_INT16_IN_MEM: 5792 case X86::FP64_TO_INT32_IN_MEM: 5793 case X86::FP64_TO_INT64_IN_MEM: 5794 case X86::FP80_TO_INT16_IN_MEM: 5795 case X86::FP80_TO_INT32_IN_MEM: 5796 case X86::FP80_TO_INT64_IN_MEM: { 5797 // Change the floating point control register to use "round towards zero" 5798 // mode when truncating to an integer value. 
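    // The 0xC7F stored below sets the rounding-control field (bits 11:10 of
    // the control word) to 11b = round toward zero, with all FP exceptions
    // masked; the original word is restored after the store has issued.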
5799 MachineFunction *F = BB->getParent(); 5800 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 5801 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 5802 5803 // Load the old value of the high byte of the control word... 5804 unsigned OldCW = 5805 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 5806 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 5807 5808 // Set the high part to be round to zero... 5809 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 5810 .addImm(0xC7F); 5811 5812 // Reload the modified control word now... 5813 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 5814 5815 // Restore the memory image of control word to original value 5816 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 5817 .addReg(OldCW); 5818 5819 // Get the X86 opcode to use. 5820 unsigned Opc; 5821 switch (MI->getOpcode()) { 5822 default: assert(0 && "illegal opcode!"); 5823 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; 5824 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; 5825 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; 5826 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; 5827 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; 5828 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; 5829 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; 5830 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; 5831 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; 5832 } 5833 5834 X86AddressMode AM; 5835 MachineOperand &Op = MI->getOperand(0); 5836 if (Op.isRegister()) { 5837 AM.BaseType = X86AddressMode::RegBase; 5838 AM.Base.Reg = Op.getReg(); 5839 } else { 5840 AM.BaseType = X86AddressMode::FrameIndexBase; 5841 AM.Base.FrameIndex = Op.getFrameIndex(); 5842 } 5843 Op = MI->getOperand(1); 5844 if (Op.isImmediate()) 5845 AM.Scale = Op.getImm(); 5846 Op = MI->getOperand(2); 5847 if (Op.isImmediate()) 5848 AM.IndexReg = Op.getImm(); 5849 Op = MI->getOperand(3); 5850 if (Op.isGlobalAddress()) { 5851 AM.GV = Op.getGlobal(); 5852 } else { 5853 AM.Disp = Op.getImm(); 5854 } 5855 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 5856 .addReg(MI->getOperand(4).getReg()); 5857 5858 // Reload the original control word now. 5859 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 5860 5861 delete MI; // The pseudo instruction is gone now. 5862 return BB; 5863 } 5864 } 5865} 5866 5867//===----------------------------------------------------------------------===// 5868// X86 Optimization Hooks 5869//===----------------------------------------------------------------------===// 5870 5871void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 5872 uint64_t Mask, 5873 uint64_t &KnownZero, 5874 uint64_t &KnownOne, 5875 const SelectionDAG &DAG, 5876 unsigned Depth) const { 5877 unsigned Opc = Op.getOpcode(); 5878 assert((Opc >= ISD::BUILTIN_OP_END || 5879 Opc == ISD::INTRINSIC_WO_CHAIN || 5880 Opc == ISD::INTRINSIC_W_CHAIN || 5881 Opc == ISD::INTRINSIC_VOID) && 5882 "Should use MaskedValueIsZero if you don't know whether Op" 5883 " is a target node!"); 5884 5885 KnownZero = KnownOne = 0; // Don't know anything. 
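  // For example, X86ISD::SETCC produces only 0 or 1 in its i8 result, so
  // every bit above bit 0 is known zero; that is the one fact recorded here.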
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0)
      : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(),
                               DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::Wrapper) {
    if (GlobalAddressSDNode *GASD =
          dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = GASD->getGlobal();
      // Account for any offset folded into the GlobalAddress node itself.
      Offset += GASD->getOffset();
      return true;
    }
  } else if (Opc == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
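/// For example, with Size == 4 and Dist == 1, this accepts a load whose
/// address is exactly Base + 4, whether both addresses are frame indices or
/// the same global plus constant offsets.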
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  // isGAPlusOffset only accumulates into Offset, so it must start at zero.
  GlobalValue *GV = NULL;
  int64_t Offset = 0;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}


/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isAlign16) {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  } else {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile(),
                       LD->getAlignment());
  }
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
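/// For instance, (select (setcc x, y, lt), x, y) on f32/f64 becomes
/// X86ISD::FMIN when SSE2 is available; the looser orderings are only taken
/// when -enable-unsafe-fp-math is in effect (see the switches below).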
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE2 support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:  // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}


SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
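/// The constraints handled here are 'I' (an integer constant in [0, 31],
/// e.g. a 32-bit shift count), 'N' (an unsigned 8-bit integer constant, as
/// used by in/out), and 'i' (a literal immediate, or a global address plus
/// optional displacement when no extra load is required to address it).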
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                     char Constraint,
                                                     std::vector<SDOperand> &Ops,
                                                     SelectionDAG &DAG) {
  SDOperand Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA), (GA+C), or (C+GA).
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the operands the other way around, i.e. (C+GA).
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we
      // can't match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
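    // These constraints name only a strict subset of a register class, so
    // answer them with an explicit list of allocatable registers rather than
    // a register class; 'Q', for instance, only allows the four registers
    // whose low byte is addressable.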
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".
  // If we really want an 8-bit, 32-bit, or 64-bit register, map to the
  // appropriate register class and return the appropriate register; e.g.
  // "{ax}" with an i32 operand becomes EAX in GR32.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}