X86ISelLowering.cpp revision 27a446afaf17a12a08c8c883416e7acc2f300acb
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  }

  // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
    setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  }

  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part
  // is not needed.
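  // For example, a single IA-32 "div" leaves x/y in EAX and x%y in EDX, so
  // lowering SDIV and SREM to one SDIVREM node lets CSE share the division.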
  setOperationAction(ISD::MUL, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::MUL, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::MUL, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FLT_ROUNDS, MVT::i32, Custom);

  setOperationAction(ISD::CTPOP, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8, Custom);
  setOperationAction(ISD::CTLZ, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Custom);
  setOperationAction(ISD::CTLZ, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTLZ, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Custom);
    setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Promote);
  // X86 wants to expand cmov itself.
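  // (SELECT and SETCC are lowered to X86ISD::CMOV and X86ISD::SETCC nodes
  // hanging off an explicit X86ISD::CMP, so the EFLAGS dependence stays
  // visible in the DAG.)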
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT, MVT::f80, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::f80, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET, MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY, MVT::Other, Custom);
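  // (The custom lowering can emit rep stos / rep movs for large or
  // variable-length operations; small fixed-size cases are still split into
  // individual stores, bounded by maxStoresPerMemset/Memcpy set below.)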
  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
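    // (No SSE instruction reads or writes the 80-bit x87 format, so every
    // f80 <-> SSE conversion has to go through an x87 load/store of a stack
    // slot.)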
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE->x87 conversions go through memory.
    setConvertAction(MVT::f32, MVT::f64, Expand);
    setConvertAction(MVT::f32, MVT::f80, Expand);

    // x87->SSE truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    // And x87->x87 truncations also.
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN, MVT::f80, Expand);
    setOperationAction(ISD::FCOS, MVT::f80, Expand);
  }

  // Always use a library call for pow.
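  // (Neither x87 nor SSE has a pow instruction, so FPOW expands to calls to
  // the powf/pow/powl libm routines.)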
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f80, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic.

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);
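    // MMX only has a 16-bit multiply (pmullw/pmulhw), so just the v4i16
    // forms are Legal.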
    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType(ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType(ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType(ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
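    // All of these 128-bit types share VR128: XMM registers are untyped, so
    // bitcasts between the vector types are free.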
    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      // Do not attempt to custom lower non-power-of-2 vectors.
      if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
        continue;
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
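  // (For example, LowerINTRINSIC_WO_CHAIN turns the SSE comi/ucomi
  // comparison intrinsics into X86ISD::COMI/UCOMI nodes plus a SETCC, so
  // later combines can reason about them.)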
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
                                                      SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node if
/// it exists, skipping a possible ISD::TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);
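  // An ISD::RET node is (chain, value, signness, value, signness, ...): one
  // operand pair per return value. That is why the operand count asserted
  // above is odd and values are fetched with getOperand(i*2+1) below.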

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a constant value");

    SmallVector<SDOperand, 8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i = 3; i < TailCall.getNumOperands()-1; ++i) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      SDOperand MemLoc;

      // If this is a load into a scalar-sse value, don't store the loaded
      // value back to the stack only to reload it: just reuse the load's
      // address.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
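        // (There is no direct XMM <-> x87 register move; FLD can only read
        // from memory, so the value must take a round trip through a stack
        // slot.)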
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      SDOperand StoreLoc;
      const Value *SrcVal = 0;
      int SrcValOffset = 0;
      MVT::ValueType RetStoreVT = RVLocs[0].getValVT();

      // Determine where to store the value. If the call result is directly
      // used by a store, see if we can store directly into the location. In
      // this case, we'll end up producing a fst + movss[load] + movss[store]
      // to the same location, and the two movss's will be nuked as dead.
      // This optimizes common things like "*D = atof(..)" to not need an
      // intermediate stack slot.
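      // (TheCall produces a value and a chain; insisting on a single use of
      // each below guarantees the store we find is the only consumer, so
      // redirecting the fst into it is safe.)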
      if (SDOperand(TheCall, 0).hasOneUse() &&
          SDOperand(TheCall, 1).hasOneUse()) {
        // In addition to direct uses, we also support an FP_ROUND that uses
        // the value, if it is directly stored somewhere.
        SDNode *User = *TheCall->use_begin();
        if (User->getOpcode() == ISD::FP_ROUND && User->hasOneUse())
          User = *User->use_begin();

        // Ok, we have one use of the value and one use of the chain. See if
        // they are the same node: a store.
        if (StoreSDNode *N = dyn_cast<StoreSDNode>(User)) {
          // Verify that the value being stored is either the call or a
          // truncation of the call.
          SDNode *StoreVal = N->getValue().Val;
          if (StoreVal == TheCall)
            ; // ok.
          else if (StoreVal->getOpcode() == ISD::FP_ROUND &&
                   StoreVal->hasOneUse() &&
                   StoreVal->getOperand(0).Val == TheCall)
            ; // ok.
          else
            N = 0; // not ok.

          if (N && N->getChain().Val == TheCall &&
              !N->isVolatile() && !N->isTruncatingStore() &&
              N->getAddressingMode() == ISD::UNINDEXED) {
            StoreLoc = N->getBasePtr();
            SrcVal = N->getSrcValue();
            SrcValOffset = N->getSrcValueOffset();
            RetStoreVT = N->getValue().getValueType();
          }
        }
      }

      // If we weren't able to optimize the result, just create a temporary
      // stack slot.
      if (StoreLoc.Val == 0) {
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
      }

      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks (which could happen if a select gets lowered into
      // multiple blocks and scheduled in between them). When stackifier is
      // fixed, they can be uncoupled.
      SDOperand Ops[] = {
        Chain, RetVal, StoreLoc, DAG.getValueType(RetStoreVT), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RetStoreVT, Chain,
                           StoreLoc, SrcVal, SrcValOffset);
      Chain = RetVal.getValue(1);

      // If we optimized a truncate, then extend the result back to its
      // desired type.
      if (RVLocs[0].getValVT() != RetStoreVT)
        RetVal = DAG.getNode(ISD::FP_EXTEND, RVLocs[0].getValVT(), RetVal);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention just a little: the callee should
//  clean up the stack, not the caller, and symbols should also be decorated
//  in some fancy way :) It doesn't support any vector arguments.
//  For info on the fast calling convention, see the Fast Calling Convention
//  (tail call) implementation in LowerX86_32FastCCCallTo.
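//  For example, a stdcall function foo taking 12 bytes of arguments is
//  emitted as _foo@12; fastcall decorates it as @foo@12 and passes the first
//  two integer words in ECX/EDX.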

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  MF.getRegInfo().addLiveIn(PReg, VReg);
  return VReg;
}

// Determines whether a CALL node uses struct return semantics.
static bool CallIsStructReturn(SDOperand Op) {
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  if (!NumOps)
    return false;

  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
static bool ArgsAreStructReturn(SDOperand Op) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  if (!NumArgs)
    return false;

  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to
// pop its own arguments. Callee pop is necessary to support tail calls.
bool X86TargetLowering::IsCalleePop(SDOperand Op) {
  bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (IsVarArg)
    return false;

  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}

// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit())
    if (CC == CallingConv::Fast && PerformTailCallOpt)
      return CC_X86_64_TailCall;
    else
      return CC_X86_64_C;

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast && PerformTailCallOpt)
    return CC_X86_32_TailCall;
  else
    return CC_X86_32_C;
}

// Selects the appropriate decoration to apply to a MachineFunction containing
// a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (CC == CallingConv::X86_FastCall)
    return FastCall;
  else if (CC == CallingConv::X86_StdCall)
    return StdCall;
  return None;
}

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
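  // For a byval argument the stack slot itself is the argument value, so we
  // hand back its frame index; everything else is loaded out of the slot.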
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));

  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CCAssignFnForNode(Op));

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip
    // later places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (Is64Bit && RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
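      // (The CopyFromReg above produced an i64 in a GR64; this bit_convert
      // reinterprets those bits as the MMX vector type the argument was
      // declared with.)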
      if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align the stack specially for tail calls.
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    if (Is64Bit || CC != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    }
    if (Is64Bit) {
      static const unsigned GPR64ArgRegs[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };

      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so
      // they may be loaded by dereferencing the result of va_next.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

      // Store the integer parameter registers.
      SmallVector<SDOperand, 8> MemOps;
      SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                  DAG.getConstant(VarArgsGPOffset,
                                                  getPointerTy()));
      for (; NumIntRegs != 6; ++NumIntRegs) {
        unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                  X86::GR64RegisterClass);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getConstant(8, getPointerTy()));
      }

      // Now store the XMM (fp + vector) parameter registers.
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                        DAG.getConstant(VarArgsFPOffset, getPointerTy()));
      for (; NumXMMRegs != 8; ++NumXMMRegs) {
        unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                  X86::VR128RegisterClass);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
        SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                          DAG.getConstant(16, getPointerTy()));
      }
      if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOps[0], MemOps.size());
    }
  }

  // Make sure the argument area takes 8n+4 bytes, so that the start of the
  // arguments stays aligned after the return address has been pushed.
  if (!Is64Bit && CC == CallingConv::X86_FastCall &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
      (StackSize & 7) == 0)
    StackSize += 4;

  ArgValues.push_back(Root);

  // Some CCs need callee pop.
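  // (StdCall/FastCall and tail-call-enabled fastcc return with "ret N" to
  // clean up their own arguments; the plain C convention leaves cleanup to
  // the caller.)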
  if (IsCalleePop(Op)) {
    BytesToPopOnReturn  = StackSize; // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0; // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!Is64Bit && ArgsAreStructReturn(Op))
      BytesToPopOnReturn = 4;
    BytesCallerReserves = StackSize;
  }

  if (!Is64Bit) {
    RegSaveFrameIndex = 0xAAAAAAA;   // RegSaveFrameIndex is X86-64 only.
    if (CC == CallingConv::X86_FastCall)
      VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
  }

  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
    SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);

    return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
                         AlwaysInline);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SDOperand Chain = Op.getOperand(0);
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
                    && CC == CallingConv::Fast && PerformTailCallOpt;
  SDOperand Callee = Op.getOperand(4);
  bool Is64Bit = Subtarget->is64Bit();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CCAssignFnForNode(Op));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  // Make sure the argument area takes 8n+4 bytes, so that the start of the
  // arguments stays aligned after the return address has been pushed.
  if (!Is64Bit && CC == CallingConv::X86_FastCall &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
      (NumBytes & 7) == 0)
    NumBytes += 4;

  int FPDiff = 0;
  if (IsTailCall) {
    // Lower arguments at fp - stackoffset + fpdiff.
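    // FPDiff is the size of this function's own incoming argument area minus
    // the bytes this call pushes: the tail call reuses that area, so the
    // return address slot has to move by this delta.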
    unsigned NumBytesCallerPushed =
      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
    FPDiff = NumBytesCallerPushed - NumBytes;

    // Record the delta the return address stack slot moves by, but only if
    // this delta is below the previously recorded one.
    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
  }

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
  if (IsTailCall) {
    // Adjust the return address stack slot.
    if (FPDiff) {
      MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
      RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
      // Load the "old" return address.
      RetAddrFrIdx = DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
      // Calculate the new stack slot for the return address.
      int SlotSize = Is64Bit ? 8 : 4;
      int NewReturnAddrFI =
        MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
      NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
      Chain = SDOperand(RetAddrFrIdx.Val, 1);
    }
  }

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  // For tail calls, lower arguments first to the stack slot where they would
  // normally - in case of a normal function call - be.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (IsTailCall)
    InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output?

  // ELF / PIC requires the GOT pointer in EBX for calls through the PLT.
  // This does not work for tail calls, since EBX is not restored correctly
  // by the tailcaller. TODO: at least for x86; verify for x86-64.
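  // (Lazy PLT stubs on i386 address the GOT relative to EBX, so the ABI
  // requires EBX == &_GLOBAL_OFFSET_TABLE_ at any call through the PLT.)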
  // TODO: at least for x86 - verify for x86-64.
  if (!IsTailCall && !Is64Bit &&
      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (Is64Bit && isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // Copy arguments from their stack slots to the stack slots of the tail
  // called function. This needs to be done because if we lowered the
  // arguments directly to their real stack slots we might end up overwriting
  // one another.
  // TODO: To make this more efficient (sometimes saving a store/load) we could
  // analyse the arguments and emit this store/load/store sequence only for
  // arguments which would be overwritten otherwise.
  if (IsTailCall) {
    SmallVector<SDOperand, 8> MemOpChains2;
    SDOperand PtrOff;
    SDOperand FIN;
    int FI = 0;
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (!VA.isRegLoc()) {
        SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
        unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();

        // Get the source stack slot.
        SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(),
                                           getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        // Create the frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
        uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
        FIN = DAG.getFrameIndex(FI, MVT::i32);
        if (Flags & ISD::ParamFlags::ByVal) {
          // Copy relative to framepointer.
          unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                                 ISD::ParamFlags::ByValAlignOffs);

          unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                          ISD::ParamFlags::ByValSizeOffs;

          SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
          SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);
          SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);

          MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
                                               AlignNode, AlwaysInline));
        } else {
          SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff,
                                            NULL, 0);
          // Store relative to framepointer.
          MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
        }
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Store the return address to the appropriate stack slot.
    if (FPDiff)
      Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if ((IsTailCall || !Is64Bit ||
         getTargetMachine().getCodeModel() != CodeModel::Large)
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsTailCall || !Is64Bit ||
        getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  } else if (IsTailCall) {
    assert(Callee.getOpcode() == ISD::LOAD &&
           "Function destination must be loaded into virtual register");
    unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add the register as a live out.
    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;

  if (IsTailCall) {
    Ops.push_back(Chain);
    Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
    Ops.push_back(DAG.getConstant(0, getPointerTy()));
    if (InFlag.Val)
      Ops.push_back(InFlag);
    Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
    InFlag = Chain.getValue(1);

    // Returns a chain & a flag for retval copy to use.
    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall)
    Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));

  // Add an implicit use of the GOT pointer in EBX.
  if (!IsTailCall && !Is64Bit &&
      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  if (IsTailCall) {
    assert(InFlag.Val &&
           "Flag must be set; we depend on the flag being set in LowerRET");
    Chain = DAG.getNode(X86ISD::TAILCALL,
                        Op.Val->getVTList(), &Ops[0], Ops.size());

    return SDOperand(Chain.Val, Op.ResNo);
  }

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush;
  if (IsCalleePop(Op))
    NumBytesForCalleeToPush = NumBytes;  // Callee pops everything.
  else if (!Is64Bit && CallIsStructReturn(Op))
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = 4;
  else
    NumBytesForCalleeToPush = 0;  // Callee pops nothing.

  // Returns a flag for retval copy to use.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, getPointerTy()),
                             DAG.getConstant(NumBytesForCalleeToPush,
                                             getPointerTy()),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

//  Like the stdcall convention, the callee cleans up the arguments, except
//  that ECX is reserved for storing the tail-called function's address. Only
//  2 registers are free for argument passing (inreg). Tail call optimization
//  is performed provided:
//                * tailcallopt is enabled
//                * caller/callee are fastcc
//                * elf/pic is disabled OR
//                * elf/pic enabled + callee is in module + callee has
//                  visibility protected or hidden
//  To keep the stack aligned according to the platform ABI, the function
//  GetAlignedArgumentStackSize ensures that the argument delta is always a
//  multiple of the stack alignment. (Dynamic linkers need this - darwin's
//  dyld, for example.)
//  If a tail-called function (callee) has more arguments than the caller, the
//  caller needs to make sure that there is room to move the RETADDR to. This
//  is achieved by reserving an area the size of the argument delta right
//  after the original RETADDR, but before the saved framepointer or the
//  spilled registers, e.g. caller(arg1, arg2) calls
//  callee(arg1, arg2, arg3, arg4). Stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round the stack size up so that it is, e.g.,
/// 16n + 12 bytes for a 16 byte alignment requirement.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // The remainder is no larger than StackAlignment - SlotSize (e.g. 12),
      // so just add the difference.
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the low bits, then add the stack alignment once plus the
      // StackAlignment - SlotSize (e.g. 12) bytes.
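      // Worked example (illustrative): with StackAlignment == 16 and
      // SlotSize == 4, StackSize == 30 has remainder 14 > 12, so
      // Offset = (30 & ~15) + 16 + 12 = 44, the next value of the form
      // 16n + 12.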
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment-SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A call is eligible if the
/// caller's and callee's calling conventions match (currently only fastcc
/// supports tail calls) and the CALL is immediately followed by a RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                          SDOperand Ret,
                                                          SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  // Check whether the CALL node immediately precedes the RET node and whether
  // the return uses the result of the node or is a void return.
  unsigned NumOps = Ret.getNumOperands();
  if ((NumOps == 1 &&
       (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
        Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
      (NumOps > 1 &&
       Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
       Ret.getOperand(1) == SDOperand(Call.Val,0))) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDOperand Callee = Call.getOperand(4);
      // On elf/pic %ebx needs to be livein.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT())
        return true;

      // Can only do local tail calls with PIC.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}


/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
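        // e.g. (illustrative) a signed "X > -1" becomes a compare of X
        // against zero followed by JNS: the predicate holds exactly when the
        // sign bit of X is clear.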
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the half-open range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value is equal to the specified value.
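/// For example (illustrative), a mask element that is either undef or the
/// constant 3 satisfies isUndefOrEqual(Op, 3); any other constant does not.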
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want.
/// x86 shuffles require the lower half elements to come from vector 1 (which
/// would equal the dest.) and the upper half to come from vector 2.
static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
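/// For a 4-element vector, e.g., the canonical unpckl mask is <0, 4, 1, 5>:
/// it interleaves the low halves of the two input vectors.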
static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      if (isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
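/// For 4 elements, e.g., the canonical MOVL mask is <4, 1, 2, 3>: the low
/// element comes from V2 and the remaining elements from V1 in order.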
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. X86 movss requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
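/// MOVSLDUP duplicates the even-indexed elements, so for a 4-element vector
/// the expected mask is <0, 0, 2, 2>.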
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit, and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the values
/// in the permute mask.
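/// For example, commuting the mask <0, 1, 6, 7> yields <4, 5, 2, 3>, with V1
/// and V2 swapped, which selects exactly the same elements.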
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                      SDOperand &V2, SDOperand &Mask,
                                      SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static
SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
}


/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return ISD::isNON_EXTLoad(N);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order). And since V1 will become the source of
/// the MOVLP, it must be either a vector load or a scalar load to vector.
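/// For v4f32, e.g., a qualifying mask is <0, 1, 6, 7>: the low two elements
/// come from V1's low half and the high two from V2's high half.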
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation. We will try to fold
  // the load into a shufps instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
static bool isZeroShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF)
      continue;

    unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
    if (Idx < NumElems) {
      unsigned Opc = V1.Val->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V1.Val->getOperand(Idx)))
        return false;
    } else if (Idx >= NumElems) {
      unsigned Opc = V2.Val->getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
        return false;
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");

  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
  SDOperand Vec;
  if (MVT::getSizeInBits(VT) == 64)  // MMX
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  else                               // SSE
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}

/// getOnesVector - Returns a vector of specified type with all bits set.
///
static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");

  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
  SDOperand Vec;
  if (MVT::getSizeInBits(VT) == 64)  // MMX
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
  else                               // SSE
    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}


/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  SmallVector<SDOperand, 8> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
                       &MaskVec[0], MaskVec.size());
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);

  SmallVector<SDOperand, 8> MaskVec;
  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
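/// For NumElems == 4, e.g., this produces the interleaving mask <0, 4, 1, 5>.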
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
  unsigned Half = NumElems/2;
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand Mask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  unsigned NumElems = Mask.getNumOperands();
  Mask = getUnpacklMask(NumElems, DAG);
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  Mask = getZeroVector(MVT::v4i32, DAG);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector of zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                             unsigned NumElems, unsigned Idx,
                                             bool isZero, SelectionDAG &DAG) {
  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
  SmallVector<SDOperand, 16> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i)
    if (i == Idx)  // If this is the insertion idx, put the low elt of V2 here.
      MaskVec.push_back(DAG.getConstant(NumElems, EVT));
    else
      MaskVec.push_back(DAG.getConstant(i, EVT));
  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                               &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
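/// Strategy sketch (illustrative summary): adjacent byte pairs are packed
/// into 16-bit words with zero-extend, shift and or; the words are inserted
/// into a zero or undef v8i16 via INSERT_VECTOR_ELT (pinsrw), and the result
/// is bitcast back to v16i8.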
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, DAG);
      else
        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDOperand ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.Val)
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
                        DAG.getConstant(i/2, TLI.getPointerTy()));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}

/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
                                       unsigned NumNonZero, unsigned NumZero,
                                       SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDOperand();

  SDOperand V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, DAG);
        else
          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
                      DAG.getConstant(i, TLI.getPointerTy()));
    }
  }

  return V;
}

SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor, all ones are handled with pcmpeqd.
  if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
    // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
    // eliminated on x86-32 hosts.
    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
      return Op;

    if (ISD::isBuildVectorAllOnes(Op.Val))
      return getOnesVector(Op.getValueType(), DAG);
    return getZeroVector(Op.getValueType(), DAG);
  }

  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  unsigned EVTBits = MVT::getSizeInBits(EVT);

  unsigned NumElems = Op.getNumOperands();
  unsigned NumZero  = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  bool HasNonImms = false;
  SmallSet<SDOperand, 8> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDOperand Elt = Op.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    Values.insert(Elt);
    if (Elt.getOpcode() != ISD::Constant &&
        Elt.getOpcode() != ISD::ConstantFP)
      HasNonImms = true;
    if (isZeroNode(Elt))
      NumZero++;
    else {
      NonZeros |= (1 << i);
      NumNonZero++;
    }
  }

  if (NumNonZero == 0) {
    // All undef vector. Return an UNDEF. All zero vectors were handled above.
    return DAG.getNode(ISD::UNDEF, VT);
  }

  // Splat is obviously ok. Let the legalizer expand it to a shuffle.
  if (Values.size() == 1)
    return SDOperand();

  // Special case for a single non-zero element.
  if (NumNonZero == 1 && NumElems <= 4) {
    unsigned Idx = CountTrailingZeros_32(NonZeros);
    SDOperand Item = Op.getOperand(Idx);
    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
    if (Idx == 0)
      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
                                         NumZero > 0, DAG);
    else if (!HasNonImms) // Otherwise, it's better to do a constpool load.
      return SDOperand();

    if (EVTBits == 32) {
      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
                                         DAG);
      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
      MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
      SmallVector<SDOperand, 8> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                   &MaskVec[0], MaskVec.size());
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
                         DAG.getNode(ISD::UNDEF, VT), Mask);
    }
  }

  // A vector full of immediates; various special cases are already
  // handled, so this is best done with a single constant-pool load.
  if (!HasNonImms)
    return SDOperand();

  // Let the legalizer expand 2-wide build_vectors.
  if (EVTBits == 64)
    return SDOperand();

  // If element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8 && NumElems == 16) {
    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
                                        *this);
    if (V.Val) return V;
  }

  if (EVTBits == 16 && NumElems == 8) {
    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
                                        *this);
    if (V.Val) return V;
  }

  // If element VT is == 32 bits, turn it into a number of shuffles.
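  // Sketch of the approach below (illustrative): build each element with
  // scalar_to_vector, combine adjacent pairs with MOVL/unpckl-style shuffles
  // so each half lands in one register, then merge the two halves with a
  // final shuffle.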
  SmallVector<SDOperand, 8> V;
  V.resize(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, DAG);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    }

    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
      default: break;
      case 0:
        V[i] = V[i*2];  // Must be a zero vector.
        break;
      case 1:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
                           getMOVLMask(NumElems, DAG));
        break;
      case 2:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                           getMOVLMask(NumElems, DAG));
        break;
      case 3:
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
                           getUnpacklMask(NumElems, DAG));
        break;
      }
    }

    // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
    // clears the upper bits.
    // FIXME: we can do the same for v4f32 case when we know both parts of
    // the lower half come from scalar_to_vector (loadf32). We should do
    // that in post legalizer dag combiner with target specific hooks.
    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
      return V[0];
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
    MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
    SmallVector<SDOperand, 8> MaskVec;
    bool Reverse = (NonZeros & 0x3) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i, EVT));
    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    for (unsigned i = 0; i < 2; ++i)
      if (Reverse)
        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
      else
        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &MaskVec[0], MaskVec.size());
    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
  }

  if (Values.size() > 2) {
    // Expand into a number of unpckl*.
    // e.g. for v4f32
    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
    //   Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
    NumElems >>= 1;
    while (NumElems != 0) {
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                           UnpckMask);
      NumElems >>= 1;
    }
    return V[0];
  }

  return SDOperand();
}

static
SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
                                   SDOperand PermMask, SelectionDAG &DAG,
                                   TargetLowering &TLI) {
  SDOperand NewV;
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
  MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
  MVT::ValueType PtrVT = TLI.getPointerTy();
  SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
                                     PermMask.Val->op_end());

  // First record which half of which vector the low elements come from.
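  // Example (illustrative): for the mask <0, 1, 2, 3, 12, 13, 14, 15>, all
  // low elements come from quad 0 (V1's low half) and all high elements from
  // quad 3 (V2's high half), so the scan below finds BestLowQuad == 0 and
  // BestHighQuad == 3.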
2943 SmallVector<unsigned, 4> LowQuad(4); 2944 for (unsigned i = 0; i < 4; ++i) { 2945 SDOperand Elt = MaskElts[i]; 2946 if (Elt.getOpcode() == ISD::UNDEF) 2947 continue; 2948 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 2949 int QuadIdx = EltIdx / 4; 2950 ++LowQuad[QuadIdx]; 2951 } 2952 int BestLowQuad = -1; 2953 unsigned MaxQuad = 1; 2954 for (unsigned i = 0; i < 4; ++i) { 2955 if (LowQuad[i] > MaxQuad) { 2956 BestLowQuad = i; 2957 MaxQuad = LowQuad[i]; 2958 } 2959 } 2960 2961 // Record which half of which vector the high elements come from. 2962 SmallVector<unsigned, 4> HighQuad(4); 2963 for (unsigned i = 4; i < 8; ++i) { 2964 SDOperand Elt = MaskElts[i]; 2965 if (Elt.getOpcode() == ISD::UNDEF) 2966 continue; 2967 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 2968 int QuadIdx = EltIdx / 4; 2969 ++HighQuad[QuadIdx]; 2970 } 2971 int BestHighQuad = -1; 2972 MaxQuad = 1; 2973 for (unsigned i = 0; i < 4; ++i) { 2974 if (HighQuad[i] > MaxQuad) { 2975 BestHighQuad = i; 2976 MaxQuad = HighQuad[i]; 2977 } 2978 } 2979 2980 // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. 2981 if (BestLowQuad != -1 || BestHighQuad != -1) { 2982 // First sort the 4 chunks in order using shufpd. 2983 SmallVector<SDOperand, 8> MaskVec; 2984 if (BestLowQuad != -1) 2985 MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); 2986 else 2987 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 2988 if (BestHighQuad != -1) 2989 MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); 2990 else 2991 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 2992 SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); 2993 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 2994 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), 2995 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); 2996 NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); 2997 2998 // Now sort high and low parts separately. 2999 BitVector InOrder(8); 3000 if (BestLowQuad != -1) { 3001 // Sort lower half in order using PSHUFLW. 3002 MaskVec.clear(); 3003 bool AnyOutOrder = false; 3004 for (unsigned i = 0; i != 4; ++i) { 3005 SDOperand Elt = MaskElts[i]; 3006 if (Elt.getOpcode() == ISD::UNDEF) { 3007 MaskVec.push_back(Elt); 3008 InOrder.set(i); 3009 } else { 3010 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3011 if (EltIdx != i) 3012 AnyOutOrder = true; 3013 MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); 3014 // If this element is in the right place after this shuffle, then 3015 // remember it. 3016 if ((int)(EltIdx / 4) == BestLowQuad) 3017 InOrder.set(i); 3018 } 3019 } 3020 if (AnyOutOrder) { 3021 for (unsigned i = 4; i != 8; ++i) 3022 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3023 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3024 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3025 } 3026 } 3027 3028 if (BestHighQuad != -1) { 3029 // Sort high half in order using PSHUFHW if possible. 
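// PSHUFHW permutes only elements 4-7 and passes elements 0-3 through
// unchanged, so the mask built below keeps an identity low half; this
// mirrors the PSHUFLW handling of the low half above.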
3030 MaskVec.clear();
3031 for (unsigned i = 0; i != 4; ++i)
3032 MaskVec.push_back(DAG.getConstant(i, MaskEVT));
3033 bool AnyOutOrder = false;
3034 for (unsigned i = 4; i != 8; ++i) {
3035 SDOperand Elt = MaskElts[i];
3036 if (Elt.getOpcode() == ISD::UNDEF) {
3037 MaskVec.push_back(Elt);
3038 InOrder.set(i);
3039 } else {
3040 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3041 if (EltIdx != i)
3042 AnyOutOrder = true;
3043 MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
3044 // If this element is in the right place after this shuffle, then
3045 // remember it.
3046 if ((int)(EltIdx / 4) == BestHighQuad)
3047 InOrder.set(i);
3048 }
3049 }
3050 if (AnyOutOrder) {
3051 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
3052 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
3053 }
3054 }
3055
3056 // The other elements are put in the right place using pextrw and pinsrw.
3057 for (unsigned i = 0; i != 8; ++i) {
3058 if (InOrder[i])
3059 continue;
3060 SDOperand Elt = MaskElts[i];
3061 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3062 if (EltIdx == i)
3063 continue;
3064 SDOperand ExtOp = (EltIdx < 8)
3065 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
3066 DAG.getConstant(EltIdx, PtrVT))
3067 : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
3068 DAG.getConstant(EltIdx - 8, PtrVT));
3069 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3070 DAG.getConstant(i, PtrVT));
3071 }
3072 return NewV;
3073 }
3074
3075 // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
3076 // as few as possible.
3077 // First, let's find out how many elements are already in the right order.
3078 unsigned V1InOrder = 0;
3079 unsigned V1FromV1 = 0;
3080 unsigned V2InOrder = 0;
3081 unsigned V2FromV2 = 0;
3082 SmallVector<SDOperand, 8> V1Elts;
3083 SmallVector<SDOperand, 8> V2Elts;
3084 for (unsigned i = 0; i < 8; ++i) {
3085 SDOperand Elt = MaskElts[i];
3086 if (Elt.getOpcode() == ISD::UNDEF) {
3087 V1Elts.push_back(Elt);
3088 V2Elts.push_back(Elt);
3089 ++V1InOrder;
3090 ++V2InOrder;
3091 continue;
3092 }
3093 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3094 if (EltIdx == i) {
3095 V1Elts.push_back(Elt);
3096 V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
3097 ++V1InOrder;
3098 } else if (EltIdx == i+8) {
3099 V1Elts.push_back(Elt);
3100 V2Elts.push_back(DAG.getConstant(i, MaskEVT));
3101 ++V2InOrder;
3102 } else if (EltIdx < 8) {
3103 V1Elts.push_back(Elt);
3104 ++V1FromV1;
3105 } else {
3106 V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
3107 ++V2FromV2;
3108 }
3109 }
3110
3111 if (V2InOrder > V1InOrder) {
3112 PermMask = CommuteVectorShuffleMask(PermMask, DAG);
3113 std::swap(V1, V2);
3114 std::swap(V1Elts, V2Elts);
3115 std::swap(V1FromV1, V2FromV2);
3116 }
3117
3118 if ((V1FromV1 + V1InOrder) != 8) {
3119 // Some elements are from V2.
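// The plan here: first shuffle V1's own elements into their final slots
// with a single v8i16 shuffle, then transfer the remaining elements from
// V2 one at a time with pextrw/pinsrw.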
3120 if (V1FromV1) {
3121 // If there are elements that are from V1 but out of place,
3122 // then first sort them in place.
3123 SmallVector<SDOperand, 8> MaskVec;
3124 for (unsigned i = 0; i < 8; ++i) {
3125 SDOperand Elt = V1Elts[i];
3126 if (Elt.getOpcode() == ISD::UNDEF) {
3127 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3128 continue;
3129 }
3130 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3131 if (EltIdx >= 8)
3132 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3133 else
3134 MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
3135 }
3136 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
3137 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
3138 }
3139
3140 NewV = V1;
3141 for (unsigned i = 0; i < 8; ++i) {
3142 SDOperand Elt = V1Elts[i];
3143 if (Elt.getOpcode() == ISD::UNDEF)
3144 continue;
3145 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3146 if (EltIdx < 8)
3147 continue;
3148 SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
3149 DAG.getConstant(EltIdx - 8, PtrVT));
3150 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3151 DAG.getConstant(i, PtrVT));
3152 }
3153 return NewV;
3154 } else {
3155 // All elements are from V1.
3156 NewV = V1;
3157 for (unsigned i = 0; i < 8; ++i) {
3158 SDOperand Elt = V1Elts[i];
3159 if (Elt.getOpcode() == ISD::UNDEF)
3160 continue;
3161 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3162 SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
3163 DAG.getConstant(EltIdx, PtrVT));
3164 NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3165 DAG.getConstant(i, PtrVT));
3166 }
3167 return NewV;
3168 }
3169 }
3170
3171 /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
3172 /// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
3173 /// done when every pair / quad of shuffle mask elements point to elements in
3174 /// the right sequence. e.g.
3175 /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
3176 static
3177 SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
3178 MVT::ValueType VT,
3179 SDOperand PermMask, SelectionDAG &DAG,
3180 TargetLowering &TLI) {
3181 unsigned NumElems = PermMask.getNumOperands();
3182 unsigned NewWidth = (NumElems == 4) ?
2 : 4; 3183 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth); 3184 MVT::ValueType NewVT = MaskVT; 3185 switch (VT) { 3186 case MVT::v4f32: NewVT = MVT::v2f64; break; 3187 case MVT::v4i32: NewVT = MVT::v2i64; break; 3188 case MVT::v8i16: NewVT = MVT::v4i32; break; 3189 case MVT::v16i8: NewVT = MVT::v4i32; break; 3190 default: assert(false && "Unexpected!"); 3191 } 3192 3193 if (NewWidth == 2) 3194 if (MVT::isInteger(VT)) 3195 NewVT = MVT::v2i64; 3196 else 3197 NewVT = MVT::v2f64; 3198 unsigned Scale = NumElems / NewWidth; 3199 SmallVector<SDOperand, 8> MaskVec; 3200 for (unsigned i = 0; i < NumElems; i += Scale) { 3201 unsigned StartIdx = ~0U; 3202 for (unsigned j = 0; j < Scale; ++j) { 3203 SDOperand Elt = PermMask.getOperand(i+j); 3204 if (Elt.getOpcode() == ISD::UNDEF) 3205 continue; 3206 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3207 if (StartIdx == ~0U) 3208 StartIdx = EltIdx - (EltIdx % Scale); 3209 if (EltIdx != StartIdx + j) 3210 return SDOperand(); 3211 } 3212 if (StartIdx == ~0U) 3213 MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 3214 else 3215 MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32)); 3216 } 3217 3218 V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1); 3219 V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2); 3220 return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2, 3221 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3222 &MaskVec[0], MaskVec.size())); 3223} 3224 3225SDOperand 3226X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 3227 SDOperand V1 = Op.getOperand(0); 3228 SDOperand V2 = Op.getOperand(1); 3229 SDOperand PermMask = Op.getOperand(2); 3230 MVT::ValueType VT = Op.getValueType(); 3231 unsigned NumElems = PermMask.getNumOperands(); 3232 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 3233 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 3234 bool V1IsSplat = false; 3235 bool V2IsSplat = false; 3236 3237 if (isUndefShuffle(Op.Val)) 3238 return DAG.getNode(ISD::UNDEF, VT); 3239 3240 if (isZeroShuffle(Op.Val)) 3241 return getZeroVector(VT, DAG); 3242 3243 if (isIdentityMask(PermMask.Val)) 3244 return V1; 3245 else if (isIdentityMask(PermMask.Val, true)) 3246 return V2; 3247 3248 if (isSplatMask(PermMask.Val)) { 3249 if (NumElems <= 4) return Op; 3250 // Promote it to a v4i32 splat. 3251 return PromoteSplat(Op, DAG); 3252 } 3253 3254 // If the shuffle can be profitably rewritten as a narrower shuffle, then 3255 // do it! 3256 if (VT == MVT::v8i16 || VT == MVT::v16i8) { 3257 SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this); 3258 if (NewOp.Val) 3259 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); 3260 } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { 3261 // FIXME: Figure out a cleaner way to do this. 3262 // Try to make use of movq to zero out the top part. 
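// e.g. (illustrative) a v4i32 shuffle with mask <0, 1, 4, 5> and an
// all-zeros V2 yields <a, b, 0, 0>; narrowed to v2i64 that is exactly
// what movq does (move the low quadword, zero the high one), which is
// the (possibly commuted) MOVL pattern looked for below.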
3263 if (ISD::isBuildVectorAllZeros(V2.Val)) {
3264 SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
3265 if (NewOp.Val) {
3266 SDOperand NewV1 = NewOp.getOperand(0);
3267 SDOperand NewV2 = NewOp.getOperand(1);
3268 SDOperand NewMask = NewOp.getOperand(2);
3269 if (isCommutedMOVL(NewMask.Val, true, false)) {
3270 NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
3271 NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
3272 NewV1, NewV2, getMOVLMask(2, DAG));
3273 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
3274 }
3275 }
3276 } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
3277 SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
3278 if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
3279 return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
3280 }
3281 }
3282
3283 if (X86::isMOVLMask(PermMask.Val))
3284 return (V1IsUndef) ? V2 : Op;
3285
3286 if (X86::isMOVSHDUPMask(PermMask.Val) ||
3287 X86::isMOVSLDUPMask(PermMask.Val) ||
3288 X86::isMOVHLPSMask(PermMask.Val) ||
3289 X86::isMOVHPMask(PermMask.Val) ||
3290 X86::isMOVLPMask(PermMask.Val))
3291 return Op;
3292
3293 if (ShouldXformToMOVHLPS(PermMask.Val) ||
3294 ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
3295 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3296
3297 bool Commuted = false;
3298 // FIXME: This should also accept a bitcast of a splat? Be careful, not
3299 // 1,1,1,1 -> v8i16 though.
3300 V1IsSplat = isSplatVector(V1.Val);
3301 V2IsSplat = isSplatVector(V2.Val);
3302
3303 // Canonicalize the splat or undef, if present, to be on the RHS.
3304 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
3305 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3306 std::swap(V1IsSplat, V2IsSplat);
3307 std::swap(V1IsUndef, V2IsUndef);
3308 Commuted = true;
3309 }
3310
3311 // FIXME: Figure out a cleaner way to do this.
3312 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3313 if (V2IsUndef) return V1;
3314 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3315 if (V2IsSplat) {
3316 // V2 is a splat, so the mask may be malformed. That is, it may point
3317 // to any V2 element. The instruction selector won't like this. Get
3318 // a corrected mask and commute to form a proper MOVS{S|D}.
3319 SDOperand NewMask = getMOVLMask(NumElems, DAG);
3320 if (NewMask.Val != PermMask.Val)
3321 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3322 }
3323 return Op;
3324 }
3325
3326 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3327 X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3328 X86::isUNPCKLMask(PermMask.Val) ||
3329 X86::isUNPCKHMask(PermMask.Val))
3330 return Op;
3331
3332 if (V2IsSplat) {
3333 // Normalize the mask so all entries that point to V2 point to its first
3334 // element, then try to match unpck{h|l} again. If there is a match, return
3335 // a new vector_shuffle with the corrected mask.
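// Since V2 is a splat, every one of its lanes holds the same value, so
// e.g. a v4i32 mask <0, 7, 2, 5> can safely be rewritten as <0, 4, 2, 4>;
// that is what NormalizeMask does below.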
3336 SDOperand NewMask = NormalizeMask(PermMask, DAG);
3337 if (NewMask.Val != PermMask.Val) {
3338 if (X86::isUNPCKLMask(PermMask.Val, true)) {
3339 SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3340 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3341 } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3342 SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3343 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3344 }
3345 }
3346 }
3347
3348 // Normalize the node to match x86 shuffle ops if needed.
3349 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
3350 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3351
3352 if (Commuted) {
3353 // Commute it back and try unpck* again.
3354 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3355 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3356 X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3357 X86::isUNPCKLMask(PermMask.Val) ||
3358 X86::isUNPCKHMask(PermMask.Val))
3359 return Op;
3360 }
3361
3362 // If VT is integer, try PSHUF* first, then SHUFP*.
3363 if (MVT::isInteger(VT)) {
3364 // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
3365 // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
3366 if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
3367 X86::isPSHUFDMask(PermMask.Val)) ||
3368 X86::isPSHUFHWMask(PermMask.Val) ||
3369 X86::isPSHUFLWMask(PermMask.Val)) {
3370 if (V2.getOpcode() != ISD::UNDEF)
3371 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3372 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3373 return Op;
3374 }
3375
3376 if (X86::isSHUFPMask(PermMask.Val) &&
3377 MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
3378 return Op;
3379 } else {
3380 // Floating point cases in the other order.
3381 if (X86::isSHUFPMask(PermMask.Val))
3382 return Op;
3383 if (X86::isPSHUFDMask(PermMask.Val) ||
3384 X86::isPSHUFHWMask(PermMask.Val) ||
3385 X86::isPSHUFLWMask(PermMask.Val)) {
3386 if (V2.getOpcode() != ISD::UNDEF)
3387 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3388 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3389 return Op;
3390 }
3391 }
3392
3393 // Handle v8i16 specifically since SSE can do word extraction and insertion.
3394 if (VT == MVT::v8i16) {
3395 SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
3396 if (NewOp.Val)
3397 return NewOp;
3398 }
3399
3400 // Handle all 4 wide cases with a number of shuffles.
3401 if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
3402 // Don't do this for MMX.
3403 MVT::ValueType MaskVT = PermMask.getValueType();
3404 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3405 SmallVector<std::pair<int, int>, 8> Locs;
3406 Locs.resize(NumElems);
3407 SmallVector<SDOperand, 8> Mask1(NumElems,
3408 DAG.getNode(ISD::UNDEF, MaskEVT));
3409 SmallVector<SDOperand, 8> Mask2(NumElems,
3410 DAG.getNode(ISD::UNDEF, MaskEVT));
3411 unsigned NumHi = 0;
3412 unsigned NumLo = 0;
3413 // If no more than two elements come from either vector, this can be
3414 // implemented with two shuffles. The first shuffle gathers the elements;
3415 // the second shuffle, which takes the first shuffle as both of its
3416 // vector operands, puts the elements into the right order.
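// Worked example (illustrative): mask <0, 4, 1, 5> takes two elements
// from each source. The first shuffle gathers them as
// <V1[0], V1[1], V2[0], V2[1]>; the second shuffle then picks elements
// <0, 2, 1, 3> of that result (emitted as mask <0, 2, 5, 7> since both
// of its operands are the same vector).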
3417 for (unsigned i = 0; i != NumElems; ++i) {
3418 SDOperand Elt = PermMask.getOperand(i);
3419 if (Elt.getOpcode() == ISD::UNDEF) {
3420 Locs[i] = std::make_pair(-1, -1);
3421 } else {
3422 unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3423 if (Val < NumElems) {
3424 Locs[i] = std::make_pair(0, NumLo);
3425 Mask1[NumLo] = Elt;
3426 NumLo++;
3427 } else {
3428 Locs[i] = std::make_pair(1, NumHi);
3429 if (2+NumHi < NumElems)
3430 Mask1[2+NumHi] = Elt;
3431 NumHi++;
3432 }
3433 }
3434 }
3435 if (NumLo <= 2 && NumHi <= 2) {
3436 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3437 DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3438 &Mask1[0], Mask1.size()));
3439 for (unsigned i = 0; i != NumElems; ++i) {
3440 if (Locs[i].first == -1)
3441 continue;
3442 else {
3443 unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3444 Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3445 Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3446 }
3447 }
3448
3449 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3450 DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3451 &Mask2[0], Mask2.size()));
3452 }
3453
3454 // Break it into (shuffle shuffle_hi, shuffle_lo).
3455 Locs.clear(); Locs.resize(NumElems);
3456 SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3457 SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3458 SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3459 unsigned MaskIdx = 0;
3460 unsigned LoIdx = 0;
3461 unsigned HiIdx = NumElems/2;
3462 for (unsigned i = 0; i != NumElems; ++i) {
3463 if (i == NumElems/2) {
3464 MaskPtr = &HiMask;
3465 MaskIdx = 1;
3466 LoIdx = 0;
3467 HiIdx = NumElems/2;
3468 }
3469 SDOperand Elt = PermMask.getOperand(i);
3470 if (Elt.getOpcode() == ISD::UNDEF) {
3471 Locs[i] = std::make_pair(-1, -1);
3472 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3473 Locs[i] = std::make_pair(MaskIdx, LoIdx);
3474 (*MaskPtr)[LoIdx] = Elt;
3475 LoIdx++;
3476 } else {
3477 Locs[i] = std::make_pair(MaskIdx, HiIdx);
3478 (*MaskPtr)[HiIdx] = Elt;
3479 HiIdx++;
3480 }
3481 }
3482
3483 SDOperand LoShuffle =
3484 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3485 DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3486 &LoMask[0], LoMask.size()));
3487 SDOperand HiShuffle =
3488 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3489 DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3490 &HiMask[0], HiMask.size()));
3491 SmallVector<SDOperand, 8> MaskOps;
3492 for (unsigned i = 0; i != NumElems; ++i) {
3493 if (Locs[i].first == -1) {
3494 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3495 } else {
3496 unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3497 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3498 }
3499 }
3500 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3501 DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3502 &MaskOps[0], MaskOps.size()));
3503 }
3504
3505 return SDOperand();
3506 }
3507
3508 SDOperand
3509 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3510 if (!isa<ConstantSDNode>(Op.getOperand(1)))
3511 return SDOperand();
3512
3513 MVT::ValueType VT = Op.getValueType();
3514 // TODO: handle v16i8.
3515 if (MVT::getSizeInBits(VT) == 16) {
3516 SDOperand Vec = Op.getOperand(0);
3517 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3518 if (Idx == 0)
3519 return DAG.getNode(ISD::TRUNCATE, MVT::i16,
3520 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
3521 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
3522 Op.getOperand(1)));
3523 // Transform it so it matches pextrw, which produces a 32-bit result.
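// pextrw leaves the 16-bit element zero-extended in a 32-bit register,
// so the extract is emitted as PEXTRW at the wider type plus an
// AssertZext of the original narrow type, then truncated back down.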
3524 MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3525 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3526 Op.getOperand(0), Op.getOperand(1));
3527 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
3528 DAG.getValueType(VT));
3529 return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3530 } else if (MVT::getSizeInBits(VT) == 32) {
3531 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3532 if (Idx == 0)
3533 return Op;
3534 // SHUFPS the element to the lowest double word, then movss.
3535 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3536 SmallVector<SDOperand, 8> IdxVec;
3537 IdxVec.
3538 push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3539 IdxVec.
3540 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3541 IdxVec.
3542 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3543 IdxVec.
3544 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3545 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3546 &IdxVec[0], IdxVec.size());
3547 SDOperand Vec = Op.getOperand(0);
3548 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3549 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3550 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3551 DAG.getConstant(0, getPointerTy()));
3552 } else if (MVT::getSizeInBits(VT) == 64) {
3553 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3554 if (Idx == 0)
3555 return Op;
3556
3557 // UNPCKHPD the element to the lowest double word, then movsd.
3558 // Note that if the lower 64 bits of the result of the UNPCKHPD are then
3559 // stored to a f64mem, the whole operation is folded into a single MOVHPDmr.
3560 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
3561 SmallVector<SDOperand, 8> IdxVec;
3562 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
3563 IdxVec.
3564 push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3565 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3566 &IdxVec[0], IdxVec.size());
3567 SDOperand Vec = Op.getOperand(0);
3568 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3569 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3570 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3571 DAG.getConstant(0, getPointerTy()));
3572 }
3573
3574 return SDOperand();
3575 }
3576
3577 SDOperand
3578 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3579 MVT::ValueType VT = Op.getValueType();
3580 MVT::ValueType EVT = MVT::getVectorElementType(VT);
3581 if (EVT == MVT::i8)
3582 return SDOperand();
3583
3584 SDOperand N0 = Op.getOperand(0);
3585 SDOperand N1 = Op.getOperand(1);
3586 SDOperand N2 = Op.getOperand(2);
3587
3588 if (MVT::getSizeInBits(EVT) == 16) {
3589 // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3590 // as its second argument.
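// pinsrw ignores the upper half of its 32-bit source, so ANY_EXTENDing
// the i16 value is sufficient; the element index likewise becomes a
// pointer-sized operand.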
3591 if (N1.getValueType() != MVT::i32)
3592 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3593 if (N2.getValueType() != MVT::i32)
3594 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
3595 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3596 }
3597 return SDOperand();
3598 }
3599
3600 SDOperand
3601 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3602 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3603 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3604 }
3605
3606 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3607 // their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
3608 // one of the above-mentioned nodes. It has to be wrapped because otherwise
3609 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3610 // be used to form addressing modes. These wrapped nodes will be selected
3611 // into MOV32ri.
3612 SDOperand
3613 X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3614 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3615 SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3616 getPointerTy(),
3617 CP->getAlignment());
3618 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3619 // With PIC, the address is actually $g + Offset.
3620 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3621 !Subtarget->isPICStyleRIPRel()) {
3622 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3623 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3624 Result);
3625 }
3626
3627 return Result;
3628 }
3629
3630 SDOperand
3631 X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3632 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3633 SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3634 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3635 // With PIC, the address is actually $g + Offset.
3636 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3637 !Subtarget->isPICStyleRIPRel()) {
3638 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3639 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3640 Result);
3641 }
3642
3643 // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
3644 // load the value at address GV, not the value of GV itself. This means that
3645 // the GlobalAddress must be in the base or index register of the address, not
3646 // the GV offset field. The platform check is inside the GVRequiresExtraLoad() call.
3647 // The same applies for external symbols during PIC codegen.
3648 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
3649 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3650
3651 return Result;
3652 }
3653
3654 // Lower ISD::GlobalTLSAddress using the "general dynamic" model.
3655 static SDOperand
3656 LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3657 const MVT::ValueType PtrVT) {
3658 SDOperand InFlag;
3659 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
3660 DAG.getNode(X86ISD::GlobalBaseReg,
3661 PtrVT), InFlag);
3662 InFlag = Chain.getValue(1);
3663
3664 // emit leal symbol@TLSGD(,%ebx,1), %eax
3665 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
3666 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3667 GA->getValueType(0),
3668 GA->getOffset());
3669 SDOperand Ops[] = { Chain, TGA, InFlag };
3670 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
3671 InFlag = Result.getValue(2);
3672 Chain = Result.getValue(1);
3673
3674 // call ___tls_get_addr. This function receives its argument in
3675 // the register EAX.
3676 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
3677 InFlag = Chain.getValue(1);
3678
3679 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
3680 SDOperand Ops1[] = { Chain,
3681 DAG.getTargetExternalSymbol("___tls_get_addr",
3682 PtrVT),
3683 DAG.getRegister(X86::EAX, PtrVT),
3684 DAG.getRegister(X86::EBX, PtrVT),
3685 InFlag };
3686 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
3687 InFlag = Chain.getValue(1);
3688
3689 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
3690 }
3691
3692 // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
3693 // "local exec" model.
3694 static SDOperand
3695 LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3696 const MVT::ValueType PtrVT) {
3697 // Get the Thread Pointer.
3698 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
3699 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
3700 // exec)
3701 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3702 GA->getValueType(0),
3703 GA->getOffset());
3704 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
3705
3706 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
3707 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
3708
3709 // The address of the thread-local variable is the thread pointer plus
3710 // the offset of the variable.
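// For initial exec, the @indntpoff relocation names a GOT slot that the
// dynamic linker fills with the variable's TP-relative offset, hence the
// extra load above; for local exec the offset is a link-time constant.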
3711 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
3712 }
3713
3714 SDOperand
3715 X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
3716 // TODO: implement the "local dynamic" model
3717 // TODO: implement the "initial exec" model for pic executables
3718 assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
3719 "TLS not implemented for non-ELF or 64-bit targets");
3720 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3721 // If the relocation model is PIC, use the "General Dynamic" TLS model;
3722 // otherwise use the "Local Exec" TLS model.
3723 if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3724 return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
3725 else
3726 return LowerToTLSExecModel(GA, DAG, getPointerTy());
3727 }
3728
3729 SDOperand
3730 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3731 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3732 SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3733 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3734 // With PIC, the address is actually $g + Offset.
3735 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3736 !Subtarget->isPICStyleRIPRel()) {
3737 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3738 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3739 Result);
3740 }
3741
3742 return Result;
3743 }
3744
3745 SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3746 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3747 SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3748 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3749 // With PIC, the address is actually $g + Offset.
3750 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3751 !Subtarget->isPICStyleRIPRel()) {
3752 Result = DAG.getNode(ISD::ADD, getPointerTy(),
3753 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3754 Result);
3755 }
3756
3757 return Result;
3758 }
3759
3760 /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
3761 /// take a 2 x i32 value to shift plus a shift amount.
3762 SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3763 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3764 "Not an i64 shift!");
3765 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3766 SDOperand ShOpLo = Op.getOperand(0);
3767 SDOperand ShOpHi = Op.getOperand(1);
3768 SDOperand ShAmt = Op.getOperand(2);
3769 SDOperand Tmp1 = isSRA ?
3770 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3771 DAG.getConstant(0, MVT::i32);
3772
3773 SDOperand Tmp2, Tmp3;
3774 if (Op.getOpcode() == ISD::SHL_PARTS) {
3775 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3776 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3777 } else {
3778 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3779 Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3780 } 3781 3782 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3783 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3784 DAG.getConstant(32, MVT::i8)); 3785 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 3786 AndNode, DAG.getConstant(0, MVT::i8)); 3787 3788 SDOperand Hi, Lo; 3789 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3790 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3791 SmallVector<SDOperand, 4> Ops; 3792 if (Op.getOpcode() == ISD::SHL_PARTS) { 3793 Ops.push_back(Tmp2); 3794 Ops.push_back(Tmp3); 3795 Ops.push_back(CC); 3796 Ops.push_back(Cond); 3797 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3798 3799 Ops.clear(); 3800 Ops.push_back(Tmp3); 3801 Ops.push_back(Tmp1); 3802 Ops.push_back(CC); 3803 Ops.push_back(Cond); 3804 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3805 } else { 3806 Ops.push_back(Tmp2); 3807 Ops.push_back(Tmp3); 3808 Ops.push_back(CC); 3809 Ops.push_back(Cond); 3810 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3811 3812 Ops.clear(); 3813 Ops.push_back(Tmp3); 3814 Ops.push_back(Tmp1); 3815 Ops.push_back(CC); 3816 Ops.push_back(Cond); 3817 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3818 } 3819 3820 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3821 Ops.clear(); 3822 Ops.push_back(Lo); 3823 Ops.push_back(Hi); 3824 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3825} 3826 3827SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3828 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3829 Op.getOperand(0).getValueType() >= MVT::i16 && 3830 "Unknown SINT_TO_FP to lower!"); 3831 3832 SDOperand Result; 3833 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3834 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3835 MachineFunction &MF = DAG.getMachineFunction(); 3836 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3837 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3838 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3839 StackSlot, NULL, 0); 3840 3841 // These are really Legal; caller falls through into that case. 3842 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32) 3843 return Result; 3844 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64) 3845 return Result; 3846 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 3847 Subtarget->is64Bit()) 3848 return Result; 3849 3850 // Build the FILD 3851 SDVTList Tys; 3852 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) || 3853 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64); 3854 if (useSSE) 3855 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3856 else 3857 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3858 SmallVector<SDOperand, 8> Ops; 3859 Ops.push_back(Chain); 3860 Ops.push_back(StackSlot); 3861 Ops.push_back(DAG.getValueType(SrcVT)); 3862 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3863 Tys, &Ops[0], Ops.size()); 3864 3865 if (useSSE) { 3866 Chain = Result.getValue(1); 3867 SDOperand InFlag = Result.getValue(2); 3868 3869 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3870 // shouldn't be necessary except that RFP cannot be live across 3871 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
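// In effect this emits (illustrative, i32 source / f64 result):
//   fildl  (slot1)          ; integer load onto the x87 stack
//   fstpl  (slot2)          ; store it back out as f64
//   movsd  (slot2), %xmm0   ; reload into an SSE register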
3872 MachineFunction &MF = DAG.getMachineFunction(); 3873 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3874 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3875 Tys = DAG.getVTList(MVT::Other); 3876 SmallVector<SDOperand, 8> Ops; 3877 Ops.push_back(Chain); 3878 Ops.push_back(Result); 3879 Ops.push_back(StackSlot); 3880 Ops.push_back(DAG.getValueType(Op.getValueType())); 3881 Ops.push_back(InFlag); 3882 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3883 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3884 } 3885 3886 return Result; 3887} 3888 3889std::pair<SDOperand,SDOperand> X86TargetLowering:: 3890FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) { 3891 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3892 "Unknown FP_TO_SINT to lower!"); 3893 3894 // These are really Legal. 3895 if (Op.getValueType() == MVT::i32 && 3896 X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) 3897 return std::make_pair(SDOperand(), SDOperand()); 3898 if (Op.getValueType() == MVT::i32 && 3899 X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64) 3900 return std::make_pair(SDOperand(), SDOperand()); 3901 if (Subtarget->is64Bit() && 3902 Op.getValueType() == MVT::i64 && 3903 Op.getOperand(0).getValueType() != MVT::f80) 3904 return std::make_pair(SDOperand(), SDOperand()); 3905 3906 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3907 // stack slot. 3908 MachineFunction &MF = DAG.getMachineFunction(); 3909 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3910 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3911 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3912 unsigned Opc; 3913 switch (Op.getValueType()) { 3914 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3915 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3916 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3917 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3918 } 3919 3920 SDOperand Chain = DAG.getEntryNode(); 3921 SDOperand Value = Op.getOperand(0); 3922 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) || 3923 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) { 3924 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3925 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3926 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3927 SDOperand Ops[] = { 3928 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3929 }; 3930 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3931 Chain = Value.getValue(1); 3932 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3933 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3934 } 3935 3936 // Build the FP_TO_INT*_IN_MEM 3937 SDOperand Ops[] = { Chain, Value, StackSlot }; 3938 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3939 3940 return std::make_pair(FIST, StackSlot); 3941} 3942 3943SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3944 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG); 3945 SDOperand FIST = Vals.first, StackSlot = Vals.second; 3946 if (FIST.Val == 0) return SDOperand(); 3947 3948 // Load the result. 
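// (By this point the FP_TO_INT*_IN_MEM pseudo has left the converted
// integer in the stack slot; its expansion also switches the x87 control
// word to round-to-zero around the fistp, matching C cast semantics.)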
3949 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3950} 3951 3952SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 3953 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG); 3954 SDOperand FIST = Vals.first, StackSlot = Vals.second; 3955 if (FIST.Val == 0) return 0; 3956 3957 // Return an i64 load from the stack slot. 3958 SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0); 3959 3960 // Use a MERGE_VALUES node to drop the chain result value. 3961 return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val; 3962} 3963 3964SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3965 MVT::ValueType VT = Op.getValueType(); 3966 MVT::ValueType EltVT = VT; 3967 if (MVT::isVector(VT)) 3968 EltVT = MVT::getVectorElementType(VT); 3969 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3970 std::vector<Constant*> CV; 3971 if (EltVT == MVT::f64) { 3972 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 3973 CV.push_back(C); 3974 CV.push_back(C); 3975 } else { 3976 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 3977 CV.push_back(C); 3978 CV.push_back(C); 3979 CV.push_back(C); 3980 CV.push_back(C); 3981 } 3982 Constant *C = ConstantVector::get(CV); 3983 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3984 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3985 false, 16); 3986 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3987} 3988 3989SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3990 MVT::ValueType VT = Op.getValueType(); 3991 MVT::ValueType EltVT = VT; 3992 unsigned EltNum = 1; 3993 if (MVT::isVector(VT)) { 3994 EltVT = MVT::getVectorElementType(VT); 3995 EltNum = MVT::getVectorNumElements(VT); 3996 } 3997 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3998 std::vector<Constant*> CV; 3999 if (EltVT == MVT::f64) { 4000 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 4001 CV.push_back(C); 4002 CV.push_back(C); 4003 } else { 4004 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 4005 CV.push_back(C); 4006 CV.push_back(C); 4007 CV.push_back(C); 4008 CV.push_back(C); 4009 } 4010 Constant *C = ConstantVector::get(CV); 4011 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4012 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4013 false, 16); 4014 if (MVT::isVector(VT)) { 4015 return DAG.getNode(ISD::BIT_CONVERT, VT, 4016 DAG.getNode(ISD::XOR, MVT::v2i64, 4017 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4018 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4019 } else { 4020 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4021 } 4022} 4023 4024SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 4025 SDOperand Op0 = Op.getOperand(0); 4026 SDOperand Op1 = Op.getOperand(1); 4027 MVT::ValueType VT = Op.getValueType(); 4028 MVT::ValueType SrcVT = Op1.getValueType(); 4029 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 4030 4031 // If second operand is smaller, extend it first. 4032 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 4033 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4034 SrcVT = VT; 4035 SrcTy = MVT::getTypeForValueType(SrcVT); 4036 } 4037 // And if it is bigger, shrink it first. 
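// The overall computation is the usual bit trick (illustrative):
//   copysign(x, y) = (x & ~SignMask) | (y & SignMask)
// carried out with FP-domain AND/OR against constant-pool masks.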
4038 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4039 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1); 4040 SrcVT = VT; 4041 SrcTy = MVT::getTypeForValueType(SrcVT); 4042 } 4043 4044 // At this point the operands and the result should have the same 4045 // type, and that won't be f80 since that is not custom lowered. 4046 4047 // First get the sign bit of second operand. 4048 std::vector<Constant*> CV; 4049 if (SrcVT == MVT::f64) { 4050 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 4051 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4052 } else { 4053 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 4054 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4055 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4056 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4057 } 4058 Constant *C = ConstantVector::get(CV); 4059 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4060 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 4061 false, 16); 4062 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4063 4064 // Shift sign bit right or left if the two operands have different types. 4065 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4066 // Op0 is MVT::f32, Op1 is MVT::f64. 4067 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4068 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4069 DAG.getConstant(32, MVT::i32)); 4070 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4071 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4072 DAG.getConstant(0, getPointerTy())); 4073 } 4074 4075 // Clear first operand sign bit. 4076 CV.clear(); 4077 if (VT == MVT::f64) { 4078 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 4079 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4080 } else { 4081 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 4082 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4083 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4084 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4085 } 4086 C = ConstantVector::get(CV); 4087 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4088 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4089 false, 16); 4090 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4091 4092 // Or the value with the sign bit. 
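// X86ISD::FAND / X86ISD::FOR are bitwise ops kept in the FP domain;
// they select to ANDPS/ANDPD and ORPS/ORPD respectively.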
4093 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 4094} 4095 4096SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 4097 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 4098 SDOperand Cond; 4099 SDOperand Op0 = Op.getOperand(0); 4100 SDOperand Op1 = Op.getOperand(1); 4101 SDOperand CC = Op.getOperand(2); 4102 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4103 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 4104 unsigned X86CC; 4105 4106 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 4107 Op0, Op1, DAG)) { 4108 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4109 return DAG.getNode(X86ISD::SETCC, MVT::i8, 4110 DAG.getConstant(X86CC, MVT::i8), Cond); 4111 } 4112 4113 assert(isFP && "Illegal integer SetCC!"); 4114 4115 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4116 switch (SetCCOpcode) { 4117 default: assert(false && "Illegal floating point SetCC!"); 4118 case ISD::SETOEQ: { // !PF & ZF 4119 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4120 DAG.getConstant(X86::COND_NP, MVT::i8), Cond); 4121 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4122 DAG.getConstant(X86::COND_E, MVT::i8), Cond); 4123 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 4124 } 4125 case ISD::SETUNE: { // PF | !ZF 4126 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4127 DAG.getConstant(X86::COND_P, MVT::i8), Cond); 4128 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4129 DAG.getConstant(X86::COND_NE, MVT::i8), Cond); 4130 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 4131 } 4132 } 4133} 4134 4135 4136SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 4137 bool addTest = true; 4138 SDOperand Cond = Op.getOperand(0); 4139 SDOperand CC; 4140 4141 if (Cond.getOpcode() == ISD::SETCC) 4142 Cond = LowerSETCC(Cond, DAG); 4143 4144 // If condition flag is set by a X86ISD::CMP, then use it as the condition 4145 // setting operand in place of the X86ISD::SETCC. 4146 if (Cond.getOpcode() == X86ISD::SETCC) { 4147 CC = Cond.getOperand(0); 4148 4149 SDOperand Cmp = Cond.getOperand(1); 4150 unsigned Opc = Cmp.getOpcode(); 4151 MVT::ValueType VT = Op.getValueType(); 4152 bool IllegalFPCMov = false; 4153 if (VT == MVT::f32 && !X86ScalarSSEf32) 4154 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4155 else if (VT == MVT::f64 && !X86ScalarSSEf64) 4156 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4157 else if (VT == MVT::f80) 4158 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4159 if ((Opc == X86ISD::CMP || 4160 Opc == X86ISD::COMI || 4161 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 4162 Cond = Cmp; 4163 addTest = false; 4164 } 4165 } 4166 4167 if (addTest) { 4168 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4169 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 4170 } 4171 4172 const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(), 4173 MVT::Flag); 4174 SmallVector<SDOperand, 4> Ops; 4175 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 4176 // condition is true. 
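// Hence the false value (SELECT operand 2) is pushed first, becoming the
// value kept when the condition fails, and the true value (operand 1)
// second, becoming the value moved in when it holds.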
4177 Ops.push_back(Op.getOperand(2));
4178 Ops.push_back(Op.getOperand(1));
4179 Ops.push_back(CC);
4180 Ops.push_back(Cond);
4181 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
4182 }
4183
4184 SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
4185 bool addTest = true;
4186 SDOperand Chain = Op.getOperand(0);
4187 SDOperand Cond = Op.getOperand(1);
4188 SDOperand Dest = Op.getOperand(2);
4189 SDOperand CC;
4190
4191 if (Cond.getOpcode() == ISD::SETCC)
4192 Cond = LowerSETCC(Cond, DAG);
4193
4194 // If condition flag is set by a X86ISD::CMP, then use it as the condition
4195 // setting operand in place of the X86ISD::SETCC.
4196 if (Cond.getOpcode() == X86ISD::SETCC) {
4197 CC = Cond.getOperand(0);
4198
4199 SDOperand Cmp = Cond.getOperand(1);
4200 unsigned Opc = Cmp.getOpcode();
4201 if (Opc == X86ISD::CMP ||
4202 Opc == X86ISD::COMI ||
4203 Opc == X86ISD::UCOMI) {
4204 Cond = Cmp;
4205 addTest = false;
4206 }
4207 }
4208
4209 if (addTest) {
4210 CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4211 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
4212 }
4213 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
4214 Chain, Op.getOperand(2), CC, Cond);
4215 }
4216
4217
4218 // Lower dynamic stack allocation to an _alloca call for Cygwin/Mingw targets.
4219 // A call to _alloca is needed to probe the stack when allocating more than 4k
4220 // bytes in one go. Touching the stack at 4K increments is necessary to ensure
4221 // that the guard pages used by the OS virtual memory manager are allocated in
4222 // the correct sequence.
4223 SDOperand
4224 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
4225 SelectionDAG &DAG) {
4226 assert(Subtarget->isTargetCygMing() &&
4227 "This should be used only on Cygwin/Mingw targets");
4228
4229 // Get the inputs.
4230 SDOperand Chain = Op.getOperand(0);
4231 SDOperand Size = Op.getOperand(1);
4232 // FIXME: Ensure alignment here
4233
4234 SDOperand Flag;
4235
4236 MVT::ValueType IntPtr = getPointerTy();
4237 MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
4238
4239 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
4240 Flag = Chain.getValue(1);
4241
4242 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
4243 SDOperand Ops[] = { Chain,
4244 DAG.getTargetExternalSymbol("_alloca", IntPtr),
4245 DAG.getRegister(X86::EAX, IntPtr),
4246 Flag };
4247 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
4248 Flag = Chain.getValue(1);
4249
4250 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
4251
4252 std::vector<MVT::ValueType> Tys;
4253 Tys.push_back(SPTy);
4254 Tys.push_back(MVT::Other);
4255 SDOperand Ops1[2] = { Chain.getValue(0), Chain };
4256 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
4257 }
4258
4259 SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
4260 SDOperand InFlag(0, 0);
4261 SDOperand Chain = Op.getOperand(0);
4262 unsigned Align =
4263 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4264 if (Align == 0) Align = 1;
4265
4266 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4267 // If not DWORD aligned or size is more than the threshold, call memset.
4268 // The libc version is likely to be faster for these cases. It can use the
4269 // address value and run-time information about the CPU.
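// e.g. an address that is not 4-byte aligned, or a known size above
// getMaxInlineSizeThreshold(), both take the libcall path below.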
4270 if ((Align & 3) != 0 || 4271 (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) { 4272 MVT::ValueType IntPtr = getPointerTy(); 4273 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4274 TargetLowering::ArgListTy Args; 4275 TargetLowering::ArgListEntry Entry; 4276 Entry.Node = Op.getOperand(1); 4277 Entry.Ty = IntPtrTy; 4278 Args.push_back(Entry); 4279 // Extend the unsigned i8 argument to be an int value for the call. 4280 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 4281 Entry.Ty = IntPtrTy; 4282 Args.push_back(Entry); 4283 Entry.Node = Op.getOperand(3); 4284 Args.push_back(Entry); 4285 std::pair<SDOperand,SDOperand> CallResult = 4286 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 4287 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 4288 return CallResult.second; 4289 } 4290 4291 MVT::ValueType AVT; 4292 SDOperand Count; 4293 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4294 unsigned BytesLeft = 0; 4295 bool TwoRepStos = false; 4296 if (ValC) { 4297 unsigned ValReg; 4298 uint64_t Val = ValC->getValue() & 255; 4299 4300 // If the value is a constant, then we can potentially use larger sets. 4301 switch (Align & 3) { 4302 case 2: // WORD aligned 4303 AVT = MVT::i16; 4304 ValReg = X86::AX; 4305 Val = (Val << 8) | Val; 4306 break; 4307 case 0: // DWORD aligned 4308 AVT = MVT::i32; 4309 ValReg = X86::EAX; 4310 Val = (Val << 8) | Val; 4311 Val = (Val << 16) | Val; 4312 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 4313 AVT = MVT::i64; 4314 ValReg = X86::RAX; 4315 Val = (Val << 32) | Val; 4316 } 4317 break; 4318 default: // Byte aligned 4319 AVT = MVT::i8; 4320 ValReg = X86::AL; 4321 Count = Op.getOperand(3); 4322 break; 4323 } 4324 4325 if (AVT > MVT::i8) { 4326 if (I) { 4327 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4328 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4329 BytesLeft = I->getValue() % UBytes; 4330 } else { 4331 assert(AVT >= MVT::i32 && 4332 "Do not use rep;stos if not at least DWORD aligned"); 4333 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4334 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4335 TwoRepStos = true; 4336 } 4337 } 4338 4339 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 4340 InFlag); 4341 InFlag = Chain.getValue(1); 4342 } else { 4343 AVT = MVT::i8; 4344 Count = Op.getOperand(3); 4345 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 4346 InFlag = Chain.getValue(1); 4347 } 4348 4349 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4350 Count, InFlag); 4351 InFlag = Chain.getValue(1); 4352 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4353 Op.getOperand(1), InFlag); 4354 InFlag = Chain.getValue(1); 4355 4356 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4357 SmallVector<SDOperand, 8> Ops; 4358 Ops.push_back(Chain); 4359 Ops.push_back(DAG.getValueType(AVT)); 4360 Ops.push_back(InFlag); 4361 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4362 4363 if (TwoRepStos) { 4364 InFlag = Chain.getValue(1); 4365 Count = Op.getOperand(3); 4366 MVT::ValueType CVT = Count.getValueType(); 4367 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4368 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4369 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? 
X86::RCX : X86::ECX, 4370 Left, InFlag); 4371 InFlag = Chain.getValue(1); 4372 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4373 Ops.clear(); 4374 Ops.push_back(Chain); 4375 Ops.push_back(DAG.getValueType(MVT::i8)); 4376 Ops.push_back(InFlag); 4377 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4378 } else if (BytesLeft) { 4379 // Issue stores for the last 1 - 7 bytes. 4380 SDOperand Value; 4381 unsigned Val = ValC->getValue() & 255; 4382 unsigned Offset = I->getValue() - BytesLeft; 4383 SDOperand DstAddr = Op.getOperand(1); 4384 MVT::ValueType AddrVT = DstAddr.getValueType(); 4385 if (BytesLeft >= 4) { 4386 Val = (Val << 8) | Val; 4387 Val = (Val << 16) | Val; 4388 Value = DAG.getConstant(Val, MVT::i32); 4389 Chain = DAG.getStore(Chain, Value, 4390 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4391 DAG.getConstant(Offset, AddrVT)), 4392 NULL, 0); 4393 BytesLeft -= 4; 4394 Offset += 4; 4395 } 4396 if (BytesLeft >= 2) { 4397 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4398 Chain = DAG.getStore(Chain, Value, 4399 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4400 DAG.getConstant(Offset, AddrVT)), 4401 NULL, 0); 4402 BytesLeft -= 2; 4403 Offset += 2; 4404 } 4405 if (BytesLeft == 1) { 4406 Value = DAG.getConstant(Val, MVT::i8); 4407 Chain = DAG.getStore(Chain, Value, 4408 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4409 DAG.getConstant(Offset, AddrVT)), 4410 NULL, 0); 4411 } 4412 } 4413 4414 return Chain; 4415} 4416 4417SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4418 SDOperand Dest, 4419 SDOperand Source, 4420 unsigned Size, 4421 unsigned Align, 4422 SelectionDAG &DAG) { 4423 MVT::ValueType AVT; 4424 unsigned BytesLeft = 0; 4425 switch (Align & 3) { 4426 case 2: // WORD aligned 4427 AVT = MVT::i16; 4428 break; 4429 case 0: // DWORD aligned 4430 AVT = MVT::i32; 4431 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4432 AVT = MVT::i64; 4433 break; 4434 default: // Byte aligned 4435 AVT = MVT::i8; 4436 break; 4437 } 4438 4439 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4440 SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy()); 4441 BytesLeft = Size % UBytes; 4442 4443 SDOperand InFlag(0, 0); 4444 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4445 Count, InFlag); 4446 InFlag = Chain.getValue(1); 4447 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4448 Dest, InFlag); 4449 InFlag = Chain.getValue(1); 4450 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4451 Source, InFlag); 4452 InFlag = Chain.getValue(1); 4453 4454 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4455 SmallVector<SDOperand, 8> Ops; 4456 Ops.push_back(Chain); 4457 Ops.push_back(DAG.getValueType(AVT)); 4458 Ops.push_back(InFlag); 4459 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4460 4461 if (BytesLeft) { 4462 // Issue loads and stores for the last 1 - 7 bytes. 
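// e.g. (illustrative) an 11-byte DWORD-aligned copy runs rep;movsl for
// two i32 units and then mops up the remaining 3 bytes with one i16 and
// one i8 load/store pair below.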
    unsigned Offset = Size - BytesLeft;
    SDOperand DstAddr = Dest;
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Source;
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 4) {
      Value = DAG.getLoad(MVT::i32, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
    }
  }

  return Chain;
}

/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG) {
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand TheChain = N->getOperand(0);
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
  if (Subtarget->is64Bit()) {
    SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
    SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX,
                                       MVT::i64, rax.getValue(2));
    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
                                DAG.getConstant(32, MVT::i8));
    SDOperand Ops[] = {
      DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1)
    };

    Tys = DAG.getVTList(MVT::i64, MVT::Other);
    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
  }

  SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
  SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX,
                                     MVT::i32, eax.getValue(2));
  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
  SDOperand Ops[] = { eax, edx };
  Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2);

  // Use a MERGE_VALUES to return the value and chain.
  Ops[1] = edx.getValue(1);
  Tys = DAG.getVTList(MVT::i64, MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
}

SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));

  if (!Subtarget->is64Bit()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
                        SV->getOffset());
  }

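  // The 64-bit case must initialize all four fields of the SysV
  // __va_list_tag object.  The values stored below (VarArgsGPOffset,
  // VarArgsFPOffset and the two frame indexes) were computed when the
  // formal arguments of the current function were lowered.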
  // __va_list_tag:
  //   gp_offset         (0 - 6 * 8)
  //   fp_offset         (48 - 48 + 8 * 16)
  //   overflow_arg_area (point to parameters coming in memory).
  //   reg_save_area
  SmallVector<SDOperand, 8> MemOps;
  SDOperand FIN = Op.getOperand(1);
  // Store gp_offset
  SDOperand Store = DAG.getStore(Op.getOperand(0),
                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
                                 FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  Store = DAG.getStore(Op.getOperand(0),
                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
                       FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(8, getPointerTy()));
  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);
  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}

SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
  SDOperand Chain = Op.getOperand(0);
  SDOperand DstPtr = Op.getOperand(1);
  SDOperand SrcPtr = Op.getOperand(2);
  SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
  SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));

  SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
                       SrcSV->getValue(), SrcSV->getOffset());
  Chain = SrcPtr.getValue(1);
  for (unsigned i = 0; i < 3; ++i) {
    SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
                                SrcSV->getValue(), SrcSV->getOffset());
    Chain = Val.getValue(1);
    Chain = DAG.getStore(Chain, Val, DstPtr,
                         DstSV->getValue(), DstSV->getOffset());
    if (i == 2)
      break;
    SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
                         DAG.getConstant(8, getPointerTy()));
    DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
                         DAG.getConstant(8, getPointerTy()));
  }
  return Chain;
}

SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
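  // Each of these maps to an (U)COMIS[SD] compare feeding an X86ISD::SETCC.
  // Roughly, for example, llvm.x86.sse.comieq.ss(a, b) becomes
  //   X86ISD::SETCC(COND_*, X86ISD::COMI(a, b))
  // zero-extended to i32, with the exact condition code chosen by
  // translateX86CC from the ISD::CondCode selected below.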
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }

    unsigned X86CC;
    SDOperand LHS = Op.getOperand(1);
    SDOperand RHS = Op.getOperand(2);
    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);

    SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS);
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                  DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}

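/// LowerRETURNADDR - Lower llvm.returnaddress.  Only depth 0 is handled:
/// the result is simply a load from the return-address slot computed by
/// getReturnAddressFrameIndex.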
SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  // Just load the return address.
  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
}

SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
  return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
                     DAG.getConstant(4, getPointerTy()));
}

SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
                                                       SelectionDAG &DAG) {
  // Not yet supported on x86-64.
  if (Subtarget->is64Bit())
    return SDOperand();

  return DAG.getConstant(8, getPointerTy());
}

SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) {
  assert(!Subtarget->is64Bit() &&
         "Lowering of eh_return builtin is not supported yet on x86-64");

  MachineFunction &MF = DAG.getMachineFunction();
  SDOperand Chain   = Op.getOperand(0);
  SDOperand Offset  = Op.getOperand(1);
  SDOperand Handler = Op.getOperand(2);

  SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
                                    getPointerTy());

  SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
                                    DAG.getConstant(-4UL, getPointerTy()));
  StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
  Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
  Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
  MF.getRegInfo().addLiveOut(X86::ECX);

  return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
                     Chain, DAG.getRegister(X86::ECX, getPointerTy()));
}

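/// LowerTRAMPOLINE - Lower llvm.init.trampoline.  On x86-32 the 10-byte
/// trampoline written below is, roughly:
///   Trmp+0:  B8+r      movl $Nest, %NestReg
///   Trmp+1:  <imm32>   the 'nest' value
///   Trmp+5:  E9        jmp rel32 to the nested function
///   Trmp+6:  <disp32>  where disp32 = FPtr - (Trmp + 10)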
SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
                                             SelectionDAG &DAG) {
  SDOperand Root = Op.getOperand(0);
  SDOperand Trmp = Op.getOperand(1); // trampoline
  SDOperand FPtr = Op.getOperand(2); // nested function
  SDOperand Nest = Op.getOperand(3); // 'nest' parameter value

  SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));

  if (Subtarget->is64Bit()) {
    return SDOperand(); // not yet supported
  } else {
    Function *Func = (Function *)
      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
    unsigned CC = Func->getCallingConv();
    unsigned NestReg;

    switch (CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::C:
    case CallingConv::X86_StdCall: {
      // Pass 'nest' parameter in ECX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::ECX;

      // Check that ECX wasn't needed by an 'inreg' parameter.
      const FunctionType *FTy = Func->getFunctionType();
      const ParamAttrsList *Attrs = Func->getParamAttrs();

      if (Attrs && !Func->isVarArg()) {
        unsigned InRegCount = 0;
        unsigned Idx = 1;

        for (FunctionType::param_iterator I = FTy->param_begin(),
             E = FTy->param_end(); I != E; ++I, ++Idx)
          if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
            // FIXME: should only count parameters that are lowered to
            // integers.
            InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;

        if (InRegCount > 2) {
          cerr << "Nest register in use - reduce number of inreg parameters!\n";
          abort();
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
      // Pass 'nest' parameter in EAX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::EAX;
      break;
    }

    const X86InstrInfo *TII =
      ((X86TargetMachine&)getTargetMachine()).getInstrInfo();

    SDOperand OutChains[4];
    SDOperand Addr, Disp;

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);

    unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
    unsigned char N86Reg  = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
    OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                Trmp, TrmpSV->getValue(), TrmpSV->getOffset());

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
    OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 1, false, 1);

    unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
    OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
                                TrmpSV->getValue(), TrmpSV->getOffset() + 5);

    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
    OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 6, false, 1);

    SDOperand Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
    return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
  }
}

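// A worked example of the conversion implemented below (illustrative):
// with the default control word 0x027F, bits 11:10 are 00, so
// ((((0) | (0)) + 1) & 3) == 1, i.e. FLT_ROUNDS "round to nearest".
// With round-toward-zero (bits 11:10 == 11), ((((1) | (2)) + 1) & 3) == 0,
// i.e. "round to 0".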
SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 11:10 of the FP control word (FPCW), and
   has the following settings:
     00 Round to nearest
     01 Round to -inf
     10 Round to +inf
     11 Round to 0

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
  */

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  MVT::ValueType VT = Op.getValueType();

  // Save FP Control Word to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
                                DAG.getEntryNode(), StackSlot);

  // Load FP Control Word from stack slot
  SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);

  // Transform as necessary
  SDOperand CWD1 =
    DAG.getNode(ISD::SRL, MVT::i16,
                DAG.getNode(ISD::AND, MVT::i16,
                            CWD, DAG.getConstant(0x800, MVT::i16)),
                DAG.getConstant(11, MVT::i8));
  SDOperand CWD2 =
    DAG.getNode(ISD::SRL, MVT::i16,
                DAG.getNode(ISD::AND, MVT::i16,
                            CWD, DAG.getConstant(0x400, MVT::i16)),
                DAG.getConstant(9, MVT::i8));

  SDOperand RetVal =
    DAG.getNode(ISD::AND, MVT::i16,
                DAG.getNode(ISD::ADD, MVT::i16,
                            DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
                            DAG.getConstant(1, MVT::i16)),
                DAG.getConstant(3, MVT::i16));

  return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}

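/// LowerCTLZ - Lower CTLZ using bsr.  For i32 the generated sequence is
/// roughly (illustrative pseudo-assembly; the CMOV source is a register
/// holding the constant):
///   bsr    eax, src      ; index of highest set bit; sets ZF if src == 0
///   cmove  eax, <63>     ; 2*NumBits-1, chosen so the xor yields NumBits
///   xor    eax, 31       ; bit index -> leading-zero count
/// e.g. ctlz(1) = 0^31 = 31 and ctlz(0) = 63^31 = 32.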
4991/// 4992SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4993 switch (Op.getOpcode()) { 4994 default: assert(0 && "Should not custom lower this!"); 4995 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4996 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4997 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4998 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4999 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5000 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5001 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5002 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5003 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 5004 case ISD::SHL_PARTS: 5005 case ISD::SRA_PARTS: 5006 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 5007 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 5008 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 5009 case ISD::FABS: return LowerFABS(Op, DAG); 5010 case ISD::FNEG: return LowerFNEG(Op, DAG); 5011 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5012 case ISD::SETCC: return LowerSETCC(Op, DAG); 5013 case ISD::SELECT: return LowerSELECT(Op, DAG); 5014 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 5015 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5016 case ISD::CALL: return LowerCALL(Op, DAG); 5017 case ISD::RET: return LowerRET(Op, DAG); 5018 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 5019 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 5020 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 5021 case ISD::VASTART: return LowerVASTART(Op, DAG); 5022 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 5023 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5024 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5025 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5026 case ISD::FRAME_TO_ARGS_OFFSET: 5027 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 5028 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 5029 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 5030 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 5031 case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG); 5032 case ISD::CTLZ: return LowerCTLZ(Op, DAG); 5033 case ISD::CTTZ: return LowerCTTZ(Op, DAG); 5034 5035 // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands. 5036 case ISD::READCYCLECOUNTER: 5037 return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0); 5038 } 5039} 5040 5041/// ExpandOperation - Provide custom lowering hooks for expanding operations. 
SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::FP_TO_SINT:       return ExpandFP_TO_SINT(N, DAG);
  case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::BSF:                return "X86ISD::BSF";
  case X86ISD::BSR:                return "X86ISD::BSR";
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FOR:                return "X86ISD::FOR";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FSRL:               return "X86ISD::FSRL";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
  case X86ISD::FRCP:               return "X86ISD::FRCP";
  case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
  case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
  case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
  case X86ISD::TC_RETURN:          return "X86ISD::TC_RETURN";
  case X86ISD::FNSTCW16m:          return "X86ISD::FNSTCW16m";
  }
}

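// For reference, an x86 memory operand has the general form
//   [BaseReg + IndexReg*Scale + Disp32]
// with Scale in {1, 2, 4, 8}.  The scales 3, 5 and 9 accepted below are
// really IndexReg*{2,4,8} + IndexReg, which consumes the base-register
// slot of the addressing mode.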
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement.
  if (AM.BaseOffs < -(1LL << 31) || AM.BaseOffs > (1LL << 31) - 1)
    return false;

  if (AM.BaseGV) {
    // We can only fold this if we don't need an extra load.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;

    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit()) {
      if (getTargetMachine().getCodeModel() != CodeModel::Small)
        return false;
      // If lower 4G is not available, then we must use rip-relative
      // addressing.
      if (AM.BaseOffs || AM.Scale > 1)
        return false;
    }
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}


bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
  if (!Ty1->isInteger() || !Ty2->isInteger())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
                                       MVT::ValueType VT2) const {
  if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2))
    return false;
  unsigned NumBits1 = MVT::getSizeInBits(VT1);
  unsigned NumBits2 = MVT::getSizeInBits(VT2);
  if (NumBits1 <= NumBits2)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isIdentityMask(Mask.Val) ||
          isIdentityMask(Mask.Val, true) ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKH_v_undef_Mask(Mask.Val));
}

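/// isVectorClearMaskLegal - Like isShuffleMaskLegal, but queried (roughly)
/// when the DAG combiner considers rewriting an element-clearing idiom as a
/// shuffle with a zero vector; BVOps holds the candidate mask elements.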
bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4) ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

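  // The x87 store-integer instructions round according to the current
  // rounding mode, so a truncating fp-to-int must temporarily force
  // round-toward-zero.  The code below materializes, roughly:
  //   fnstcw  [slot]          ; save the current control word
  //   mov     ax, [slot]      ; remember it in a vreg
  //   mov     word [slot], 0xC7F
  //   fldcw   [slot]          ; RC field = 11b (round toward zero)
  //   mov     [slot], ax      ; restore the memory image
  //   fistp   [dest]          ; the actual truncating store
  //   fldcw   [slot]          ; reload the original mode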
  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
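  // X86ISD::SETCC produces 0 or 1 in an i8 register, so every bit above
  // bit zero is known to be zero; that is what the mask computed below
  // records.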
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0)
      : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(),
                               DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::Wrapper) {
    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (Opc == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}


/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isAlign16) {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  } else {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile(),
                       LD->getAlignment());
  }
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
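/// For example (illustrative): with SSE2, a select of f64 values such as
/// (a < b) ? a : b is turned into X86ISD::FMIN(a, b) below, subject to the
/// UnsafeFPMath restrictions on the non-strict comparison predicates.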
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE2 support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE:  // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT:  // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT:  // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:  // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}


SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
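/// The constraints handled below are: 'I' (an immediate in [0,31], e.g. a
/// shift count), 'N' (an immediate in [0,255], e.g. an I/O port), and 'i'
/// (a literal immediate, or in non-PIC code a global address plus an
/// optional displacement).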
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                     char Constraint,
                                                     std::vector<SDOperand>&Ops,
                                                     SelectionDAG &DAG) {
  SDOperand Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C)
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the operands the other way around.
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we can't
      // match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
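    // Illustrative usage: the 'A' constraint names the EAX/EDX pair that
    // holds a 64-bit value on 32-bit x86, as in
    //   unsigned long long t;
    //   asm volatile("rdtsc" : "=A"(t));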
    switch (Constraint[0]) {  // GCC X86 Constraint Letters
    default: break;           // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}