X86ISelLowering.cpp revision 152804e9c1a9fad358a7a5298cd01d4af0f8cf47
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8,  X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }
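  // Illustrative note (not in the original source): a Promote entry asks the
  // legalizer to redo the node in a wider type, so e.g. roughly
  //   (uint_to_fp i16 %x)  ->  (sint_to_fp (zext %x to i32))
  // (the zero-extended value is non-negative, so a signed convert is safe),
  // while Expand rewrites the node in terms of other nodes and Custom defers
  // to this target's LowerOperation hook.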
  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT    , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT    , MVT::i32  , Expand);
  }

  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part is
  // not needed.
  setOperationAction(ISD::MUL              , MVT::i8   , Expand);
  setOperationAction(ISD::MULHS            , MVT::i8   , Expand);
  setOperationAction(ISD::MULHU            , MVT::i8   , Expand);
  setOperationAction(ISD::SDIV             , MVT::i8   , Expand);
  setOperationAction(ISD::UDIV             , MVT::i8   , Expand);
  setOperationAction(ISD::SREM             , MVT::i8   , Expand);
  setOperationAction(ISD::UREM             , MVT::i8   , Expand);
  setOperationAction(ISD::MUL              , MVT::i16  , Expand);
  setOperationAction(ISD::MULHS            , MVT::i16  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i16  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i16  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i16  , Expand);
  setOperationAction(ISD::SREM             , MVT::i16  , Expand);
  setOperationAction(ISD::UREM             , MVT::i16  , Expand);
  setOperationAction(ISD::MUL              , MVT::i32  , Expand);
  setOperationAction(ISD::MULHS            , MVT::i32  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i32  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i32  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i32  , Expand);
  setOperationAction(ISD::SREM             , MVT::i32  , Expand);
  setOperationAction(ISD::UREM             , MVT::i32  , Expand);
  setOperationAction(ISD::MUL              , MVT::i64  , Expand);
  setOperationAction(ISD::MULHS            , MVT::i64  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i64  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i64  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i64  , Expand);
  setOperationAction(ISD::SREM             , MVT::i64  , Expand);
  setOperationAction(ISD::UREM             , MVT::i64  , Expand);
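  // Illustrative note (not in the original source): with SDIV and SREM both
  // expanded to the two-result ISD::SDIVREM node, code such as
  //   int q = x / y;  int r = x % y;
  // becomes two uses of one SDIVREM node after CSE, which then selects to a
  // single 32-bit idiv producing the quotient in EAX and the remainder in EDX.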
  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::FLT_ROUNDS       , MVT::i32  , Custom);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64  , Custom);
  setOperationAction(ISD::SELECT           , MVT::f80  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC            , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC            , MVT::f80  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET              , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN      , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool   , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable      , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress  , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol , MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET           , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY           , MVT::Other, Custom);
  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);
  setOperationAction(ISD::VAARG            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY         , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY         , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
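    // Illustrative note (not in the original source): these bit tricks work
    // because IEEE floats keep the sign in their top bit, so roughly
    //   fabs(x) == andpd x, <mask with the sign bit clear>  ; 0x7FF...F
    //   -x      == xorpd x, <mask with only the sign bit>   ; 0x800...0
    // and copysign ANDs out the magnitude bits of one value and ORs in the
    // sign bit of the other.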
    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE->x87 conversions go through memory.
    setConvertAction(MVT::f32, MVT::f64, Expand);
    setConvertAction(MVT::f32, MVT::f80, Expand);

    // x87->SSE truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    // And x87->x87 truncations also.
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF,      MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN,  MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN , MVT::f80 , Expand);
    setOperationAction(ISD::FCOS , MVT::f80 , Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f80  , Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM,   (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM,   (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SHL,   (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRA,   (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRL,   (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTL,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTR,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic.

    setOperationAction(ISD::ADD, MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,   MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR,  MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,  MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,  MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,  MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }
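  // Illustrative note (not in the original source): the Promote /
  // AddPromotedToType pairs above make, e.g., an AND of two v8i8 values be
  // bitcast to v1i64, performed there with the single 64-bit MMX pand, and
  // bitcast back; bitwise operations and whole-register loads are
  // bit-parallel, so the element type is irrelevant.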
  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,  MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,  MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,  MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,  MVT::v2i64, Legal);
    setOperationAction(ISD::SUB,  MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,  MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,  MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,  MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,  MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      // Do not attempt to custom lower non-power-of-2 vectors.
      if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
        continue;
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }
  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing-for-size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
                                                      SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node, if
/// it exists, skipping a possible ISD::TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}
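// Illustrative note (not in the original source): the two chain shapes this
// helper recognizes are roughly
//   TAILCALL                    -- returned as-is, and
//   TokenFactor(TAILCALL, ...)  -- its first operand, the TAILCALL, is
//                                  returned;
// any other chain comes back unchanged.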
/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand, 8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. The last operand is a flag, so it is
    // not copied.
    for (unsigned i = 3; i < TailCall.getNumOperands() - 1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't really
    // a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into the top of the stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}
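// Illustrative note (not in the original source): BytesToPop becomes the
// immediate of the return instruction, so e.g. a stdcall callee with 12
// bytes of stack arguments returns with "ret $12", popping them on behalf
// of the caller.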
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. It returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When the stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention in just a little: the callee
//  cleans up the stack, not the caller. Symbols are also decorated in some
//  fancy way :) It doesn't support any vector arguments.
//  For info on the fast calling convention, see the Fast Calling Convention
//  (tail call) implementation LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live-in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// Align stack arguments according to the platform alignment needed for tail
// calls.
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}
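// Illustrative note (not in the original source): for a byval parameter the
// code above returns the frame index itself, i.e. the address of the copy the
// caller placed in the argument area, instead of loading a value; e.g. for
//   void f(struct S s);   // S passed byval
// the body of f addresses s in place in the incoming argument area.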
SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg,
                 getTargetMachine(), ArgLocs);
  // Check for a possible tail call calling convention.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align the stack specially for tail calls.
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  // Tail call calling convention (CallingConv::Fast) does not support varargs.
  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "CallingConv::Fast does not support varargs.");

  if (isStdCall && !isVarArg &&
      (CC == CallingConv::Fast && PerformTailCallOpt || CC != CallingConv::Fast)) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0; // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5 + 2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires the GOT to be in the EBX register before function
  // calls made via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for the retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall ||
      (CC == CallingConv::Fast && PerformTailCallOpt)) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
    assert(!(isVarArg && CC == CallingConv::Fast) &&
           "CallingConv::Fast does not support varargs.");
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, getPointerTy()),
                             DAG.getConstant(NumBytesForCalleeToPush,
                                             getPointerTy()),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
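// Illustrative note (not in the original source): e.g. for
//   int __attribute__((fastcall)) f(int a, int b, int c);
// a arrives in ECX, b in EDX, c on the stack, and f returns with "ret $4"
// to pop c.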
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the argument area takes 8n+4 bytes so that the arguments are
    // aligned both at their start and after the return address has been
    // pushed.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }
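  // Illustrative note (not in the original source): the return address slot
  // is 4 bytes, so an argument area of 8n+4 bytes keeps the stack 8-byte
  // aligned at the call site; e.g. a StackSize of 16 is bumped to 20, and
  // 20 + 4 (retaddr) = 24 remains a multiple of 8.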
  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6 + 2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);
    SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);

    return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
                         AlwaysInline);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}
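// Illustrative note (not in the original source): the byval alignment is
// stored in the flags word as a log2 bitfield, so e.g. a field value of 4
// yields Align = 1 << 4 = 16 bytes, and the argument is copied with an
// always-inline memcpy of Size bytes rather than a plain store.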
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer to be live in EBX before calls made
  // via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  assert(isTailCall == false && "no tail call here");
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
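  // (Both amount operands pushed below are NumBytes: this convention is
  // callee-pop, so the callee removes the same number of bytes the caller
  // pushed.)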
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}

//===----------------------------------------------------------------------===//
//               Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

//  Like the stdcall convention the callee cleans up the arguments, except
//  that ECX is reserved for storing the tail called function address. Only 2
//  registers are free for argument passing (inreg). Tail call optimization
//  is performed provided:
//                * tailcallopt is enabled
//                * caller/callee are fastcc
//                * elf/pic is disabled OR
//                * elf/pic enabled + callee is in module + callee has
//                  visibility protected or hidden
//  To keep the stack aligned according to the platform ABI the function
//  GetAlignedArgumentStackSize ensures that the argument delta is always a
//  multiple of the stack alignment. (Dynamic linkers need this - darwin's
//  dyld for example.)
//  If a tail called function callee has more arguments than the caller, the
//  caller needs to make sure that there is room to move the RETADDR to. This
//  is achieved by reserving an area the size of the argument delta right
//  after the original RETADDR, but before the saved framepointer or the
//  spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3,
//  arg4). Stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round up the stack size so that, once the
/// return-address slot is pushed, the stack stays aligned; e.g. make it
/// 16n + 12 bytes for a 16 byte alignment requirement and a 4 byte slot.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // Residue is still below the target (StackAlignment - SlotSize); just
      // add the difference.
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the lower bits, then add the stack alignment once plus the
      // target residue.
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment - SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
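/// (Sketch: a void return may use the CALL's chain result directly, while a
/// value return must take both its chain and its return value from the CALL
/// node's results, as checked below.)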
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                          SDOperand Ret,
                                                          SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  // Check whether the CALL node immediately precedes the RET node and
  // whether the return uses the result of the node or is a void return.
  unsigned NumOps = Ret.getNumOperands();
  if ((NumOps == 1 &&
       (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
        Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
      (NumOps > 1 &&
       Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
       Ret.getOperand(1) == SDOperand(Call.Val,0))) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDOperand Callee = Call.getOperand(4);
      // On elf/pic %ebx needs to be livein.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT())
        return true;

      // Can only do local tail calls with PIC.
      GlobalValue *GV = 0;
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G != 0 &&
          (GV = G->getGlobal()) &&
          (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
        return true;
    }
  }

  return false;
}

SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
                                                 SelectionDAG &DAG,
                                                 unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  bool is64Bit = Subtarget->is64Bit();

  assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (is64Bit)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);

  // Lower arguments at fp - stackoffset + fpdiff.
  MachineFunction &MF = DAG.getMachineFunction();

  unsigned NumBytesToBePushed =
    GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);

  unsigned NumBytesCallerPushed =
    MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
  int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;

  // Record the delta by which the return address stack slot moves, but only
  // if this delta is smaller (i.e. requires more room) than any previously
  // recorded one.
  if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
    MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytesToBePushed,
                                               getPointerTy()));

  // Adjust the Return address stack slot.
  SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
  if (FPDiff) {
    MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
    RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
    // Load the "old" Return address.
    RetAddrFrIdx = DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
    // Calculate the new stack slot for the return address.
    int SlotSize = is64Bit ? 8 : 4;
    int NewReturnAddrFI =
      MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
    NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
    Chain = SDOperand(RetAddrFrIdx.Val, 1);
  }

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;
  SmallVector<SDOperand, 8> MemOpChains2;
  SDOperand FramePtr, StackPtr;
  SDOperand PtrOff;
  SDOperand FIN;
  int FI = 0;

  // Walk the register/memloc assignments, inserting copies/loads.  Lower
  // arguments first to the stack slots where they would land in the case of
  // a normal function call.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  InFlag = SDOperand();

  // Copy arguments from their temporary stack slots to the stack slots of
  // the tail called function. This needs to be done because if we lowered
  // the arguments directly to their final stack slots we might end up
  // overwriting one another.
  // TODO: To make this more efficient (sometimes saving a store/load) we
  // could analyse the arguments and emit this store/load/store sequence only
  // for arguments which would be overwritten otherwise.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (!VA.isRegLoc()) {
      SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
      unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();

      // Get source stack slot.
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      // Create frame index.
      int32_t Offset = VA.getLocMemOffset()+FPDiff;
      uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
      FIN = DAG.getFrameIndex(FI, MVT::i32);
      if (Flags & ISD::ParamFlags::ByVal) {
        // Copy relative to framepointer.
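        // (e.g. a 24 byte byval struct with 8 byte alignment decodes below
        // to Size == 24 and Align == 8; the alignment is stored as a log2
        // value in the flags word.)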
        unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                               ISD::ParamFlags::ByValAlignOffs);

        unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                        ISD::ParamFlags::ByValSizeOffs;

        SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
        SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);
        SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);

        MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
                                             AlignNode, AlwaysInline));
      } else {
        SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff,
                                          NULL, 0);
        // Store relative to framepointer.
        MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
      }
    }
  }

  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains2[0], MemOpChains2.size());

  // Store the return address to the appropriate stack slot.
  if (FPDiff)
    Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);

  // ELF / PIC normally requires the GOT pointer in the EBX register before
  // function calls via the PLT. That does not work with tail calls, since
  // EBX is not restored correctly by the tail caller.
  // TODO: at least for x86 - verify for x86-64.

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  else {
    assert(Callee.getOpcode() == ISD::LOAD &&
           "Function destination must be loaded into virtual register");
    // Indirect tail calls go through a fixed register (ECX, or R9 on
    // x86-64).
    unsigned Opc = is64Bit ? X86::R9 : X86::ECX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add register as live out.
    DAG.getMachineFunction().addLiveOut(Opc);
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;

  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a chain & a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  assert(InFlag.Val &&
         "Flag must be set. Depend on flag being set in LowerRET");
  Chain = DAG.getNode(X86ISD::TAILCALL,
                      Op.Val->getVTList(), &Ops[0], Ops.size());

  return SDOperand(Chain.Val, Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                  X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip
    // later places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass;  // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
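      // (e.g. a v1i64 argument arrives in RDI as an i64; the BIT_CONVERT
      // below turns it back into the MMX vector type the function expects.)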
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    assert(CC != CallingConv::Fast &&
           "Var arg not supported with calling convention fastcc");
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset,
                                                getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);
  // Tail call convention (fastcc) needs callee pop.
  if (CC == CallingConv::Fast && PerformTailCallOpt) {
    BytesToPopOnReturn = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0;          // Callee pops nothing.
    BytesCallerReserves = StackSize;
  }
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    assert(CallingConv::Fast != CC &&
           "Var args not supported with calling convention fastcc");

    // From the AMD64 ABI document:
    //   For calls that may call functions that use varargs or stdargs
    //   (prototype-less calls or calls to functions containing ellipsis
    //   (...) in the declaration) %al is used as a hidden argument to
    //   specify the number of SSE registers used. The contents of %al do
    //   not need to match exactly the number of registers, but must be an
    //   upper bound on the number of SSE registers used and is in the range
    //   0 - 8 inclusive.

    // Count the number of XMM registers allocated.
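    // (e.g. for printf("%f\n", x) the double is passed in %xmm0, so the
    // copy below sets %al to 1.)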
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (getTargetMachine().getCodeModel() != CodeModel::Large
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    if (getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  int NumBytesForCalleeToPush = 0;
  if (CC == CallingConv::Fast && PerformTailCallOpt) {
    NumBytesForCalleeToPush = NumBytes;  // Callee pops everything.
  } else {
    NumBytesForCalleeToPush = 0;         // Callee pops nothing.
  }

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
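    // (The slot sits immediately below the frame: 8 bytes at offset -8 on
    // x86-64, 4 bytes at offset -4 on x86-32.)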
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}



/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> compare X against 0 instead; jump if !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> RHS is already 0; jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true;  // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true;  // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true;  // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true;  // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. The current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value falls within the half-open range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check if the value doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
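  // (e.g. the v8i16 mask <3, 1, 2, 0, 4, 5, 6, 7> qualifies: elements 4-7
  // stay in place while 0-3 are permuted among themselves.)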
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly the reverse
/// of what x86 shuffles want. x86 shuffles require the lower half elements
/// to come from vector 1 (which would equal the dest.) and the upper half to
/// come from vector 2.
static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect element 0 == 6, element 1 == 7, element 2 == 2, element 3 == 3.
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect element 0 == 2, element 1 == 3, element 2 == 2, element 3 == 3.
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
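/// (e.g. for v4f32 the mask <4, 5, 2, 3> qualifies: the low half is taken
/// from V2 while the high half of V1 is left in place.)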
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      // A splat V2 has been normalized so that every reference to it uses
      // element NumElts.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. x86 movs requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
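/// (MOVSHDUP duplicates the odd elements; for v4f32 the accepted mask shape
/// is <1, 1, 3, 3>, modulo undefs.)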
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same,
  // and if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit and 32-bit quantities with a single
  // instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions. For example, the v4f32 reversal mask <3, 2, 1, 0> encodes
/// to 0x1B (0b00011011).
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// values in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                      SDOperand &V2, SDOperand &Mask,
                                      SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// CommuteVectorShuffleMask - Change values in a shuffle permute mask
/// assuming the two vector operands have swapped position.
static
SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
}


/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return ISD::isNON_EXTLoad(N);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed
/// to match movlp{s|d}. The lower half elements should come from the lower
/// half of V1 (and in order), and the upper half elements should come from
/// the upper half of V2 (and in order). And since V1 will become the source
/// of the MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation.  We will try to
  // fold the load into a shufps op instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements
/// are all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be
/// resolved to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be
/// resolved to a zero vector.
2896 static bool isZeroShuffle(SDNode *N) {
2897   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2898     return false;
2899
2900   SDOperand V1 = N->getOperand(0);
2901   SDOperand V2 = N->getOperand(1);
2902   SDOperand Mask = N->getOperand(2);
2903   unsigned NumElems = Mask.getNumOperands();
2904   for (unsigned i = 0; i != NumElems; ++i) {
2905     SDOperand Arg = Mask.getOperand(i);
2906     if (Arg.getOpcode() == ISD::UNDEF)
2907       continue;
2908
2909     unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
2910     if (Idx < NumElems) {
2911       unsigned Opc = V1.Val->getOpcode();
2912       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
2913         continue;
2914       if (Opc != ISD::BUILD_VECTOR ||
2915           !isZeroNode(V1.Val->getOperand(Idx)))
2916         return false;
2917     } else if (Idx >= NumElems) {
2918       unsigned Opc = V2.Val->getOpcode();
2919       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
2920         continue;
2921       if (Opc != ISD::BUILD_VECTOR ||
2922           !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
2923         return false;
2924     }
2925   }
2926   return true;
2927 }
2928
2929 /// getZeroVector - Returns a vector of specified type with all zero elements.
2930 ///
2931 static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2932   assert(MVT::isVector(VT) && "Expected a vector type");
2933
2934   // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their
2935   // dest type. This ensures they get CSE'd.
2936   SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
2937   SDOperand Vec;
2938   if (MVT::getSizeInBits(VT) == 64)  // MMX
2939     Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
2940   else  // SSE
2941     Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
2942   return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
2943 }
2944
2945 /// getOnesVector - Returns a vector of specified type with all bits set.
2946 ///
2947 static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
2948   assert(MVT::isVector(VT) && "Expected a vector type");
2949
2950   // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their
2951   // dest type. This ensures they get CSE'd.
2952   SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
2953   SDOperand Vec;
2954   if (MVT::getSizeInBits(VT) == 64)  // MMX
2955     Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
2956   else  // SSE
2957     Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
2958   return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
2959 }
2960
2961
2962 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2963 /// that point to V2 point to its first element.
2964 static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2965   assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2966
2967   bool Changed = false;
2968   SmallVector<SDOperand, 8> MaskVec;
2969   unsigned NumElems = Mask.getNumOperands();
2970   for (unsigned i = 0; i != NumElems; ++i) {
2971     SDOperand Arg = Mask.getOperand(i);
2972     if (Arg.getOpcode() != ISD::UNDEF) {
2973       unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2974       if (Val > NumElems) {
2975         Arg = DAG.getConstant(NumElems, Arg.getValueType());
2976         Changed = true;
2977       }
2978     }
2979     MaskVec.push_back(Arg);
2980   }
2981
2982   if (Changed)
2983     Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2984                        &MaskVec[0], MaskVec.size());
2985   return Mask;
2986 }
2987
2988 /// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
2989 /// operation of specified width.
2990 static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2991   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2992   MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
2993
2994   SmallVector<SDOperand, 8> MaskVec;
2995   MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2996   for (unsigned i = 1; i != NumElems; ++i)
2997     MaskVec.push_back(DAG.getConstant(i, BaseVT));
2998   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2999 }
3000
3001 /// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
3002 /// of specified width.
3003 static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
3004   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3005   MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
3006   SmallVector<SDOperand, 8> MaskVec;
3007   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
3008     MaskVec.push_back(DAG.getConstant(i, BaseVT));
3009     MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
3010   }
3011   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3012 }
3013
3014 /// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
3015 /// of specified width.
3016 static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
3017   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3018   MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
3019   unsigned Half = NumElems/2;
3020   SmallVector<SDOperand, 8> MaskVec;
3021   for (unsigned i = 0; i != Half; ++i) {
3022     MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
3023     MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
3024   }
3025   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3026 }
3027
3028 /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
3029 ///
3030 static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
3031   SDOperand V1 = Op.getOperand(0);
3032   SDOperand Mask = Op.getOperand(2);
3033   MVT::ValueType VT = Op.getValueType();
3034   unsigned NumElems = Mask.getNumOperands();
3035   Mask = getUnpacklMask(NumElems, DAG);
3036   while (NumElems != 4) {
3037     V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
3038     NumElems >>= 1;
3039   }
3040   V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
3041
3042   Mask = getZeroVector(MVT::v4i32, DAG);
3043   SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
3044                                   DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
3045   return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
3046 }
3047
3048 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
3049 /// vector and a zero or undef vector. This produces a shuffle where the low
3050 /// element of V2 is swizzled into the zero/undef vector, landing at element
3051 /// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
3052 static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
3053                                              unsigned NumElems, unsigned Idx,
3054                                              bool isZero, SelectionDAG &DAG) {
3055   SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
3056   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3057   MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
3058   SmallVector<SDOperand, 16> MaskVec;
3059   for (unsigned i = 0; i != NumElems; ++i)
3060     if (i == Idx)  // If this is the insertion idx, put the low elt of V2 here.
3061       MaskVec.push_back(DAG.getConstant(NumElems, EVT));
3062     else
3063       MaskVec.push_back(DAG.getConstant(i, EVT));
3064   SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3065                                &MaskVec[0], MaskVec.size());
3066   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3067 }
3068
3069 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
3070 ///
3071 static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
3072                                        unsigned NumNonZero, unsigned NumZero,
3073                                        SelectionDAG &DAG, TargetLowering &TLI) {
3074   if (NumNonZero > 8)
3075     return SDOperand();
3076
3077   SDOperand V(0, 0);
3078   bool First = true;
3079   for (unsigned i = 0; i < 16; ++i) {
3080     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
3081     if (ThisIsNonZero && First) {
3082       if (NumZero)
3083         V = getZeroVector(MVT::v8i16, DAG);
3084       else
3085         V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
3086       First = false;
3087     }
3088
3089     if ((i & 1) != 0) {
3090       SDOperand ThisElt(0, 0), LastElt(0, 0);
3091       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
3092       if (LastIsNonZero) {
3093         LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
3094       }
3095       if (ThisIsNonZero) {
3096         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
3097         ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
3098                               ThisElt, DAG.getConstant(8, MVT::i8));
3099         if (LastIsNonZero)
3100           ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
3101       } else
3102         ThisElt = LastElt;
3103
3104       if (ThisElt.Val)
3105         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
3106                         DAG.getConstant(i/2, TLI.getPointerTy()));
3107     }
3108   }
3109
3110   return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
3111 }
3112
3113 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
3114 ///
3115 static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
3116                                        unsigned NumNonZero, unsigned NumZero,
3117                                        SelectionDAG &DAG, TargetLowering &TLI) {
3118   if (NumNonZero > 4)
3119     return SDOperand();
3120
3121   SDOperand V(0, 0);
3122   bool First = true;
3123   for (unsigned i = 0; i < 8; ++i) {
3124     bool isNonZero = (NonZeros & (1 << i)) != 0;
3125     if (isNonZero) {
3126       if (First) {
3127         if (NumZero)
3128           V = getZeroVector(MVT::v8i16, DAG);
3129         else
3130           V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
3131         First = false;
3132       }
3133       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
3134                       DAG.getConstant(i, TLI.getPointerTy()));
3135     }
3136   }
3137
3138   return V;
3139 }
3140
3141 /// LowerBUILD_VECTOR - Custom lower BUILD_VECTOR nodes into all-zeros/ones
3142 /// vectors, element insertions, or a small number of shuffles.
3143 SDOperand
3144 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3145   // All zeros are handled with pxor; all ones are handled with pcmpeqd.
3146   if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
3147     // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
3148     // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
3149     // eliminated on x86-32 hosts.
3150     if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
3151       return Op;
3152
3153     if (ISD::isBuildVectorAllOnes(Op.Val))
3154       return getOnesVector(Op.getValueType(), DAG);
3155     return getZeroVector(Op.getValueType(), DAG);
3156   }
3157
3158   MVT::ValueType VT = Op.getValueType();
3159   MVT::ValueType EVT = MVT::getVectorElementType(VT);
3160   unsigned EVTBits = MVT::getSizeInBits(EVT);
3161
3162   unsigned NumElems = Op.getNumOperands();
3163   unsigned NumZero = 0;
3164   unsigned NumNonZero = 0;
3165   unsigned NonZeros = 0;
3166   bool HasNonImms = false;
3167   SmallSet<SDOperand, 8> Values;
3168   for (unsigned i = 0; i < NumElems; ++i) {
3169     SDOperand Elt = Op.getOperand(i);
3170     if (Elt.getOpcode() == ISD::UNDEF)
3171       continue;
3172     Values.insert(Elt);
3173     if (Elt.getOpcode() != ISD::Constant &&
3174         Elt.getOpcode() != ISD::ConstantFP)
3175       HasNonImms = true;
3176     if (isZeroNode(Elt))
3177       NumZero++;
3178     else {
3179       NonZeros |= (1 << i);
3180       NumNonZero++;
3181     }
3182   }
3183
3184   if (NumNonZero == 0) {
3185     // All-undef vector. Return an UNDEF. All zero vectors were handled above.
3186     return DAG.getNode(ISD::UNDEF, VT);
3187   }
3188
3189   // Splat is obviously ok. Let legalizer expand it to a shuffle.
3190   if (Values.size() == 1)
3191     return SDOperand();
3192
3193   // Special case for single non-zero element.
3194   if (NumNonZero == 1 && NumElems <= 4) {
3195     unsigned Idx = CountTrailingZeros_32(NonZeros);
3196     SDOperand Item = Op.getOperand(Idx);
3197     Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
3198     if (Idx == 0)
3199       // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
3200       return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
3201                                          NumZero > 0, DAG);
3202     else if (!HasNonImms)  // Otherwise, it's better to do a constpool load.
3203       return SDOperand();
3204
3205     if (EVTBits == 32) {
3206       // Turn it into a shuffle of zero and zero-extended scalar to vector.
3207       Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
3208                                          DAG);
3209       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3210       MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3211       SmallVector<SDOperand, 8> MaskVec;
3212       for (unsigned i = 0; i < NumElems; i++)
3213         MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
3214       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3215                                    &MaskVec[0], MaskVec.size());
3216       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
3217                          DAG.getNode(ISD::UNDEF, VT), Mask);
3218     }
3219   }
3220
3221   // A vector full of immediates; various special cases are already
3222   // handled, so this is best done with a single constant-pool load.
3223   if (!HasNonImms)
3224     return SDOperand();
3225
3226   // Let legalizer expand 2-wide build_vectors.
3227   if (EVTBits == 64)
3228     return SDOperand();
3229
3230   // If element VT is < 32 bits, convert it to inserts into a zero vector.
3231   if (EVTBits == 8 && NumElems == 16) {
3232     SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
3233                                         *this);
3234     if (V.Val) return V;
3235   }
3236
3237   if (EVTBits == 16 && NumElems == 8) {
3238     SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
3239                                         *this);
3240     if (V.Val) return V;
3241   }
3242
3243   // If element VT is == 32 bits, turn it into a number of shuffles.
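  // Illustrative walk-through of the NumZero > 0 path below (the values are
  // hypothetical, added for exposition): to build <0, a, 0, b>, NonZeros is
  // 0b1010. Each pair is combined with a MOVL-mask shuffle:
  //   V[0] = shuffle(zero, scalar_to_vector(a), <4,1,2,3>)  ==>  <a,0,0,0>
  //   V[1] = shuffle(zero, scalar_to_vector(b), <4,1,2,3>)  ==>  <b,0,0,0>
  // Both pairs then need their lanes reversed, so the final shuffle uses the
  // mask <1,0,5,4>, yielding <0, a, 0, b>.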
3244   SmallVector<SDOperand, 8> V;
3245   V.resize(NumElems);
3246   if (NumElems == 4 && NumZero > 0) {
3247     for (unsigned i = 0; i < 4; ++i) {
3248       bool isZero = !(NonZeros & (1 << i));
3249       if (isZero)
3250         V[i] = getZeroVector(VT, DAG);
3251       else
3252         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3253     }
3254
3255     for (unsigned i = 0; i < 2; ++i) {
3256       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
3257         default: break;
3258         case 0:
3259           V[i] = V[i*2];  // Must be a zero vector.
3260           break;
3261         case 1:
3262           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
3263                              getMOVLMask(NumElems, DAG));
3264           break;
3265         case 2:
3266           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3267                              getMOVLMask(NumElems, DAG));
3268           break;
3269         case 3:
3270           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3271                              getUnpacklMask(NumElems, DAG));
3272           break;
3273       }
3274     }
3275
3276     // Take advantage of the fact that GR32 to VR128 scalar_to_vector (i.e.
3277     // movd) clears the upper bits.
3278     // FIXME: we can do the same for the v4f32 case when we know both parts of
3279     // the lower half come from scalar_to_vector (loadf32). We should do
3280     // that in the post-legalizer dag combiner with target-specific hooks.
3281     if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
3282       return V[0];
3283     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3284     MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
3285     SmallVector<SDOperand, 8> MaskVec;
3286     bool Reverse = (NonZeros & 0x3) == 2;
3287     for (unsigned i = 0; i < 2; ++i)
3288       if (Reverse)
3289         MaskVec.push_back(DAG.getConstant(1-i, EVT));
3290       else
3291         MaskVec.push_back(DAG.getConstant(i, EVT));
3292     Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
3293     for (unsigned i = 0; i < 2; ++i)
3294       if (Reverse)
3295         MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
3296       else
3297         MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
3298     SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3299                                      &MaskVec[0], MaskVec.size());
3300     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
3301   }
3302
3303   if (Values.size() > 2) {
3304     // Expand into a number of unpckl*.
3305     // e.g. for v4f32
3306     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
3307     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
3308     //   Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
3309     SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
3310     for (unsigned i = 0; i < NumElems; ++i)
3311       V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3312     NumElems >>= 1;
3313     while (NumElems != 0) {
3314       for (unsigned i = 0; i < NumElems; ++i)
3315         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
3316                            UnpckMask);
3317       NumElems >>= 1;
3318     }
3319     return V[0];
3320   }
3321
3322   return SDOperand();
3323 }
3324
3325 static
3326 SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
3327                                    SDOperand PermMask, SelectionDAG &DAG,
3328                                    TargetLowering &TLI) {
3329   SDOperand NewV;
3330   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
3331   MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3332   MVT::ValueType PtrVT = TLI.getPointerTy();
3333   SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
3334                                      PermMask.Val->op_end());
3335
3336   // First record which half of which vector the low elements come from.
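  // For example (illustrative): with a low-half mask of <2, 3, 10, 11>,
  // elements 2 and 3 live in quad 0 (V1's low half) and elements 10 and 11
  // in quad 2 (V2's low half), so LowQuad becomes {2, 0, 2, 0} and the vote
  // below settles on BestLowQuad == 0.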
3337 SmallVector<unsigned, 4> LowQuad(4); 3338 for (unsigned i = 0; i < 4; ++i) { 3339 SDOperand Elt = MaskElts[i]; 3340 if (Elt.getOpcode() == ISD::UNDEF) 3341 continue; 3342 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3343 int QuadIdx = EltIdx / 4; 3344 ++LowQuad[QuadIdx]; 3345 } 3346 int BestLowQuad = -1; 3347 unsigned MaxQuad = 1; 3348 for (unsigned i = 0; i < 4; ++i) { 3349 if (LowQuad[i] > MaxQuad) { 3350 BestLowQuad = i; 3351 MaxQuad = LowQuad[i]; 3352 } 3353 } 3354 3355 // Record which half of which vector the high elements come from. 3356 SmallVector<unsigned, 4> HighQuad(4); 3357 for (unsigned i = 4; i < 8; ++i) { 3358 SDOperand Elt = MaskElts[i]; 3359 if (Elt.getOpcode() == ISD::UNDEF) 3360 continue; 3361 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3362 int QuadIdx = EltIdx / 4; 3363 ++HighQuad[QuadIdx]; 3364 } 3365 int BestHighQuad = -1; 3366 MaxQuad = 1; 3367 for (unsigned i = 0; i < 4; ++i) { 3368 if (HighQuad[i] > MaxQuad) { 3369 BestHighQuad = i; 3370 MaxQuad = HighQuad[i]; 3371 } 3372 } 3373 3374 // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. 3375 if (BestLowQuad != -1 || BestHighQuad != -1) { 3376 // First sort the 4 chunks in order using shufpd. 3377 SmallVector<SDOperand, 8> MaskVec; 3378 if (BestLowQuad != -1) 3379 MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); 3380 else 3381 MaskVec.push_back(DAG.getConstant(0, MVT::i32)); 3382 if (BestHighQuad != -1) 3383 MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); 3384 else 3385 MaskVec.push_back(DAG.getConstant(1, MVT::i32)); 3386 SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); 3387 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, 3388 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), 3389 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); 3390 NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); 3391 3392 // Now sort high and low parts separately. 3393 BitVector InOrder(8); 3394 if (BestLowQuad != -1) { 3395 // Sort lower half in order using PSHUFLW. 3396 MaskVec.clear(); 3397 bool AnyOutOrder = false; 3398 for (unsigned i = 0; i != 4; ++i) { 3399 SDOperand Elt = MaskElts[i]; 3400 if (Elt.getOpcode() == ISD::UNDEF) { 3401 MaskVec.push_back(Elt); 3402 InOrder.set(i); 3403 } else { 3404 unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); 3405 if (EltIdx != i) 3406 AnyOutOrder = true; 3407 MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); 3408 // If this element is in the right place after this shuffle, then 3409 // remember it. 3410 if ((int)(EltIdx / 4) == BestLowQuad) 3411 InOrder.set(i); 3412 } 3413 } 3414 if (AnyOutOrder) { 3415 for (unsigned i = 4; i != 8; ++i) 3416 MaskVec.push_back(DAG.getConstant(i, MaskEVT)); 3417 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); 3418 NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); 3419 } 3420 } 3421 3422 if (BestHighQuad != -1) { 3423 // Sort high half in order using PSHUFHW if possible. 
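      // PSHUFHW permutes only elements 4-7 and passes elements 0-3 through
      // unchanged; e.g. (illustrative) the mask <0,1,2,3,7,6,5,4> reverses
      // the high quad in place. The four identity entries pushed first below
      // keep the already-sorted low half fixed.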
3424       MaskVec.clear();
3425       for (unsigned i = 0; i != 4; ++i)
3426         MaskVec.push_back(DAG.getConstant(i, MaskEVT));
3427       bool AnyOutOrder = false;
3428       for (unsigned i = 4; i != 8; ++i) {
3429         SDOperand Elt = MaskElts[i];
3430         if (Elt.getOpcode() == ISD::UNDEF) {
3431           MaskVec.push_back(Elt);
3432           InOrder.set(i);
3433         } else {
3434           unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3435           if (EltIdx != i)
3436             AnyOutOrder = true;
3437           MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
3438           // If this element is in the right place after this shuffle, then
3439           // remember it.
3440           if ((int)(EltIdx / 4) == BestHighQuad)
3441             InOrder.set(i);
3442         }
3443       }
3444       if (AnyOutOrder) {
3445         SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
3446         NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
3447       }
3448     }
3449
3450     // The other elements are put in the right place using pextrw and pinsrw.
3451     for (unsigned i = 0; i != 8; ++i) {
3452       if (InOrder[i])
3453         continue;
3454       SDOperand Elt = MaskElts[i];
3455       unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3456       if (EltIdx == i)
3457         continue;
3458       SDOperand ExtOp = (EltIdx < 8)
3459         ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
3460                       DAG.getConstant(EltIdx, PtrVT))
3461         : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
3462                       DAG.getConstant(EltIdx - 8, PtrVT));
3463       NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3464                          DAG.getConstant(i, PtrVT));
3465     }
3466     return NewV;
3467   }
3468
3469   // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
3470   // as few as possible.
3471   // First, let's find out how many elements are already in the right order.
3472   unsigned V1InOrder = 0;
3473   unsigned V1FromV1 = 0;
3474   unsigned V2InOrder = 0;
3475   unsigned V2FromV2 = 0;
3476   SmallVector<SDOperand, 8> V1Elts;
3477   SmallVector<SDOperand, 8> V2Elts;
3478   for (unsigned i = 0; i < 8; ++i) {
3479     SDOperand Elt = MaskElts[i];
3480     if (Elt.getOpcode() == ISD::UNDEF) {
3481       V1Elts.push_back(Elt);
3482       V2Elts.push_back(Elt);
3483       ++V1InOrder;
3484       ++V2InOrder;
3485       continue;
3486     }
3487     unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3488     if (EltIdx == i) {
3489       V1Elts.push_back(Elt);
3490       V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
3491       ++V1InOrder;
3492     } else if (EltIdx == i+8) {
3493       V1Elts.push_back(Elt);
3494       V2Elts.push_back(DAG.getConstant(i, MaskEVT));
3495       ++V2InOrder;
3496     } else if (EltIdx < 8) {
3497       V1Elts.push_back(Elt);
3498       ++V1FromV1;
3499     } else {
3500       V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
3501       ++V2FromV2;
3502     }
3503   }
3504
3505   if (V2InOrder > V1InOrder) {
3506     PermMask = CommuteVectorShuffleMask(PermMask, DAG);
3507     std::swap(V1, V2);
3508     std::swap(V1Elts, V2Elts);
3509     std::swap(V1FromV1, V2FromV2);
3510   }
3511
3512   if ((V1FromV1 + V1InOrder) != 8) {
3513     // Some elements are from V2.
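    // E.g. (illustrative) for the mask <0, 9, 2, 11, 4, 13, 6, 15>: the V1
    // elements 0, 2, 4, 6 are already in place, so only the four V2 elements
    // need to move, each costing one pextrw/pinsrw pair in the code below.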
3514     if (V1FromV1) {
3515       // If there are elements that come from V1 but are out of place, first
3516       // move them into place with a single shuffle of V1 against itself.
3517       SmallVector<SDOperand, 8> MaskVec;
3518       for (unsigned i = 0; i < 8; ++i) {
3519         SDOperand Elt = V1Elts[i];
3520         if (Elt.getOpcode() == ISD::UNDEF) {
3521           MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3522           continue;
3523         }
3524         unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3525         if (EltIdx >= 8)
3526           MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3527         else
3528           MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
3529       }
3530       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
3531       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
3532     }
3533
3534     NewV = V1;
3535     for (unsigned i = 0; i < 8; ++i) {
3536       SDOperand Elt = V1Elts[i];
3537       if (Elt.getOpcode() == ISD::UNDEF)
3538         continue;
3539       unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3540       if (EltIdx < 8)
3541         continue;
3542       SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
3543                                     DAG.getConstant(EltIdx - 8, PtrVT));
3544       NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3545                          DAG.getConstant(i, PtrVT));
3546     }
3547     return NewV;
3548   } else {
3549     // All elements are from V1.
3550     NewV = V1;
3551     for (unsigned i = 0; i < 8; ++i) {
3552       SDOperand Elt = V1Elts[i];
3553       if (Elt.getOpcode() == ISD::UNDEF)
3554         continue;
3555       unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3556       SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
3557                                     DAG.getConstant(EltIdx, PtrVT));
3558       NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
3559                          DAG.getConstant(i, PtrVT));
3560     }
3561     return NewV;
3562   }
3563 }
3564
3565 /// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
3566 /// ones if possible. This can be done when every pair / quad of shuffle mask
3567 /// elements points to elements in the right sequence. e.g.
3568 /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
3569 static
3570 SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
3571                                 SDOperand PermMask, SelectionDAG &DAG,
3572                                 TargetLowering &TLI) {
3573   unsigned NumElems = PermMask.getNumOperands();
3574   unsigned Scale = NumElems / 4;
3575   SmallVector<SDOperand, 4> MaskVec;
3576   for (unsigned i = 0; i < NumElems; i += Scale) {
3577     unsigned StartIdx = ~0U;
3578     for (unsigned j = 0; j < Scale; ++j) {
3579       SDOperand Elt = PermMask.getOperand(i+j);
3580       if (Elt.getOpcode() == ISD::UNDEF)
3581         continue;
3582       unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
3583       if (StartIdx == ~0U)
3584         StartIdx = EltIdx - (EltIdx % Scale);
3585       if (EltIdx != StartIdx + j)
3586         return SDOperand();
3587     }
3588     if (StartIdx == ~0U)
3589       MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
3590     else
3591       MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
3592   }
3593
3594   V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
3595   V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2);
3596   return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2,
3597                      DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0], 4));
3598 }
3599
3600 SDOperand
3601 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
3602   SDOperand V1 = Op.getOperand(0);
3603   SDOperand V2 = Op.getOperand(1);
3604   SDOperand PermMask = Op.getOperand(2);
3605   MVT::ValueType VT = Op.getValueType();
3606   unsigned NumElems = PermMask.getNumOperands();
3607   bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
3608   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
3609   bool V1IsSplat = false;
3610   bool V2IsSplat = false;
3611
3612   if (isUndefShuffle(Op.Val))
3613     return DAG.getNode(ISD::UNDEF, VT);
3614
3615   if (isZeroShuffle(Op.Val))
3616     return getZeroVector(VT, DAG);
3617
3618   if (isIdentityMask(PermMask.Val))
3619     return V1;
3620   else if (isIdentityMask(PermMask.Val, true))
3621     return V2;
3622
3623   if (isSplatMask(PermMask.Val)) {
3624     if (NumElems <= 4) return Op;
3625     // Promote it to a v4i32 splat.
3626     return PromoteSplat(Op, DAG);
3627   }
3628
3629   if (X86::isMOVLMask(PermMask.Val))
3630     return (V1IsUndef) ? V2 : Op;
3631
3632   if (X86::isMOVSHDUPMask(PermMask.Val) ||
3633       X86::isMOVSLDUPMask(PermMask.Val) ||
3634       X86::isMOVHLPSMask(PermMask.Val) ||
3635       X86::isMOVHPMask(PermMask.Val) ||
3636       X86::isMOVLPMask(PermMask.Val))
3637     return Op;
3638
3639   if (ShouldXformToMOVHLPS(PermMask.Val) ||
3640       ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
3641     return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3642
3643   bool Commuted = false;
3644   // FIXME: This should also accept a bitcast of a splat?  Be careful, not
3645   // 1,1,1,1 -> v8i16 though.
3646   V1IsSplat = isSplatVector(V1.Val);
3647   V2IsSplat = isSplatVector(V2.Val);
3648
3649   // Canonicalize the splat or undef, if present, to be on the RHS.
3650   if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
3651     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3652     std::swap(V1IsSplat, V2IsSplat);
3653     std::swap(V1IsUndef, V2IsUndef);
3654     Commuted = true;
3655   }
3656
3657   if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3658     if (V2IsUndef) return V1;
3659     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3660     if (V2IsSplat) {
3661       // V2 is a splat, so the mask may be malformed. That is, it may point
3662       // to any V2 element. The instruction selector won't like this. Get
3663       // a corrected mask and commute to form a proper MOVS{S|D}.
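      // E.g. (illustrative): with V2 = splat(y), the commuted-MOVL mask
      // <0, 4, 4, 4> becomes <4, 0, 0, 0> after the commute above. Because
      // the new V1 is the splat, lanes 0, 0, 0 are interchangeable with
      // 1, 2, 3, so the canonical <4, 1, 2, 3> mask built below matches
      // movs{s|d} directly.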
3664       SDOperand NewMask = getMOVLMask(NumElems, DAG);
3665       if (NewMask.Val != PermMask.Val)
3666         Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3667     }
3668     return Op;
3669   }
3670
3671   if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3672       X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3673       X86::isUNPCKLMask(PermMask.Val) ||
3674       X86::isUNPCKHMask(PermMask.Val))
3675     return Op;
3676
3677   if (V2IsSplat) {
3678     // Normalize the mask so all entries that point to V2 point to its first
3679     // element, then try to match unpck{h|l} again. If a match is found,
3680     // return a new vector_shuffle with the corrected mask.
3681     SDOperand NewMask = NormalizeMask(PermMask, DAG);
3682     if (NewMask.Val != PermMask.Val) {
3683       if (X86::isUNPCKLMask(PermMask.Val, true)) {
3684         SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3685         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3686       } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3687         SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3688         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3689       }
3690     }
3691   }
3692
3693   // Normalize the node to match x86 shuffle ops if needed.
3694   if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
3695     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3696
3697   if (Commuted) {
3698     // Commute it back and try unpck* again.
3699     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3700     if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3701         X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3702         X86::isUNPCKLMask(PermMask.Val) ||
3703         X86::isUNPCKHMask(PermMask.Val))
3704       return Op;
3705   }
3706
3707   // If VT is integer, try PSHUF* first, then SHUFP*.
3708   if (MVT::isInteger(VT)) {
3709     // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
3710     // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
3711     if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
3712          X86::isPSHUFDMask(PermMask.Val)) ||
3713         X86::isPSHUFHWMask(PermMask.Val) ||
3714         X86::isPSHUFLWMask(PermMask.Val)) {
3715       if (V2.getOpcode() != ISD::UNDEF)
3716         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3717                            DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
3718       return Op;
3719     }
3720
3721     if (X86::isSHUFPMask(PermMask.Val) &&
3722         MVT::getSizeInBits(VT) != 64)  // Don't do this for MMX.
3723       return Op;
3724   } else {
3725     // Floating point cases in the other order.
3726     if (X86::isSHUFPMask(PermMask.Val))
3727       return Op;
3728     if (X86::isPSHUFDMask(PermMask.Val) ||
3729         X86::isPSHUFHWMask(PermMask.Val) ||
3730         X86::isPSHUFLWMask(PermMask.Val)) {
3731       if (V2.getOpcode() != ISD::UNDEF)
3732         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3733                            DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
3734       return Op;
3735     }
3736   }
3737
3738   // If the shuffle can be rewritten as a 4 wide shuffle, then do it!
3739   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
3740     SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this);
3741     if (NewOp.Val)
3742       return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
3743   }
3744
3745   // Handle v8i16 specifically since SSE can do byte extraction and insertion.
3746   if (VT == MVT::v8i16) {
3747     SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
3748     if (NewOp.Val)
3749       return NewOp;
3750   }
3751
3752   // Handle all 4 wide cases with a number of shuffles.
3753   if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
3754     // Don't do this for MMX.
3755     MVT::ValueType MaskVT = PermMask.getValueType();
3756     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3757     SmallVector<std::pair<int, int>, 8> Locs;
3758     Locs.resize(NumElems);
3759     SmallVector<SDOperand, 8> Mask1(NumElems,
3760                                     DAG.getNode(ISD::UNDEF, MaskEVT));
3761     SmallVector<SDOperand, 8> Mask2(NumElems,
3762                                     DAG.getNode(ISD::UNDEF, MaskEVT));
3763     unsigned NumHi = 0;
3764     unsigned NumLo = 0;
3765     // If no more than two elements come from either vector, this can be
3766     // implemented with two shuffles. The first shuffle gathers the elements.
3767     // The second shuffle, which takes the first shuffle as both of its
3768     // vector operands, puts the elements into the right order.
3769     for (unsigned i = 0; i != NumElems; ++i) {
3770       SDOperand Elt = PermMask.getOperand(i);
3771       if (Elt.getOpcode() == ISD::UNDEF) {
3772         Locs[i] = std::make_pair(-1, -1);
3773       } else {
3774         unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3775         if (Val < NumElems) {
3776           Locs[i] = std::make_pair(0, NumLo);
3777           Mask1[NumLo] = Elt;
3778           NumLo++;
3779         } else {
3780           Locs[i] = std::make_pair(1, NumHi);
3781           if (2+NumHi < NumElems)
3782             Mask1[2+NumHi] = Elt;
3783           NumHi++;
3784         }
3785       }
3786     }
3787     if (NumLo <= 2 && NumHi <= 2) {
3788       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3789                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3790                                    &Mask1[0], Mask1.size()));
3791       for (unsigned i = 0; i != NumElems; ++i) {
3792         if (Locs[i].first == -1)
3793           continue;
3794         else {
3795           unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3796           Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3797           Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3798         }
3799       }
3800
3801       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3802                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3803                                      &Mask2[0], Mask2.size()));
3804     }
3805
3806     // Break it into (shuffle shuffle_hi, shuffle_lo).
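    // E.g. (illustrative) for the mask <0, 1, 2, 4>: three elements come
    // from V1, so the two-shuffle case above does not apply. shuffle_lo
    // gathers <0, 1, u, u>, shuffle_hi gathers <2, u, 4, u>, and the final
    // shuffle of (shuffle_lo, shuffle_hi) with mask <0, 1, 4, 6> restores
    // the requested order.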
3807     Locs.clear(); Locs.resize(NumElems);
3808     SmallVector<SDOperand, 8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3809     SmallVector<SDOperand, 8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3810     SmallVector<SDOperand, 8> *MaskPtr = &LoMask;
3811     unsigned MaskIdx = 0;
3812     unsigned LoIdx = 0;
3813     unsigned HiIdx = NumElems/2;
3814     for (unsigned i = 0; i != NumElems; ++i) {
3815       if (i == NumElems/2) {
3816         MaskPtr = &HiMask;
3817         MaskIdx = 1;
3818         LoIdx = 0;
3819         HiIdx = NumElems/2;
3820       }
3821       SDOperand Elt = PermMask.getOperand(i);
3822       if (Elt.getOpcode() == ISD::UNDEF) {
3823         Locs[i] = std::make_pair(-1, -1);
3824       } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3825         Locs[i] = std::make_pair(MaskIdx, LoIdx);
3826         (*MaskPtr)[LoIdx] = Elt;
3827         LoIdx++;
3828       } else {
3829         Locs[i] = std::make_pair(MaskIdx, HiIdx);
3830         (*MaskPtr)[HiIdx] = Elt;
3831         HiIdx++;
3832       }
3833     }
3834
3835     SDOperand LoShuffle =
3836       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3837                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3838                               &LoMask[0], LoMask.size()));
3839     SDOperand HiShuffle =
3840       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3841                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3842                               &HiMask[0], HiMask.size()));
3843     SmallVector<SDOperand, 8> MaskOps;
3844     for (unsigned i = 0; i != NumElems; ++i) {
3845       if (Locs[i].first == -1) {
3846         MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3847       } else {
3848         unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3849         MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3850       }
3851     }
3852     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3853                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3854                                    &MaskOps[0], MaskOps.size()));
3855   }
3856
3857   return SDOperand();
3858 }
3859
3860 SDOperand
3861 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3862   if (!isa<ConstantSDNode>(Op.getOperand(1)))
3863     return SDOperand();
3864
3865   MVT::ValueType VT = Op.getValueType();
3866   // TODO: handle v16i8.
3867   if (MVT::getSizeInBits(VT) == 16) {
3868     SDOperand Vec = Op.getOperand(0);
3869     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3870     if (Idx == 0)
3871       return DAG.getNode(ISD::TRUNCATE, MVT::i16,
3872                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
3873                                      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
3874                                      Op.getOperand(1)));
3875     // Transform it so it matches pextrw, which produces a 32-bit result.
3876     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3877     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3878                                     Op.getOperand(0), Op.getOperand(1));
3879     SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
3880                                    DAG.getValueType(VT));
3881     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3882   } else if (MVT::getSizeInBits(VT) == 32) {
3883     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3884     if (Idx == 0)
3885       return Op;
3886     // SHUFPS the element to the lowest double word, then movss.
3887     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3888     SmallVector<SDOperand, 8> IdxVec;
3889     IdxVec.
3890       push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3891     IdxVec.
3892       push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3893     IdxVec.
3894       push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3895     IdxVec.
3896       push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3897     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3898                                  &IdxVec[0], IdxVec.size());
3899     SDOperand Vec = Op.getOperand(0);
3900     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3901                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3902     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3903                        DAG.getConstant(0, getPointerTy()));
3904   } else if (MVT::getSizeInBits(VT) == 64) {
3905     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3906     if (Idx == 0)
3907       return Op;
3908
3909     // UNPCKHPD the element to the lowest double word, then movsd.
3910     // Note that if the lower 64 bits of the result of the UNPCKHPD are then
3911     // stored to an f64mem, the whole operation is folded into a single MOVHPDmr.
3912     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3913     SmallVector<SDOperand, 8> IdxVec;
3914     IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
3915     IdxVec.
3916       push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3917     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3918                                  &IdxVec[0], IdxVec.size());
3919     SDOperand Vec = Op.getOperand(0);
3920     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3921                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3922     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3923                        DAG.getConstant(0, getPointerTy()));
3924   }
3925
3926   return SDOperand();
3927 }
3928
3929 SDOperand
3930 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3931   MVT::ValueType VT = Op.getValueType();
3932   MVT::ValueType EVT = MVT::getVectorElementType(VT);
3933   if (EVT == MVT::i8)
3934     return SDOperand();
3935
3936   SDOperand N0 = Op.getOperand(0);
3937   SDOperand N1 = Op.getOperand(1);
3938   SDOperand N2 = Op.getOperand(2);
3939
3940   if (MVT::getSizeInBits(EVT) == 16) {
3941     // Transform it so it matches pinsrw, which expects a 16-bit value in a
3942     // GR32 as its second argument.
3943     if (N1.getValueType() != MVT::i32)
3944       N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3945     if (N2.getValueType() != MVT::i32)
3946       N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), getPointerTy());
3947     return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3948   }
3949
3950   N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3951   unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3952   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3953   MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3954   SmallVector<SDOperand, 4> MaskVec;
3955   for (unsigned i = 0; i < 4; ++i)
3956     MaskVec.push_back(DAG.getConstant((i == Idx) ? i+4 : i, MaskEVT));
3957   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3958                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3959                                  &MaskVec[0], MaskVec.size()));
3960 }
3961
3962 SDOperand
3963 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3964   SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3965   return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3966 }
3967
3968 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3969 // their target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
3970 // one of the above-mentioned nodes. It has to be wrapped because otherwise
3971 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3972 // be used to form addressing modes. These wrapped nodes will be selected
3973 // into MOV32ri.
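// For a PIC, non-RIP-relative target the DAG built below looks roughly like
// (illustrative):
//   ADD(GlobalBaseReg, Wrapper(TargetConstantPool))
// i.e. the constant-pool entry is addressed as $g + Offset, where $g is the
// PIC base register.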
3974 SDOperand
3975 X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3976   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3977   SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3978                                                getPointerTy(),
3979                                                CP->getAlignment());
3980   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3981   // With PIC, the address is actually $g + Offset.
3982   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3983       !Subtarget->isPICStyleRIPRel()) {
3984     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3985                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3986                          Result);
3987   }
3988
3989   return Result;
3990 }
3991
3992 SDOperand
3993 X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3994   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3995   SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3996   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3997   // With PIC, the address is actually $g + Offset.
3998   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3999       !Subtarget->isPICStyleRIPRel()) {
4000     Result = DAG.getNode(ISD::ADD, getPointerTy(),
4001                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
4002                          Result);
4003   }
4004
4005   // For Darwin & Mingw32, external and weak symbols are indirect, so we want
4006   // to load the value at address GV, not the value of GV itself. This means
4007   // the GlobalAddress must be in the base or index register of the address,
4008   // not in the GV offset field. The platform check is inside the
4009   // GVRequiresExtraLoad() call. The same applies to external symbols during
4010   // PIC codegen.
4011   if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
4012     Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
4013
4014   return Result;
4015 }
4016
4017 // Lower ISD::GlobalTLSAddress using the "general dynamic" model.
4018 static SDOperand
4019 LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
4020                               const MVT::ValueType PtrVT) {
4021   SDOperand InFlag;
4022   SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
4023                                      DAG.getNode(X86ISD::GlobalBaseReg,
4024                                                  PtrVT), InFlag);
4025   InFlag = Chain.getValue(1);
4026
4027   // emit leal symbol@TLSGD(,%ebx,1), %eax
4028   SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
4029   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
4030                                              GA->getValueType(0),
4031                                              GA->getOffset());
4032   SDOperand Ops[] = { Chain, TGA, InFlag };
4033   SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
4034   InFlag = Result.getValue(2);
4035   Chain = Result.getValue(1);
4036
4037   // call ___tls_get_addr. This function receives its argument in
4038   // the register EAX.
4039   Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
4040   InFlag = Chain.getValue(1);
4041
4042   NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
4043   SDOperand Ops1[] = { Chain,
4044                        DAG.getTargetExternalSymbol("___tls_get_addr",
4045                                                    PtrVT),
4046                        DAG.getRegister(X86::EAX, PtrVT),
4047                        DAG.getRegister(X86::EBX, PtrVT),
4048                        InFlag };
4049   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
4050   InFlag = Chain.getValue(1);
4051
4052   return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
4053 }
4054
4055 // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
4056 // "local exec" model.
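// Roughly (illustrative), the DAG built below computes the thread pointer
// (%gs:0 on x86 Linux) plus either the link-time constant x@ntpoff (local
// exec) or an offset loaded from x@indntpoff (initial exec).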
4057 static SDOperand
4058 LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
4059                     const MVT::ValueType PtrVT) {
4060   // Get the Thread Pointer.
4061   SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
4062   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
4063   // (initial exec).
4064   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
4065                                              GA->getValueType(0),
4066                                              GA->getOffset());
4067   SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
4068
4069   if (GA->getGlobal()->isDeclaration())  // initial exec TLS model
4070     Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
4071
4072   // The address of the thread-local variable is the thread pointer plus the
4073   // offset of the variable.
4074   return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
4075 }
4076
4077 SDOperand
4078 X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
4079   // TODO: implement the "local dynamic" model.
4080   // TODO: implement the "initial exec" model for PIC executables.
4081   assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
4082          "TLS not implemented for non-ELF and 64-bit targets");
4083   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4084   // If the relocation model is PIC, use the "General Dynamic" TLS model;
4085   // otherwise use the "Local Exec" TLS model.
4086   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
4087     return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
4088   else
4089     return LowerToTLSExecModel(GA, DAG, getPointerTy());
4090 }
4091
4092 SDOperand
4093 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
4094   const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
4095   SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
4096   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
4097   // With PIC, the address is actually $g + Offset.
4098   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
4099       !Subtarget->isPICStyleRIPRel()) {
4100     Result = DAG.getNode(ISD::ADD, getPointerTy(),
4101                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
4102                          Result);
4103   }
4104
4105   return Result;
4106 }
4107
4108 SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
4109   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4110   SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
4111   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
4112   // With PIC, the address is actually $g + Offset.
4113   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
4114       !Subtarget->isPICStyleRIPRel()) {
4115     Result = DAG.getNode(ISD::ADD, getPointerTy(),
4116                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
4117                          Result);
4118   }
4119
4120   return Result;
4121 }
4122
4123 /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
4124 /// take a 2 x i32 value to shift plus a shift amount.
4125 SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
4126   assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
4127          "Not an i64 shift!");
4128   bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
4129   SDOperand ShOpLo = Op.getOperand(0);
4130   SDOperand ShOpHi = Op.getOperand(1);
4131   SDOperand ShAmt  = Op.getOperand(2);
4131   SDOperand Tmp1 = isSRA ?
4132 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 4133 DAG.getConstant(0, MVT::i32); 4134 4135 SDOperand Tmp2, Tmp3; 4136 if (Op.getOpcode() == ISD::SHL_PARTS) { 4137 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 4138 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 4139 } else { 4140 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 4141 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 4142 } 4143 4144 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4145 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 4146 DAG.getConstant(32, MVT::i8)); 4147 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 4148 AndNode, DAG.getConstant(0, MVT::i8)); 4149 4150 SDOperand Hi, Lo; 4151 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4152 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 4153 SmallVector<SDOperand, 4> Ops; 4154 if (Op.getOpcode() == ISD::SHL_PARTS) { 4155 Ops.push_back(Tmp2); 4156 Ops.push_back(Tmp3); 4157 Ops.push_back(CC); 4158 Ops.push_back(Cond); 4159 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4160 4161 Ops.clear(); 4162 Ops.push_back(Tmp3); 4163 Ops.push_back(Tmp1); 4164 Ops.push_back(CC); 4165 Ops.push_back(Cond); 4166 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4167 } else { 4168 Ops.push_back(Tmp2); 4169 Ops.push_back(Tmp3); 4170 Ops.push_back(CC); 4171 Ops.push_back(Cond); 4172 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4173 4174 Ops.clear(); 4175 Ops.push_back(Tmp3); 4176 Ops.push_back(Tmp1); 4177 Ops.push_back(CC); 4178 Ops.push_back(Cond); 4179 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 4180 } 4181 4182 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 4183 Ops.clear(); 4184 Ops.push_back(Lo); 4185 Ops.push_back(Hi); 4186 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 4187} 4188 4189SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 4190 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 4191 Op.getOperand(0).getValueType() >= MVT::i16 && 4192 "Unknown SINT_TO_FP to lower!"); 4193 4194 SDOperand Result; 4195 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 4196 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 4197 MachineFunction &MF = DAG.getMachineFunction(); 4198 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 4199 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4200 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 4201 StackSlot, NULL, 0); 4202 4203 // These are really Legal; caller falls through into that case. 4204 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32) 4205 return Result; 4206 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64) 4207 return Result; 4208 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 4209 Subtarget->is64Bit()) 4210 return Result; 4211 4212 // Build the FILD 4213 SDVTList Tys; 4214 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) || 4215 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64); 4216 if (useSSE) 4217 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 4218 else 4219 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 4220 SmallVector<SDOperand, 8> Ops; 4221 Ops.push_back(Chain); 4222 Ops.push_back(StackSlot); 4223 Ops.push_back(DAG.getValueType(SrcVT)); 4224 Result = DAG.getNode(useSSE ? 
X86ISD::FILD_FLAG : X86ISD::FILD,
4225                        Tys, &Ops[0], Ops.size());
4226
4227   if (useSSE) {
4228     Chain = Result.getValue(1);
4229     SDOperand InFlag = Result.getValue(2);
4230
4231     // FIXME: Currently the FST is flagged to the FILD_FLAG. This
4232     // shouldn't be necessary except that RFP cannot be live across
4233     // multiple blocks. When the stackifier is fixed, they can be uncoupled.
4234     MachineFunction &MF = DAG.getMachineFunction();
4235     int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
4236     SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
4237     Tys = DAG.getVTList(MVT::Other);
4238     SmallVector<SDOperand, 8> Ops;
4239     Ops.push_back(Chain);
4240     Ops.push_back(Result);
4241     Ops.push_back(StackSlot);
4242     Ops.push_back(DAG.getValueType(Op.getValueType()));
4243     Ops.push_back(InFlag);
4244     Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
4245     Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
4246   }
4247
4248   return Result;
4249 }
4250
4251 std::pair<SDOperand,SDOperand> X86TargetLowering::
4252 FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
4253   assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
4254          "Unknown FP_TO_SINT to lower!");
4255
4256   // These are really Legal.
4257   if (Op.getValueType() == MVT::i32 &&
4258       X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
4259     return std::make_pair(SDOperand(), SDOperand());
4260   if (Op.getValueType() == MVT::i32 &&
4261       X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
4262     return std::make_pair(SDOperand(), SDOperand());
4263   if (Subtarget->is64Bit() &&
4264       Op.getValueType() == MVT::i64 &&
4265       Op.getOperand(0).getValueType() != MVT::f80)
4266     return std::make_pair(SDOperand(), SDOperand());
4267
4268   // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
4269   // stack slot.
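  // The emitted sequence is roughly (illustrative, x87 path):
  //   fld   <value>      ; push the FP value onto the x87 stack
  //   fistp <slot>       ; FP_TO_INT*_IN_MEM: convert and store to memory
  //   mov   <slot>, ...  ; the reload issued by the callers below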
4270 MachineFunction &MF = DAG.getMachineFunction(); 4271 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 4272 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4273 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4274 unsigned Opc; 4275 switch (Op.getValueType()) { 4276 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 4277 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 4278 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 4279 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 4280 } 4281 4282 SDOperand Chain = DAG.getEntryNode(); 4283 SDOperand Value = Op.getOperand(0); 4284 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) || 4285 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) { 4286 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 4287 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 4288 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 4289 SDOperand Ops[] = { 4290 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 4291 }; 4292 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 4293 Chain = Value.getValue(1); 4294 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 4295 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 4296 } 4297 4298 // Build the FP_TO_INT*_IN_MEM 4299 SDOperand Ops[] = { Chain, Value, StackSlot }; 4300 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 4301 4302 return std::make_pair(FIST, StackSlot); 4303} 4304 4305SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 4306 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG); 4307 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4308 if (FIST.Val == 0) return SDOperand(); 4309 4310 // Load the result. 4311 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 4312} 4313 4314SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { 4315 std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG); 4316 SDOperand FIST = Vals.first, StackSlot = Vals.second; 4317 if (FIST.Val == 0) return 0; 4318 4319 // Return an i64 load from the stack slot. 4320 SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0); 4321 4322 // Use a MERGE_VALUES node to drop the chain result value. 
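  // Note: the FP_TO_SINT node being expanded produces a single i64 value and
  // no chain result, so the node handed back to the legalizer must have the
  // same shape; the load's chain remains reachable through the load itself.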
4323 return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val; 4324} 4325 4326SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 4327 MVT::ValueType VT = Op.getValueType(); 4328 MVT::ValueType EltVT = VT; 4329 if (MVT::isVector(VT)) 4330 EltVT = MVT::getVectorElementType(VT); 4331 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4332 std::vector<Constant*> CV; 4333 if (EltVT == MVT::f64) { 4334 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 4335 CV.push_back(C); 4336 CV.push_back(C); 4337 } else { 4338 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 4339 CV.push_back(C); 4340 CV.push_back(C); 4341 CV.push_back(C); 4342 CV.push_back(C); 4343 } 4344 Constant *C = ConstantVector::get(CV); 4345 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4346 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4347 false, 16); 4348 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 4349} 4350 4351SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 4352 MVT::ValueType VT = Op.getValueType(); 4353 MVT::ValueType EltVT = VT; 4354 unsigned EltNum = 1; 4355 if (MVT::isVector(VT)) { 4356 EltVT = MVT::getVectorElementType(VT); 4357 EltNum = MVT::getVectorNumElements(VT); 4358 } 4359 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 4360 std::vector<Constant*> CV; 4361 if (EltVT == MVT::f64) { 4362 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 4363 CV.push_back(C); 4364 CV.push_back(C); 4365 } else { 4366 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 4367 CV.push_back(C); 4368 CV.push_back(C); 4369 CV.push_back(C); 4370 CV.push_back(C); 4371 } 4372 Constant *C = ConstantVector::get(CV); 4373 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4374 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4375 false, 16); 4376 if (MVT::isVector(VT)) { 4377 return DAG.getNode(ISD::BIT_CONVERT, VT, 4378 DAG.getNode(ISD::XOR, MVT::v2i64, 4379 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 4380 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 4381 } else { 4382 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 4383 } 4384} 4385 4386SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 4387 SDOperand Op0 = Op.getOperand(0); 4388 SDOperand Op1 = Op.getOperand(1); 4389 MVT::ValueType VT = Op.getValueType(); 4390 MVT::ValueType SrcVT = Op1.getValueType(); 4391 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 4392 4393 // If second operand is smaller, extend it first. 4394 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 4395 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 4396 SrcVT = VT; 4397 SrcTy = MVT::getTypeForValueType(SrcVT); 4398 } 4399 // And if it is bigger, shrink it first. 4400 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4401 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1); 4402 SrcVT = VT; 4403 SrcTy = MVT::getTypeForValueType(SrcVT); 4404 } 4405 4406 // At this point the operands and the result should have the same 4407 // type, and that won't be f80 since that is not custom lowered. 4408 4409 // First get the sign bit of second operand. 
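  // Concretely (illustrative, f64 case): the constant-pool vectors built
  // below are <0x8000000000000000, 0> (sign mask) and, further down,
  // <0x7fffffffffffffff, 0> (magnitude mask), so e.g. copysign(1.0, -2.0)
  // keeps 1.0's magnitude, takes -2.0's sign bit, and ORs them into -1.0.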
4410 std::vector<Constant*> CV; 4411 if (SrcVT == MVT::f64) { 4412 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 4413 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4414 } else { 4415 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 4416 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4417 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4418 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4419 } 4420 Constant *C = ConstantVector::get(CV); 4421 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4422 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 4423 false, 16); 4424 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4425 4426 // Shift sign bit right or left if the two operands have different types. 4427 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4428 // Op0 is MVT::f32, Op1 is MVT::f64. 4429 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4430 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4431 DAG.getConstant(32, MVT::i32)); 4432 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4433 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4434 DAG.getConstant(0, getPointerTy())); 4435 } 4436 4437 // Clear first operand sign bit. 4438 CV.clear(); 4439 if (VT == MVT::f64) { 4440 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 4441 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4442 } else { 4443 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 4444 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4445 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4446 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4447 } 4448 C = ConstantVector::get(CV); 4449 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4450 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4451 false, 16); 4452 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4453 4454 // Or the value with the sign bit. 
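  // (Worked example, assuming scalar f64 operands: copysign(-3.0, +2.0)
  // computes SignBit = +2.0 & 0x800... = 0 and Val = -3.0 & ~0x800... = 3.0,
  // so the OR below produces +3.0.)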
4455   return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
4456 }
4457
4458 SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
4459   assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
4460   SDOperand Cond;
4461   SDOperand Op0 = Op.getOperand(0);
4462   SDOperand Op1 = Op.getOperand(1);
4463   SDOperand CC = Op.getOperand(2);
4464   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4465   bool isFP = MVT::isFloatingPoint(Op1.getValueType());
4466   unsigned X86CC;
4467
4468   if (translateX86CC(SetCCOpcode, isFP, X86CC,
4469                      Op0, Op1, DAG)) {
4470     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
4471     return DAG.getNode(X86ISD::SETCC, MVT::i8,
4472                        DAG.getConstant(X86CC, MVT::i8), Cond);
4473   }
4474
4475   assert(isFP && "Illegal integer SetCC!");
4476
4477   Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
4478   switch (SetCCOpcode) {
4479   default: assert(false && "Illegal floating point SetCC!");
4480   case ISD::SETOEQ: {  // !PF & ZF
4481     SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4482                                  DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
4483     SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4484                                  DAG.getConstant(X86::COND_E, MVT::i8), Cond);
4485     return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
4486   }
4487   case ISD::SETUNE: {  // PF | !ZF
4488     SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4489                                  DAG.getConstant(X86::COND_P, MVT::i8), Cond);
4490     SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4491                                  DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
4492     return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
4493   }
4494   }
4495 }
4496
4497
4498 SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
4499   bool addTest = true;
4500   SDOperand Cond = Op.getOperand(0);
4501   SDOperand CC;
4502
4503   if (Cond.getOpcode() == ISD::SETCC)
4504     Cond = LowerSETCC(Cond, DAG);
4505
4506   // If the condition flag is set by an X86ISD::CMP, then use it as the
4507   // condition-setting operand in place of the X86ISD::SETCC.
4508   if (Cond.getOpcode() == X86ISD::SETCC) {
4509     CC = Cond.getOperand(0);
4510
4511     SDOperand Cmp = Cond.getOperand(1);
4512     unsigned Opc = Cmp.getOpcode();
4513     MVT::ValueType VT = Op.getValueType();
4514     bool IllegalFPCMov = false;
4515     if ((VT == MVT::f32 && !X86ScalarSSEf32) ||
4516         (VT == MVT::f64 && !X86ScalarSSEf64) ||
4517         VT == MVT::f80)
4518       IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
4519
4520
4521     if ((Opc == X86ISD::CMP ||
4522          Opc == X86ISD::COMI ||
4523          Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
4524       Cond = Cmp;
4525       addTest = false;
4526     }
4527   }
4528
4529   if (addTest) {
4530     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4531     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
4532   }
4533
4534   const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
4535                                                     MVT::Flag);
4536   SmallVector<SDOperand, 4> Ops;
4537   // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
4538   // condition is true.
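  // (For reference: ISD::SELECT's operands are (Cond, TrueVal, FalseVal);
  // the X86ISD::CMOV node built below takes (FalseVal, TrueVal, CC, Cond),
  // mirroring a cmov that starts from the false value and conditionally
  // overwrites it with the true value.)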
4539   Ops.push_back(Op.getOperand(2));
4540   Ops.push_back(Op.getOperand(1));
4541   Ops.push_back(CC);
4542   Ops.push_back(Cond);
4543   return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
4544 }
4545
4546 SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
4547   bool addTest = true;
4548   SDOperand Chain = Op.getOperand(0);
4549   SDOperand Cond  = Op.getOperand(1);
4550   SDOperand Dest  = Op.getOperand(2);
4551   SDOperand CC;
4552
4553   if (Cond.getOpcode() == ISD::SETCC)
4554     Cond = LowerSETCC(Cond, DAG);
4555
4556   // If the condition flag is set by an X86ISD::CMP, then use it as the
4557   // condition-setting operand in place of the X86ISD::SETCC.
4558   if (Cond.getOpcode() == X86ISD::SETCC) {
4559     CC = Cond.getOperand(0);
4560
4561     SDOperand Cmp = Cond.getOperand(1);
4562     unsigned Opc = Cmp.getOpcode();
4563     if (Opc == X86ISD::CMP ||
4564         Opc == X86ISD::COMI ||
4565         Opc == X86ISD::UCOMI) {
4566       Cond = Cmp;
4567       addTest = false;
4568     }
4569   }
4570
4571   if (addTest) {
4572     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4573     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
4574   }
4575   return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
4576                      Chain, Dest, CC, Cond);
4577 }
4578
4579 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
4580   unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
4581   bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
4582
4583   if (Subtarget->is64Bit()) {
4584     if (CallingConv == CallingConv::Fast && isTailCall && PerformTailCallOpt)
4585       return LowerX86_TailCallTo(Op, DAG, CallingConv);
4586     else
4587       return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
4588   } else
4589     switch (CallingConv) {
4590     default:
4591       assert(0 && "Unsupported calling convention");
4592     case CallingConv::Fast:
4593       if (isTailCall && PerformTailCallOpt)
4594         return LowerX86_TailCallTo(Op, DAG, CallingConv);
4595       else
4596         return LowerCCCCallTo(Op, DAG, CallingConv);
4597     case CallingConv::C:
4598     case CallingConv::X86_StdCall:
4599       return LowerCCCCallTo(Op, DAG, CallingConv);
4600     case CallingConv::X86_FastCall:
4601       return LowerFastCCCallTo(Op, DAG, CallingConv);
4602     }
4603 }
4604
4605
4606 // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
4607 // Calls to _alloca are needed to probe the stack when allocating more than 4k
4608 // bytes in one go. Touching the stack at 4K increments is necessary to ensure
4609 // that the guard pages used by the OS virtual memory manager are allocated in
4610 // the correct sequence.
4611 SDOperand
4612 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
4613                                            SelectionDAG &DAG) {
4614   assert(Subtarget->isTargetCygMing() &&
4615          "This should be used only on Cygwin/Mingw targets");
4616
4617   // Get the inputs.
4618   SDOperand Chain = Op.getOperand(0);
4619   SDOperand Size  = Op.getOperand(1);
4620   // FIXME: Ensure alignment here
4621
4622   SDOperand Flag;
4623
4624   MVT::ValueType IntPtr = getPointerTy();
4625   MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
4626
4627   Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
4628   Flag = Chain.getValue(1);
4629
4630   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
4631   SDOperand Ops[] = { Chain,
4632                       DAG.getTargetExternalSymbol("_alloca", IntPtr),
4633                       DAG.getRegister(X86::EAX, IntPtr),
4634                       Flag };
4635   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
4636   Flag = Chain.getValue(1);
4637
4638   Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
4639
4640   std::vector<MVT::ValueType> Tys;
4641   Tys.push_back(SPTy);
4642   Tys.push_back(MVT::Other);
4643   SDOperand Ops1[2] = { Chain.getValue(0), Chain };
4644   return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
4645 }
4646
4647 SDOperand
4648 X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
4649   MachineFunction &MF = DAG.getMachineFunction();
4650   const Function* Fn = MF.getFunction();
4651   if (Fn->hasExternalLinkage() &&
4652       Subtarget->isTargetCygMing() &&
4653       Fn->getName() == "main")
4654     MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
4655
4656   unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
4657   if (Subtarget->is64Bit())
4658     return LowerX86_64CCCArguments(Op, DAG);
4659   else
4660     switch (CC) {
4661     default:
4662       assert(0 && "Unsupported calling convention");
4663     case CallingConv::Fast:
4664       return LowerCCCArguments(Op, DAG, true);
4665
4666     case CallingConv::C:
4667       return LowerCCCArguments(Op, DAG);
4668     case CallingConv::X86_StdCall:
4669       MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
4670       return LowerCCCArguments(Op, DAG, true);
4671     case CallingConv::X86_FastCall:
4672       MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
4673       return LowerFastCCArguments(Op, DAG);
4674     }
4675 }
4676
4677 SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
4678   SDOperand InFlag(0, 0);
4679   SDOperand Chain = Op.getOperand(0);
4680   unsigned Align =
4681     (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4682   if (Align == 0) Align = 1;
4683
4684   ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4685   // If not DWORD aligned or size is more than the threshold, call memset.
4686   // The libc version is likely to be faster for these cases. It can use the
4687   // address value and run time information about the CPU.
4688   if ((Align & 3) != 0 ||
4689       (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
4690     MVT::ValueType IntPtr = getPointerTy();
4691     const Type *IntPtrTy = getTargetData()->getIntPtrType();
4692     TargetLowering::ArgListTy Args;
4693     TargetLowering::ArgListEntry Entry;
4694     Entry.Node = Op.getOperand(1);
4695     Entry.Ty = IntPtrTy;
4696     Args.push_back(Entry);
4697     // Extend the unsigned i8 argument to be an int value for the call.
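    // (memset's C prototype is void *memset(void *s, int c, size_t n), so
    // the i8 value operand must be widened to a full int here.)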
4698     Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
4699     Entry.Ty = IntPtrTy;
4700     Args.push_back(Entry);
4701     Entry.Node = Op.getOperand(3);
4702     Args.push_back(Entry);
4703     std::pair<SDOperand,SDOperand> CallResult =
4704       LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
4705                   DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
4706     return CallResult.second;
4707   }
4708
4709   MVT::ValueType AVT;
4710   SDOperand Count;
4711   ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
4712   unsigned BytesLeft = 0;
4713   bool TwoRepStos = false;
4714   if (ValC) {
4715     unsigned ValReg;
4716     uint64_t Val = ValC->getValue() & 255;
4717
4718     // If the value is a constant, then we can potentially use wider stores.
4719     switch (Align & 3) {
4720     case 2:   // WORD aligned
4721       AVT = MVT::i16;
4722       ValReg = X86::AX;
4723       Val = (Val << 8) | Val;
4724       break;
4725     case 0:   // DWORD aligned
4726       AVT = MVT::i32;
4727       ValReg = X86::EAX;
4728       Val = (Val << 8)  | Val;
4729       Val = (Val << 16) | Val;
4730       if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
4731         AVT = MVT::i64;
4732         ValReg = X86::RAX;
4733         Val = (Val << 32) | Val;
4734       }
4735       break;
4736     default:  // Byte aligned
4737       AVT = MVT::i8;
4738       ValReg = X86::AL;
4739       Count = Op.getOperand(3);
4740       break;
4741     }
4742
4743     if (AVT > MVT::i8) {
4744       if (I) {
4745         unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4746         Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
4747         BytesLeft = I->getValue() % UBytes;
4748       } else {
4749         assert(AVT >= MVT::i32 &&
4750                "Do not use rep;stos if not at least DWORD aligned");
4751         Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4752                             Op.getOperand(3), DAG.getConstant((AVT == MVT::i64) ? 3 : 2, MVT::i8));  // bytes -> elements
4753         TwoRepStos = true;
4754       }
4755     }
4756
4757     Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
4758                               InFlag);
4759     InFlag = Chain.getValue(1);
4760   } else {
4761     AVT = MVT::i8;
4762     Count  = Op.getOperand(3);
4763     Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
4764     InFlag = Chain.getValue(1);
4765   }
4766
4767   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4768                             Count, InFlag);
4769   InFlag = Chain.getValue(1);
4770   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4771                             Op.getOperand(1), InFlag);
4772   InFlag = Chain.getValue(1);
4773
4774   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4775   SmallVector<SDOperand, 8> Ops;
4776   Ops.push_back(Chain);
4777   Ops.push_back(DAG.getValueType(AVT));
4778   Ops.push_back(InFlag);
4779   Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4780
4781   if (TwoRepStos) {
4782     InFlag = Chain.getValue(1);
4783     Count = Op.getOperand(3);
4784     MVT::ValueType CVT = Count.getValueType();
4785     SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4786                                  DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4787     Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4788                               Left, InFlag);
4789     InFlag = Chain.getValue(1);
4790     Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4791     Ops.clear();
4792     Ops.push_back(Chain);
4793     Ops.push_back(DAG.getValueType(MVT::i8));
4794     Ops.push_back(InFlag);
4795     Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4796   } else if (BytesLeft) {
4797     // Issue stores for the last 1 - 7 bytes.
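    // (Illustrative: a DWORD-aligned memset of 15 constant bytes uses
    // rep;stosl with a count of 15 / 4 = 3, leaving BytesLeft = 3; the code
    // below then finishes with one i16 store at offset 12 and one i8 store
    // at offset 14.)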
4798 SDOperand Value; 4799 unsigned Val = ValC->getValue() & 255; 4800 unsigned Offset = I->getValue() - BytesLeft; 4801 SDOperand DstAddr = Op.getOperand(1); 4802 MVT::ValueType AddrVT = DstAddr.getValueType(); 4803 if (BytesLeft >= 4) { 4804 Val = (Val << 8) | Val; 4805 Val = (Val << 16) | Val; 4806 Value = DAG.getConstant(Val, MVT::i32); 4807 Chain = DAG.getStore(Chain, Value, 4808 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4809 DAG.getConstant(Offset, AddrVT)), 4810 NULL, 0); 4811 BytesLeft -= 4; 4812 Offset += 4; 4813 } 4814 if (BytesLeft >= 2) { 4815 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4816 Chain = DAG.getStore(Chain, Value, 4817 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4818 DAG.getConstant(Offset, AddrVT)), 4819 NULL, 0); 4820 BytesLeft -= 2; 4821 Offset += 2; 4822 } 4823 if (BytesLeft == 1) { 4824 Value = DAG.getConstant(Val, MVT::i8); 4825 Chain = DAG.getStore(Chain, Value, 4826 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4827 DAG.getConstant(Offset, AddrVT)), 4828 NULL, 0); 4829 } 4830 } 4831 4832 return Chain; 4833} 4834 4835SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4836 SDOperand Dest, 4837 SDOperand Source, 4838 unsigned Size, 4839 unsigned Align, 4840 SelectionDAG &DAG) { 4841 MVT::ValueType AVT; 4842 unsigned BytesLeft = 0; 4843 switch (Align & 3) { 4844 case 2: // WORD aligned 4845 AVT = MVT::i16; 4846 break; 4847 case 0: // DWORD aligned 4848 AVT = MVT::i32; 4849 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4850 AVT = MVT::i64; 4851 break; 4852 default: // Byte aligned 4853 AVT = MVT::i8; 4854 break; 4855 } 4856 4857 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4858 SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy()); 4859 BytesLeft = Size % UBytes; 4860 4861 SDOperand InFlag(0, 0); 4862 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4863 Count, InFlag); 4864 InFlag = Chain.getValue(1); 4865 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4866 Dest, InFlag); 4867 InFlag = Chain.getValue(1); 4868 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4869 Source, InFlag); 4870 InFlag = Chain.getValue(1); 4871 4872 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4873 SmallVector<SDOperand, 8> Ops; 4874 Ops.push_back(Chain); 4875 Ops.push_back(DAG.getValueType(AVT)); 4876 Ops.push_back(InFlag); 4877 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4878 4879 if (BytesLeft) { 4880 // Issue loads and stores for the last 1 - 7 bytes. 
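    // (Illustrative: copying Size = 10 DWORD-aligned bytes uses rep;movsl
    // with a count of 10 / 4 = 2, i.e. 8 bytes; the remaining 2 bytes are
    // handled below with a single i16 load from Source+8 and a store to
    // Dest+8.)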
4881 unsigned Offset = Size - BytesLeft; 4882 SDOperand DstAddr = Dest; 4883 MVT::ValueType DstVT = DstAddr.getValueType(); 4884 SDOperand SrcAddr = Source; 4885 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4886 SDOperand Value; 4887 if (BytesLeft >= 4) { 4888 Value = DAG.getLoad(MVT::i32, Chain, 4889 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4890 DAG.getConstant(Offset, SrcVT)), 4891 NULL, 0); 4892 Chain = Value.getValue(1); 4893 Chain = DAG.getStore(Chain, Value, 4894 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4895 DAG.getConstant(Offset, DstVT)), 4896 NULL, 0); 4897 BytesLeft -= 4; 4898 Offset += 4; 4899 } 4900 if (BytesLeft >= 2) { 4901 Value = DAG.getLoad(MVT::i16, Chain, 4902 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4903 DAG.getConstant(Offset, SrcVT)), 4904 NULL, 0); 4905 Chain = Value.getValue(1); 4906 Chain = DAG.getStore(Chain, Value, 4907 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4908 DAG.getConstant(Offset, DstVT)), 4909 NULL, 0); 4910 BytesLeft -= 2; 4911 Offset += 2; 4912 } 4913 4914 if (BytesLeft == 1) { 4915 Value = DAG.getLoad(MVT::i8, Chain, 4916 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4917 DAG.getConstant(Offset, SrcVT)), 4918 NULL, 0); 4919 Chain = Value.getValue(1); 4920 Chain = DAG.getStore(Chain, Value, 4921 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4922 DAG.getConstant(Offset, DstVT)), 4923 NULL, 0); 4924 } 4925 } 4926 4927 return Chain; 4928} 4929 4930/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain 4931SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){ 4932 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4933 SDOperand TheChain = N->getOperand(0); 4934 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); 4935 if (Subtarget->is64Bit()) { 4936 SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4937 SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, 4938 MVT::i64, rax.getValue(2)); 4939 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, 4940 DAG.getConstant(32, MVT::i8)); 4941 SDOperand Ops[] = { 4942 DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) 4943 }; 4944 4945 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4946 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 4947 } 4948 4949 SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4950 SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, 4951 MVT::i32, eax.getValue(2)); 4952 // Use a buildpair to merge the two 32-bit values into a 64-bit one. 4953 SDOperand Ops[] = { eax, edx }; 4954 Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2); 4955 4956 // Use a MERGE_VALUES to return the value and chain. 4957 Ops[1] = edx.getValue(1); 4958 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4959 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; 4960} 4961 4962SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4963 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4964 4965 if (!Subtarget->is64Bit()) { 4966 // vastart just stores the address of the VarArgsFrameIndex slot into the 4967 // memory location argument. 4968 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4969 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4970 SV->getOffset()); 4971 } 4972 4973 // __va_list_tag: 4974 // gp_offset (0 - 6 * 8) 4975 // fp_offset (48 - 48 + 8 * 16) 4976 // overflow_arg_area (point to parameters coming in memory). 
4977 // reg_save_area 4978 SmallVector<SDOperand, 8> MemOps; 4979 SDOperand FIN = Op.getOperand(1); 4980 // Store gp_offset 4981 SDOperand Store = DAG.getStore(Op.getOperand(0), 4982 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4983 FIN, SV->getValue(), SV->getOffset()); 4984 MemOps.push_back(Store); 4985 4986 // Store fp_offset 4987 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4988 DAG.getConstant(4, getPointerTy())); 4989 Store = DAG.getStore(Op.getOperand(0), 4990 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4991 FIN, SV->getValue(), SV->getOffset()); 4992 MemOps.push_back(Store); 4993 4994 // Store ptr to overflow_arg_area 4995 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4996 DAG.getConstant(4, getPointerTy())); 4997 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4998 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4999 SV->getOffset()); 5000 MemOps.push_back(Store); 5001 5002 // Store ptr to reg_save_area. 5003 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 5004 DAG.getConstant(8, getPointerTy())); 5005 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 5006 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 5007 SV->getOffset()); 5008 MemOps.push_back(Store); 5009 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 5010} 5011 5012SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 5013 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 5014 SDOperand Chain = Op.getOperand(0); 5015 SDOperand DstPtr = Op.getOperand(1); 5016 SDOperand SrcPtr = Op.getOperand(2); 5017 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 5018 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 5019 5020 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 5021 SrcSV->getValue(), SrcSV->getOffset()); 5022 Chain = SrcPtr.getValue(1); 5023 for (unsigned i = 0; i < 3; ++i) { 5024 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 5025 SrcSV->getValue(), SrcSV->getOffset()); 5026 Chain = Val.getValue(1); 5027 Chain = DAG.getStore(Chain, Val, DstPtr, 5028 DstSV->getValue(), DstSV->getOffset()); 5029 if (i == 2) 5030 break; 5031 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 5032 DAG.getConstant(8, getPointerTy())); 5033 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 5034 DAG.getConstant(8, getPointerTy())); 5035 } 5036 return Chain; 5037} 5038 5039SDOperand 5040X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 5041 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 5042 switch (IntNo) { 5043 default: return SDOperand(); // Don't custom lower most intrinsics. 5044 // Comparison intrinsics. 
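  // (comiss/comisd signal an invalid-operation exception on QNaN operands,
  // while the ucomiss/ucomisd forms are quiet; both set EFLAGS identically,
  // which is why each pair shares the lowering below.)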
5045 case Intrinsic::x86_sse_comieq_ss: 5046 case Intrinsic::x86_sse_comilt_ss: 5047 case Intrinsic::x86_sse_comile_ss: 5048 case Intrinsic::x86_sse_comigt_ss: 5049 case Intrinsic::x86_sse_comige_ss: 5050 case Intrinsic::x86_sse_comineq_ss: 5051 case Intrinsic::x86_sse_ucomieq_ss: 5052 case Intrinsic::x86_sse_ucomilt_ss: 5053 case Intrinsic::x86_sse_ucomile_ss: 5054 case Intrinsic::x86_sse_ucomigt_ss: 5055 case Intrinsic::x86_sse_ucomige_ss: 5056 case Intrinsic::x86_sse_ucomineq_ss: 5057 case Intrinsic::x86_sse2_comieq_sd: 5058 case Intrinsic::x86_sse2_comilt_sd: 5059 case Intrinsic::x86_sse2_comile_sd: 5060 case Intrinsic::x86_sse2_comigt_sd: 5061 case Intrinsic::x86_sse2_comige_sd: 5062 case Intrinsic::x86_sse2_comineq_sd: 5063 case Intrinsic::x86_sse2_ucomieq_sd: 5064 case Intrinsic::x86_sse2_ucomilt_sd: 5065 case Intrinsic::x86_sse2_ucomile_sd: 5066 case Intrinsic::x86_sse2_ucomigt_sd: 5067 case Intrinsic::x86_sse2_ucomige_sd: 5068 case Intrinsic::x86_sse2_ucomineq_sd: { 5069 unsigned Opc = 0; 5070 ISD::CondCode CC = ISD::SETCC_INVALID; 5071 switch (IntNo) { 5072 default: break; 5073 case Intrinsic::x86_sse_comieq_ss: 5074 case Intrinsic::x86_sse2_comieq_sd: 5075 Opc = X86ISD::COMI; 5076 CC = ISD::SETEQ; 5077 break; 5078 case Intrinsic::x86_sse_comilt_ss: 5079 case Intrinsic::x86_sse2_comilt_sd: 5080 Opc = X86ISD::COMI; 5081 CC = ISD::SETLT; 5082 break; 5083 case Intrinsic::x86_sse_comile_ss: 5084 case Intrinsic::x86_sse2_comile_sd: 5085 Opc = X86ISD::COMI; 5086 CC = ISD::SETLE; 5087 break; 5088 case Intrinsic::x86_sse_comigt_ss: 5089 case Intrinsic::x86_sse2_comigt_sd: 5090 Opc = X86ISD::COMI; 5091 CC = ISD::SETGT; 5092 break; 5093 case Intrinsic::x86_sse_comige_ss: 5094 case Intrinsic::x86_sse2_comige_sd: 5095 Opc = X86ISD::COMI; 5096 CC = ISD::SETGE; 5097 break; 5098 case Intrinsic::x86_sse_comineq_ss: 5099 case Intrinsic::x86_sse2_comineq_sd: 5100 Opc = X86ISD::COMI; 5101 CC = ISD::SETNE; 5102 break; 5103 case Intrinsic::x86_sse_ucomieq_ss: 5104 case Intrinsic::x86_sse2_ucomieq_sd: 5105 Opc = X86ISD::UCOMI; 5106 CC = ISD::SETEQ; 5107 break; 5108 case Intrinsic::x86_sse_ucomilt_ss: 5109 case Intrinsic::x86_sse2_ucomilt_sd: 5110 Opc = X86ISD::UCOMI; 5111 CC = ISD::SETLT; 5112 break; 5113 case Intrinsic::x86_sse_ucomile_ss: 5114 case Intrinsic::x86_sse2_ucomile_sd: 5115 Opc = X86ISD::UCOMI; 5116 CC = ISD::SETLE; 5117 break; 5118 case Intrinsic::x86_sse_ucomigt_ss: 5119 case Intrinsic::x86_sse2_ucomigt_sd: 5120 Opc = X86ISD::UCOMI; 5121 CC = ISD::SETGT; 5122 break; 5123 case Intrinsic::x86_sse_ucomige_ss: 5124 case Intrinsic::x86_sse2_ucomige_sd: 5125 Opc = X86ISD::UCOMI; 5126 CC = ISD::SETGE; 5127 break; 5128 case Intrinsic::x86_sse_ucomineq_ss: 5129 case Intrinsic::x86_sse2_ucomineq_sd: 5130 Opc = X86ISD::UCOMI; 5131 CC = ISD::SETNE; 5132 break; 5133 } 5134 5135 unsigned X86CC; 5136 SDOperand LHS = Op.getOperand(1); 5137 SDOperand RHS = Op.getOperand(2); 5138 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 5139 5140 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 5141 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 5142 DAG.getConstant(X86CC, MVT::i8), Cond); 5143 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 5144 } 5145 } 5146} 5147 5148SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 5149 // Depths > 0 not supported yet! 
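  // (Supporting a nonzero depth would mean walking saved frame pointers up
  // the stack; only the current frame's return-address slot is computed.)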
5150 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 5151 return SDOperand(); 5152 5153 // Just load the return address 5154 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 5155 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 5156} 5157 5158SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 5159 // Depths > 0 not supported yet! 5160 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 5161 return SDOperand(); 5162 5163 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 5164 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 5165 DAG.getConstant(4, getPointerTy())); 5166} 5167 5168SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 5169 SelectionDAG &DAG) { 5170 // Is not yet supported on x86-64 5171 if (Subtarget->is64Bit()) 5172 return SDOperand(); 5173 5174 return DAG.getConstant(8, getPointerTy()); 5175} 5176 5177SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 5178{ 5179 assert(!Subtarget->is64Bit() && 5180 "Lowering of eh_return builtin is not supported yet on x86-64"); 5181 5182 MachineFunction &MF = DAG.getMachineFunction(); 5183 SDOperand Chain = Op.getOperand(0); 5184 SDOperand Offset = Op.getOperand(1); 5185 SDOperand Handler = Op.getOperand(2); 5186 5187 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 5188 getPointerTy()); 5189 5190 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 5191 DAG.getConstant(-4UL, getPointerTy())); 5192 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 5193 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 5194 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 5195 MF.addLiveOut(X86::ECX); 5196 5197 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 5198 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 5199} 5200 5201SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 5202 SelectionDAG &DAG) { 5203 SDOperand Root = Op.getOperand(0); 5204 SDOperand Trmp = Op.getOperand(1); // trampoline 5205 SDOperand FPtr = Op.getOperand(2); // nested function 5206 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 5207 5208 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 5209 5210 if (Subtarget->is64Bit()) { 5211 return SDOperand(); // not yet supported 5212 } else { 5213 Function *Func = (Function *) 5214 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 5215 unsigned CC = Func->getCallingConv(); 5216 unsigned NestReg; 5217 5218 switch (CC) { 5219 default: 5220 assert(0 && "Unsupported calling convention"); 5221 case CallingConv::C: 5222 case CallingConv::X86_StdCall: { 5223 // Pass 'nest' parameter in ECX. 5224 // Must be kept in sync with X86CallingConv.td 5225 NestReg = X86::ECX; 5226 5227 // Check that ECX wasn't needed by an 'inreg' parameter. 5228 const FunctionType *FTy = Func->getFunctionType(); 5229 const ParamAttrsList *Attrs = Func->getParamAttrs(); 5230 5231 if (Attrs && !Func->isVarArg()) { 5232 unsigned InRegCount = 0; 5233 unsigned Idx = 1; 5234 5235 for (FunctionType::param_iterator I = FTy->param_begin(), 5236 E = FTy->param_end(); I != E; ++I, ++Idx) 5237 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 5238 // FIXME: should only count parameters that are lowered to integers. 
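            // (e.g. a single i64 or double 'inreg' argument counts as two
            // 32-bit registers here: (64 + 31) / 32 == 2.)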
5239             InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
5240
5241         if (InRegCount > 2) {
5242           cerr << "Nest register in use - reduce number of inreg parameters!\n";
5243           abort();
5244         }
5245       }
5246       break;
5247     }
5248     case CallingConv::X86_FastCall:
5249       // Pass 'nest' parameter in EAX.
5250       // Must be kept in sync with X86CallingConv.td
5251       NestReg = X86::EAX;
5252       break;
5253     }
5254
5255     const X86InstrInfo *TII =
5256       ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
5257
5258     SDOperand OutChains[4];
5259     SDOperand Addr, Disp;
5260
5261     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
5262     Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
5263
5264     unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
5265     unsigned char N86Reg  = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
5266     OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
5267                                 Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
5268
5269     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
5270     OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
5271                                 TrmpSV->getOffset() + 1, false, 1);
5272
5273     unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
5274     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
5275     OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
5276                                 TrmpSV->getValue(), TrmpSV->getOffset() + 5);
5277
5278     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
5279     OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
5280                                 TrmpSV->getOffset() + 6, false, 1);
5281
5282     SDOperand Ops[] =
5283       { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
5284     return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
5285   }
5286 }
5287
5288 SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
5289   /*
5290     The rounding mode is in bits 11:10 of the FP control word (FPCW), which
5291     the fnstcw instruction below saves to memory, with these settings:
5292       00 Round to nearest
5293       01 Round to -inf
5294       10 Round to +inf
5295       11 Round to 0
5296
5297     FLT_ROUNDS, on the other hand, expects the following:
5298      -1 Undefined
5299       0 Round to 0
5300       1 Round to nearest
5301       2 Round to +inf
5302       3 Round to -inf
5303
5304     To perform the conversion, we do:
5305       (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
      For example, control bits 11:10 = 01 (round to -inf) give
      (((0 | 2) + 1) & 3) = 3, matching FLT_ROUNDS' "round to -inf".
5306   */
5307
5308   MachineFunction &MF = DAG.getMachineFunction();
5309   const TargetMachine &TM = MF.getTarget();
5310   const TargetFrameInfo &TFI = *TM.getFrameInfo();
5311   unsigned StackAlignment = TFI.getStackAlignment();
5312   MVT::ValueType VT = Op.getValueType();
5313
5314   // Save FP Control Word to stack slot
5315   int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
5316   SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
5317
5318   SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
5319                                 DAG.getEntryNode(), StackSlot);
5320
5321   // Load FP Control Word from stack slot
5322   SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
5323
5324   // Transform as necessary
5325   SDOperand CWD1 =
5326     DAG.getNode(ISD::SRL, MVT::i16,
5327                 DAG.getNode(ISD::AND, MVT::i16,
5328                             CWD, DAG.getConstant(0x800, MVT::i16)),
5329                 DAG.getConstant(11, MVT::i8));
5330   SDOperand CWD2 =
5331     DAG.getNode(ISD::SRL, MVT::i16,
5332                 DAG.getNode(ISD::AND, MVT::i16,
5333                             CWD, DAG.getConstant(0x400, MVT::i16)),
5334                 DAG.getConstant(9, MVT::i8));
5335
5336   SDOperand RetVal =
5337     DAG.getNode(ISD::AND, MVT::i16,
5338                 DAG.getNode(ISD::ADD,
MVT::i16, 5339 DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2), 5340 DAG.getConstant(1, MVT::i16)), 5341 DAG.getConstant(3, MVT::i16)); 5342 5343 5344 return DAG.getNode((MVT::getSizeInBits(VT) < 16 ? 5345 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); 5346} 5347 5348SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) { 5349 MVT::ValueType VT = Op.getValueType(); 5350 MVT::ValueType OpVT = VT; 5351 unsigned NumBits = MVT::getSizeInBits(VT); 5352 5353 Op = Op.getOperand(0); 5354 if (VT == MVT::i8) { 5355 // Zero extend to i32 since there is not an i8 bsr. 5356 OpVT = MVT::i32; 5357 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5358 } 5359 5360 // Issue a bsr (scan bits in reverse) which also sets EFLAGS. 5361 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5362 Op = DAG.getNode(X86ISD::BSR, VTs, Op); 5363 5364 // If src is zero (i.e. bsr sets ZF), returns NumBits. 5365 SmallVector<SDOperand, 4> Ops; 5366 Ops.push_back(Op); 5367 Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT)); 5368 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5369 Ops.push_back(Op.getValue(1)); 5370 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5371 5372 // Finally xor with NumBits-1. 5373 Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); 5374 5375 if (VT == MVT::i8) 5376 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5377 return Op; 5378} 5379 5380SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) { 5381 MVT::ValueType VT = Op.getValueType(); 5382 MVT::ValueType OpVT = VT; 5383 unsigned NumBits = MVT::getSizeInBits(VT); 5384 5385 Op = Op.getOperand(0); 5386 if (VT == MVT::i8) { 5387 OpVT = MVT::i32; 5388 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op); 5389 } 5390 5391 // Issue a bsf (scan bits forward) which also sets EFLAGS. 5392 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); 5393 Op = DAG.getNode(X86ISD::BSF, VTs, Op); 5394 5395 // If src is zero (i.e. bsf sets ZF), returns NumBits. 5396 SmallVector<SDOperand, 4> Ops; 5397 Ops.push_back(Op); 5398 Ops.push_back(DAG.getConstant(NumBits, OpVT)); 5399 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); 5400 Ops.push_back(Op.getValue(1)); 5401 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); 5402 5403 if (VT == MVT::i8) 5404 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op); 5405 return Op; 5406} 5407 5408/// LowerOperation - Provide custom lowering hooks for some operations. 
5409 ///
5410 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
5411   switch (Op.getOpcode()) {
5412   default: assert(0 && "Should not custom lower this!");
5413   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
5414   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
5415   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
5416   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
5417   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
5418   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
5419   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
5420   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
5421   case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
5422   case ISD::SHL_PARTS:
5423   case ISD::SRA_PARTS:
5424   case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
5425   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
5426   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
5427   case ISD::FABS:               return LowerFABS(Op, DAG);
5428   case ISD::FNEG:               return LowerFNEG(Op, DAG);
5429   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
5430   case ISD::SETCC:              return LowerSETCC(Op, DAG);
5431   case ISD::SELECT:             return LowerSELECT(Op, DAG);
5432   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
5433   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
5434   case ISD::CALL:               return LowerCALL(Op, DAG);
5435   case ISD::RET:                return LowerRET(Op, DAG);
5436   case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
5437   case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
5438   case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
5439   case ISD::VASTART:            return LowerVASTART(Op, DAG);
5440   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
5441   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5442   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
5443   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
5444   case ISD::FRAME_TO_ARGS_OFFSET:
5445                                 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
5446   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
5447   case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
5448   case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
5449   case ISD::FLT_ROUNDS:         return LowerFLT_ROUNDS(Op, DAG);
5450   case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
5451   case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
5452
5453   // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
5454   case ISD::READCYCLECOUNTER:
5455     return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0);
5456   }
5457 }
5458
5459 /// ExpandOperationResult - Provide custom lowering hooks for expanding operations.
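/// (These are operations whose result type is illegal for the target, here
/// the i64 results of FP_TO_SINT and READCYCLECOUNTER on 32-bit subtargets;
/// the returned replacement node carries results the legalizer can split.)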
5460SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { 5461 switch (N->getOpcode()) { 5462 default: assert(0 && "Should not custom lower this!"); 5463 case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); 5464 case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); 5465 } 5466} 5467 5468const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 5469 switch (Opcode) { 5470 default: return NULL; 5471 case X86ISD::BSF: return "X86ISD::BSF"; 5472 case X86ISD::BSR: return "X86ISD::BSR"; 5473 case X86ISD::SHLD: return "X86ISD::SHLD"; 5474 case X86ISD::SHRD: return "X86ISD::SHRD"; 5475 case X86ISD::FAND: return "X86ISD::FAND"; 5476 case X86ISD::FOR: return "X86ISD::FOR"; 5477 case X86ISD::FXOR: return "X86ISD::FXOR"; 5478 case X86ISD::FSRL: return "X86ISD::FSRL"; 5479 case X86ISD::FILD: return "X86ISD::FILD"; 5480 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 5481 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 5482 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 5483 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 5484 case X86ISD::FLD: return "X86ISD::FLD"; 5485 case X86ISD::FST: return "X86ISD::FST"; 5486 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 5487 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 5488 case X86ISD::CALL: return "X86ISD::CALL"; 5489 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 5490 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 5491 case X86ISD::CMP: return "X86ISD::CMP"; 5492 case X86ISD::COMI: return "X86ISD::COMI"; 5493 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 5494 case X86ISD::SETCC: return "X86ISD::SETCC"; 5495 case X86ISD::CMOV: return "X86ISD::CMOV"; 5496 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 5497 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 5498 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 5499 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 5500 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 5501 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 5502 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 5503 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 5504 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 5505 case X86ISD::FMAX: return "X86ISD::FMAX"; 5506 case X86ISD::FMIN: return "X86ISD::FMIN"; 5507 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 5508 case X86ISD::FRCP: return "X86ISD::FRCP"; 5509 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 5510 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 5511 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 5512 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 5513 case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; 5514 } 5515} 5516 5517// isLegalAddressingMode - Return true if the addressing mode represented 5518// by AM is legal for this target, for a load/store of the specified type. 5519bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 5520 const Type *Ty) const { 5521 // X86 supports extremely general addressing modes. 5522 5523 // X86 allows a sign-extended 32-bit immediate field as a displacement. 5524 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 5525 return false; 5526 5527 if (AM.BaseGV) { 5528 // We can only fold this if we don't need an extra load. 5529 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 5530 return false; 5531 5532 // X86-64 only supports addr of globals in small code model. 
5533 if (Subtarget->is64Bit()) { 5534 if (getTargetMachine().getCodeModel() != CodeModel::Small) 5535 return false; 5536 // If lower 4G is not available, then we must use rip-relative addressing. 5537 if (AM.BaseOffs || AM.Scale > 1) 5538 return false; 5539 } 5540 } 5541 5542 switch (AM.Scale) { 5543 case 0: 5544 case 1: 5545 case 2: 5546 case 4: 5547 case 8: 5548 // These scales always work. 5549 break; 5550 case 3: 5551 case 5: 5552 case 9: 5553 // These scales are formed with basereg+scalereg. Only accept if there is 5554 // no basereg yet. 5555 if (AM.HasBaseReg) 5556 return false; 5557 break; 5558 default: // Other stuff never works. 5559 return false; 5560 } 5561 5562 return true; 5563} 5564 5565 5566bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { 5567 if (!Ty1->isInteger() || !Ty2->isInteger()) 5568 return false; 5569 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 5570 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 5571 if (NumBits1 <= NumBits2) 5572 return false; 5573 return Subtarget->is64Bit() || NumBits1 < 64; 5574} 5575 5576bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1, 5577 MVT::ValueType VT2) const { 5578 if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2)) 5579 return false; 5580 unsigned NumBits1 = MVT::getSizeInBits(VT1); 5581 unsigned NumBits2 = MVT::getSizeInBits(VT2); 5582 if (NumBits1 <= NumBits2) 5583 return false; 5584 return Subtarget->is64Bit() || NumBits1 < 64; 5585} 5586 5587/// isShuffleMaskLegal - Targets can use this to indicate that they only 5588/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5589/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5590/// are assumed to be legal. 5591bool 5592X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 5593 // Only do shuffles on 128-bit vector types for now. 5594 if (MVT::getSizeInBits(VT) == 64) return false; 5595 return (Mask.Val->getNumOperands() <= 4 || 5596 isIdentityMask(Mask.Val) || 5597 isIdentityMask(Mask.Val, true) || 5598 isSplatMask(Mask.Val) || 5599 isPSHUFHW_PSHUFLWMask(Mask.Val) || 5600 X86::isUNPCKLMask(Mask.Val) || 5601 X86::isUNPCKHMask(Mask.Val) || 5602 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 5603 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 5604} 5605 5606bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 5607 MVT::ValueType EVT, 5608 SelectionDAG &DAG) const { 5609 unsigned NumElts = BVOps.size(); 5610 // Only do shuffles on 128-bit vector types for now. 
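  // (e.g. a v2i32 build_vector is 64 bits total and is rejected below,
  // while v4i32 and v2i64 are 128-bit and may be accepted.)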
5611 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 5612 if (NumElts == 2) return true; 5613 if (NumElts == 4) { 5614 return (isMOVLMask(&BVOps[0], 4) || 5615 isCommutedMOVL(&BVOps[0], 4, true) || 5616 isSHUFPMask(&BVOps[0], 4) || 5617 isCommutedSHUFP(&BVOps[0], 4)); 5618 } 5619 return false; 5620} 5621 5622//===----------------------------------------------------------------------===// 5623// X86 Scheduler Hooks 5624//===----------------------------------------------------------------------===// 5625 5626MachineBasicBlock * 5627X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 5628 MachineBasicBlock *BB) { 5629 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5630 switch (MI->getOpcode()) { 5631 default: assert(false && "Unexpected instr type to insert"); 5632 case X86::CMOV_FR32: 5633 case X86::CMOV_FR64: 5634 case X86::CMOV_V4F32: 5635 case X86::CMOV_V2F64: 5636 case X86::CMOV_V2I64: { 5637 // To "insert" a SELECT_CC instruction, we actually have to insert the 5638 // diamond control-flow pattern. The incoming instruction knows the 5639 // destination vreg to set, the condition code register to branch on, the 5640 // true/false values to select between, and a branch opcode to use. 5641 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5642 ilist<MachineBasicBlock>::iterator It = BB; 5643 ++It; 5644 5645 // thisMBB: 5646 // ... 5647 // TrueVal = ... 5648 // cmpTY ccX, r1, r2 5649 // bCC copy1MBB 5650 // fallthrough --> copy0MBB 5651 MachineBasicBlock *thisMBB = BB; 5652 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 5653 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 5654 unsigned Opc = 5655 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 5656 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 5657 MachineFunction *F = BB->getParent(); 5658 F->getBasicBlockList().insert(It, copy0MBB); 5659 F->getBasicBlockList().insert(It, sinkMBB); 5660 // Update machine-CFG edges by first adding all successors of the current 5661 // block to the new block which will contain the Phi node for the select. 5662 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 5663 e = BB->succ_end(); i != e; ++i) 5664 sinkMBB->addSuccessor(*i); 5665 // Next, remove all successors of the current block, and add the true 5666 // and fallthrough blocks as its successors. 5667 while(!BB->succ_empty()) 5668 BB->removeSuccessor(BB->succ_begin()); 5669 BB->addSuccessor(copy0MBB); 5670 BB->addSuccessor(sinkMBB); 5671 5672 // copy0MBB: 5673 // %FalseValue = ... 5674 // # fallthrough to sinkMBB 5675 BB = copy0MBB; 5676 5677 // Update machine-CFG edges 5678 BB->addSuccessor(sinkMBB); 5679 5680 // sinkMBB: 5681 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 5682 // ... 5683 BB = sinkMBB; 5684 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 5685 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5686 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5687 5688 delete MI; // The pseudo instruction is gone now. 5689 return BB; 5690 } 5691 5692 case X86::FP32_TO_INT16_IN_MEM: 5693 case X86::FP32_TO_INT32_IN_MEM: 5694 case X86::FP32_TO_INT64_IN_MEM: 5695 case X86::FP64_TO_INT16_IN_MEM: 5696 case X86::FP64_TO_INT32_IN_MEM: 5697 case X86::FP64_TO_INT64_IN_MEM: 5698 case X86::FP80_TO_INT16_IN_MEM: 5699 case X86::FP80_TO_INT32_IN_MEM: 5700 case X86::FP80_TO_INT64_IN_MEM: { 5701 // Change the floating point control register to use "round towards zero" 5702 // mode when truncating to an integer value. 
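    // (The constant 0xC7F written below sets the rounding-control field,
    // bits 11:10 of the control word, to 11 = round toward zero, with all
    // FP exceptions masked.)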
5703 MachineFunction *F = BB->getParent(); 5704 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 5705 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 5706 5707 // Load the old value of the high byte of the control word... 5708 unsigned OldCW = 5709 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 5710 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 5711 5712 // Set the high part to be round to zero... 5713 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 5714 .addImm(0xC7F); 5715 5716 // Reload the modified control word now... 5717 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 5718 5719 // Restore the memory image of control word to original value 5720 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 5721 .addReg(OldCW); 5722 5723 // Get the X86 opcode to use. 5724 unsigned Opc; 5725 switch (MI->getOpcode()) { 5726 default: assert(0 && "illegal opcode!"); 5727 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; 5728 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; 5729 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; 5730 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; 5731 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; 5732 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; 5733 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; 5734 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; 5735 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; 5736 } 5737 5738 X86AddressMode AM; 5739 MachineOperand &Op = MI->getOperand(0); 5740 if (Op.isRegister()) { 5741 AM.BaseType = X86AddressMode::RegBase; 5742 AM.Base.Reg = Op.getReg(); 5743 } else { 5744 AM.BaseType = X86AddressMode::FrameIndexBase; 5745 AM.Base.FrameIndex = Op.getFrameIndex(); 5746 } 5747 Op = MI->getOperand(1); 5748 if (Op.isImmediate()) 5749 AM.Scale = Op.getImm(); 5750 Op = MI->getOperand(2); 5751 if (Op.isImmediate()) 5752 AM.IndexReg = Op.getImm(); 5753 Op = MI->getOperand(3); 5754 if (Op.isGlobalAddress()) { 5755 AM.GV = Op.getGlobal(); 5756 } else { 5757 AM.Disp = Op.getImm(); 5758 } 5759 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 5760 .addReg(MI->getOperand(4).getReg()); 5761 5762 // Reload the original control word now. 5763 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 5764 5765 delete MI; // The pseudo instruction is gone now. 5766 return BB; 5767 } 5768 } 5769} 5770 5771//===----------------------------------------------------------------------===// 5772// X86 Optimization Hooks 5773//===----------------------------------------------------------------------===// 5774 5775void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 5776 uint64_t Mask, 5777 uint64_t &KnownZero, 5778 uint64_t &KnownOne, 5779 const SelectionDAG &DAG, 5780 unsigned Depth) const { 5781 unsigned Opc = Op.getOpcode(); 5782 assert((Opc >= ISD::BUILTIN_OP_END || 5783 Opc == ISD::INTRINSIC_WO_CHAIN || 5784 Opc == ISD::INTRINSIC_W_CHAIN || 5785 Opc == ISD::INTRINSIC_VOID) && 5786 "Should use MaskedValueIsZero if you don't know whether Op" 5787 " is a target node!"); 5788 5789 KnownZero = KnownOne = 0; // Don't know anything. 
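  // (Illustrative: X86ISD::SETCC materializes only 0 or 1, so every bit of
  // its result except bit 0 is reported as known-zero below.)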
5790 switch (Opc) { 5791 default: break; 5792 case X86ISD::SETCC: 5793 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 5794 break; 5795 } 5796} 5797 5798/// getShuffleScalarElt - Returns the scalar element that will make up the ith 5799/// element of the result of the vector shuffle. 5800static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 5801 MVT::ValueType VT = N->getValueType(0); 5802 SDOperand PermMask = N->getOperand(2); 5803 unsigned NumElems = PermMask.getNumOperands(); 5804 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 5805 i %= NumElems; 5806 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5807 return (i == 0) 5808 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5809 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 5810 SDOperand Idx = PermMask.getOperand(i); 5811 if (Idx.getOpcode() == ISD::UNDEF) 5812 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5813 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 5814 } 5815 return SDOperand(); 5816} 5817 5818/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 5819/// node is a GlobalAddress + an offset. 5820static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 5821 unsigned Opc = N->getOpcode(); 5822 if (Opc == X86ISD::Wrapper) { 5823 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 5824 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 5825 return true; 5826 } 5827 } else if (Opc == ISD::ADD) { 5828 SDOperand N1 = N->getOperand(0); 5829 SDOperand N2 = N->getOperand(1); 5830 if (isGAPlusOffset(N1.Val, GA, Offset)) { 5831 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 5832 if (V) { 5833 Offset += V->getSignExtended(); 5834 return true; 5835 } 5836 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 5837 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 5838 if (V) { 5839 Offset += V->getSignExtended(); 5840 return true; 5841 } 5842 } 5843 } 5844 return false; 5845} 5846 5847/// isConsecutiveLoad - Returns true if N is loading from an address of Base 5848/// + Dist * Size. 
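/// (Both frame-index and global-address bases are handled; this is used by
/// PerformShuffleCombine below to recognize build_vectors of adjacent loads.)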
5849 static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
5850                               MachineFrameInfo *MFI) {
5851   if (N->getOperand(0).Val != Base->getOperand(0).Val)
5852     return false;
5853
5854   SDOperand Loc = N->getOperand(1);
5855   SDOperand BaseLoc = Base->getOperand(1);
5856   if (Loc.getOpcode() == ISD::FrameIndex) {
5857     if (BaseLoc.getOpcode() != ISD::FrameIndex)
5858       return false;
5859     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
5860     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
5861     int FS  = MFI->getObjectSize(FI);
5862     int BFS = MFI->getObjectSize(BFI);
5863     if (FS != BFS || FS != Size) return false;
5864     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
5865   } else {
5866     GlobalValue *GV1 = NULL;
5867     GlobalValue *GV2 = NULL;
5868     int64_t Offset1 = 0;
5869     int64_t Offset2 = 0;
5870     bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
5871     bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
5872     if (isGA1 && isGA2 && GV1 == GV2)
5873       return Offset1 == (Offset2 + Dist*Size);
5874   }
5875
5876   return false;
5877 }
5878
5879 static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
5880                               const X86Subtarget *Subtarget) {
5881   GlobalValue *GV = NULL;
5882   int64_t Offset = 0;  // isGAPlusOffset accumulates into this, so zero it.
5883   if (isGAPlusOffset(Base, GV, Offset))
5884     return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
5885   else {
5886     assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
5887     int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
5888     if (BFI < 0)
5889       // Fixed objects do not specify alignment, however the offsets are known.
5890       return ((Subtarget->getStackAlignment() % 16) == 0 &&
5891               (MFI->getObjectOffset(BFI) % 16) == 0);
5892     else
5893       return MFI->getObjectAlignment(BFI) >= 16;
5894   }
5895   return false;
5896 }
5897
5898
5899 /// PerformShuffleCombine - Combine a vector_shuffle that is equal to
5900 /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
5901 /// if the load addresses are consecutive, non-overlapping, and in the right
5902 /// order.
5903 static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
5904                                        const X86Subtarget *Subtarget) {
5905   MachineFunction &MF = DAG.getMachineFunction();
5906   MachineFrameInfo *MFI = MF.getFrameInfo();
5907   MVT::ValueType VT = N->getValueType(0);
5908   MVT::ValueType EVT = MVT::getVectorElementType(VT);
5909   SDOperand PermMask = N->getOperand(2);
5910   int NumElems = (int)PermMask.getNumOperands();
5911   SDNode *Base = NULL;
5912   for (int i = 0; i < NumElems; ++i) {
5913     SDOperand Idx = PermMask.getOperand(i);
5914     if (Idx.getOpcode() == ISD::UNDEF) {
5915       if (!Base) return SDOperand();
5916     } else {
5917       SDOperand Arg =
5918         getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
5919       if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
5920         return SDOperand();
5921       if (!Base)
5922         Base = Arg.Val;
5923       else if (!isConsecutiveLoad(Arg.Val, Base,
5924                                   i, MVT::getSizeInBits(EVT)/8, MFI))
5925         return SDOperand();
5926     }
5927   }
5928
5929   bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
5930   LoadSDNode *LD = cast<LoadSDNode>(Base);
5931   if (isAlign16) {
5932     return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
5933                        LD->getSrcValueOffset(), LD->isVolatile());
5934   } else {
5935     return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
5936                        LD->getSrcValueOffset(), LD->isVolatile(),
5937                        LD->getAlignment());
5938   }
5939 }
5940
5941 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
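/// In particular, with SSE this turns patterns such as
///   (select (setolt X, Y), X, Y)  ->  (X86ISD::FMIN X, Y)
/// into native minss/maxss-style operations.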
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE2 support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      // Note: the <=/>= forms are gated on UnsafeFPMath because SSE min/max
      // do not match SELECT semantics for NaN and signed-zero inputs.
      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE: // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}


SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
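
// Illustrative example (not from the original source): given the GCC inline
// asm constraint 'x' in
//   __asm__("addss %1, %0" : "+x"(result) : "x"(addend));
// getConstraintType above classifies 'x' as C_RegisterClass, deferring the
// choice of an actual SSE register to the register-class queries below.
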
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                     char Constraint,
                                                     std::vector<SDOperand> &Ops,
                                                     SelectionDAG &DAG) {
  SDOperand Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':   // Constant in the range [0, 31], e.g. a 32-bit shift amount.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':   // Constant in the range [0, 255], e.g. an I/O port number.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match (GA), (GA+C), or (C+GA).
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the commuted form, (C + GA).
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we can't
      // match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
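
// Illustrative usage of the 'I' constraint handled above (not from the
// original source):
//   __asm__("shll %1, %0" : "+r"(x) : "I"(5));
// The immediate 5 satisfies the [0, 31] range check, so it is emitted as a
// target constant; an out-of-range value such as 40 falls through the check,
// the routine returns without adding to Ops, and the operand is rejected.
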
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}
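
// Illustrative example of the 'A' constraint mapped above (not from the
// original source): 'A' names the EAX/EDX pair, as in the classic
//   unsigned long long t;
//   __asm__ volatile("rdtsc" : "=A"(t));
// where the 64-bit timestamp is returned in EDX:EAX on 32-bit x86.
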
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit, 32-bit, or 64-bit register, map to the appropriate
  // register class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}
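
// Illustrative usage of the remapping above (assumed IR syntax, not from
// this file): an inline asm result constrained to "={ax}" with an i32 type,
//   %v = call i32 asm "inl $1, $0", "={ax},{dx}"(i16 %port)
// first resolves to the 16-bit AX in GR16; the code above then rewrites the
// pair to EAX in GR32, so the i32 value occupies one 32-bit register rather
// than being split across {ax},{dx}.
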