X86ISelLowering.cpp revision 9df7dc52e8fd5c69a5d6464d02b7496091e18c29
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode f32 and f64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT    , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT    , MVT::i32  , Expand);
  }

  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part
  // is not needed.
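  //
  // Editorial illustration (not part of the original comment): given IR that
  // computes both the quotient and the remainder of the same operands, e.g.
  //
  //   %q = sdiv i32 %x, %y
  //   %r = srem i32 %x, %y
  //
  // the Expand actions below rewrite both operations into two-result
  // ISD::SDIVREM nodes, which CSE then merges, so a single IDIV instruction
  // ends up supplying both results.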
  setOperationAction(ISD::MUL             , MVT::i8    , Expand);
  setOperationAction(ISD::MULHS           , MVT::i8    , Expand);
  setOperationAction(ISD::MULHU           , MVT::i8    , Expand);
  setOperationAction(ISD::SDIV            , MVT::i8    , Expand);
  setOperationAction(ISD::UDIV            , MVT::i8    , Expand);
  setOperationAction(ISD::SREM            , MVT::i8    , Expand);
  setOperationAction(ISD::UREM            , MVT::i8    , Expand);
  setOperationAction(ISD::MUL             , MVT::i16   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i16   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i16   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i16   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i16   , Expand);
  setOperationAction(ISD::SREM            , MVT::i16   , Expand);
  setOperationAction(ISD::UREM            , MVT::i16   , Expand);
  setOperationAction(ISD::MUL             , MVT::i32   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i32   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i32   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i32   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i32   , Expand);
  setOperationAction(ISD::SREM            , MVT::i32   , Expand);
  setOperationAction(ISD::UREM            , MVT::i32   , Expand);
  setOperationAction(ISD::MUL             , MVT::i64   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i64   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i64   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i64   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i64   , Expand);
  setOperationAction(ISD::SREM            , MVT::i64   , Expand);
  setOperationAction(ISD::UREM            , MVT::i64   , Expand);

  setOperationAction(ISD::BR_JT           , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND          , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC           , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC       , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE         , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG  , MVT::f32   , Expand);
  setOperationAction(ISD::FREM            , MVT::f64   , Expand);

  setOperationAction(ISD::CTPOP           , MVT::i8    , Expand);
  setOperationAction(ISD::CTTZ            , MVT::i8    , Expand);
  setOperationAction(ISD::CTLZ            , MVT::i8    , Expand);
  setOperationAction(ISD::CTPOP           , MVT::i16   , Expand);
  setOperationAction(ISD::CTTZ            , MVT::i16   , Expand);
  setOperationAction(ISD::CTLZ            , MVT::i16   , Expand);
  setOperationAction(ISD::CTPOP           , MVT::i32   , Expand);
  setOperationAction(ISD::CTTZ            , MVT::i32   , Expand);
  setOperationAction(ISD::CTLZ            , MVT::i32   , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP         , MVT::i64   , Expand);
    setOperationAction(ISD::CTTZ          , MVT::i64   , Expand);
    setOperationAction(ISD::CTLZ          , MVT::i64   , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64   , Custom);
  setOperationAction(ISD::BSWAP           , MVT::i16   , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT          , MVT::i1    , Promote);
  setOperationAction(ISD::SELECT          , MVT::i8    , Promote);
  // X86 wants to expand cmov itself.
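  // (Editorial note, an assumption not stated in this revision's comments:
  // the Custom actions below route select/setcc through this file's own
  // lowering code, which emits the X86-specific condition-code nodes instead
  // of relying on the generic legalizer expansion.)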
  setOperationAction(ISD::SELECT          , MVT::i16   , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32   , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32   , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64   , Custom);
  setOperationAction(ISD::SELECT          , MVT::f80   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8    , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32   , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32   , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64   , Custom);
  setOperationAction(ISD::SETCC           , MVT::f80   , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64   , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64   , Custom);
  }
  // The X86 ret instruction may pop the stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN     , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32   , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32   , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32   , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32   , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32   , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64   , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64   , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64   , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64   , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS       , MVT::i32   , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32   , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32   , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART         , MVT::Other, Custom);
  setOperationAction(ISD::VAARG           , MVT::Other, Expand);
  setOperationAction(ISD::VAEND           , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY        , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY        , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE->x87 conversions go through memory.
    setConvertAction(MVT::f32, MVT::f64, Expand);
    setConvertAction(MVT::f32, MVT::f80, Expand);

    // x87->SSE truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    // And x87->x87 truncations also.
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF,      MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN,  MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN, MVT::f80, Expand);
    setOperationAction(ISD::FCOS, MVT::f80, Expand);
  }

  // Always use a library call for pow.
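  // (Editorial note, an assumption: marking ISD::FPOW as Expand causes the
  // legalizer to emit runtime library calls -- powf/pow/powl for f32/f64/f80
  // respectively -- since no x86 instruction computes pow directly.)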
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f80, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD, MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);
    setOperationAction(ISD::SUB, MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,   MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR,  MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,  MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,  MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,  MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,  MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,   MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,   MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset  = 16; // For %llvm.memset  -> sequence of stores
  maxStoresPerMemcpy  = 16; // For %llvm.memcpy  -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node,
/// if one exists, skipping a possible ISD::TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand, 8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy the registers used by the call. The last operand is a flag, so it
    // is not copied.
    for (unsigned i = 3; i < TailCall.getNumOperands() - 1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into the top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. It returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When the stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention just a little: the callee should
//  clean up the stack, not the caller. Symbols should be also decorated in
//  some fancy way :) It doesn't support any vector arguments.
//  For info on the fast calling convention see the Fast Calling Convention
//  (tail call) implementation, LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// Align stack arguments according to the platform alignment needed for
// tail calls.
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg,
                 getTargetMachine(), ArgLocs);
  // Check for the possible tail call calling convention.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align the stack specially for tail calls.
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  // Tail call calling convention (CallingConv::Fast) does not support varargs.
  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "CallingConv::Fast does not support varargs.");

  if (isStdCall && !isVarArg &&
      ((CC == CallingConv::Fast && PerformTailCallOpt) ||
       CC != CallingConv::Fast)) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0;  // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall ||
      (CC == CallingConv::Fast && PerformTailCallOpt)) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
    assert(!(isVarArg && CC == CallingConv::Fast) &&
           "CallingConv::Fast does not support varargs.");
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
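//
// Editorial illustration (an assumption, not part of the original comment):
// with 4-byte stack slots, an 8n+4 callee-pop area plus the 4-byte return
// address makes the total 8n+8, so the stack stays 8-byte aligned across the
// call. For example, three stack arguments occupy 12 bytes (8*1+4); after
// the retaddr push the frame delta is 16 bytes.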
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the argument area takes 8n+4 bytes, so that the arguments are
    // aligned both before and after the retaddr has been pushed.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);
    SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);

    return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
                         AlwaysInline);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the argument area takes 8n+4 bytes, so that the arguments are
    // aligned both before and after the retaddr has been pushed.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  assert(!isTailCall && "no tail call here");
  Chain = DAG.getNode(X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}

//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

//  Like the stdcall convention, the callee cleans up the arguments, except
//  that ECX is reserved for storing the address of the tail called function.
//  Only 2 registers are free for argument passing (inreg). Tail call
//  optimization is performed provided:
//   * tailcallopt is enabled
//   * caller/callee are fastcc
//   * elf/pic is disabled OR
//   * elf/pic enabled + callee is in module + callee has
//     visibility protected or hidden
//  To keep the stack aligned according to the platform ABI, the function
//  GetAlignedArgumentStackSize ensures that the argument delta is always a
//  multiple of the stack alignment. (Dynamic linkers need this - darwin's
//  dyld, for example.)
//  If the tail called function (the callee) has more arguments than the
//  caller, the caller needs to make sure that there is room to move the
//  RETADDR to. This is achieved by reserving an area the size of the
//  argument delta right after the original RETADDR, but before the saved
//  frame pointer or the spilled registers,
//  e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
//  stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round up the stack size so that, together
/// with the SlotSize-byte return address slot, it is a multiple of the stack
/// alignment, e.g. 16n + 12 for a 16 byte alignment requirement and 4 byte
/// slots.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // The remainder is no larger than StackAlignment - SlotSize, so just
      // add the difference.
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the lower bits, then add the stack alignment once plus the
      // StackAlignment - SlotSize bytes.
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment - SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
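/// For example, a fastcc function whose RET node immediately consumes the
/// result (or just the chain) of a preceding fastcc CALL node is eligible;
/// any node scheduled between the call and the return disqualifies the pair.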
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                          SDOperand Ret,
                                                          SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  // Check whether the CALL node immediately precedes the RET node and whether
  // the return uses the result of the node or is a void return.
  unsigned NumOps = Ret.getNumOperands();
  if ((NumOps == 1 &&
       (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
        Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
      (NumOps == 2 &&
       Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
       Ret.getOperand(1) == SDOperand(Call.Val,0))) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDOperand Callee = Call.getOperand(4);
      // On elf/pic %ebx needs to be livein.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT())
        return true;

      // Can only do local tail calls with PIC.
      GlobalValue *GV = 0;
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G != 0 &&
          (GV = G->getGlobal()) &&
          (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
        return true;
    }
  }

  return false;
}

SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
                                                 SelectionDAG &DAG,
                                                 unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  bool is64Bit = Subtarget->is64Bit();

  assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (is64Bit)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);

  // Lower arguments at fp - stackoffset + fpdiff.
  MachineFunction &MF = DAG.getMachineFunction();

  unsigned NumBytesToBePushed =
    GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);

  unsigned NumBytesCallerPushed =
    MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
  int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;

  // Set the delta of movement of the return address stack slot, but only if
  // the delta is greater than the previous delta.
  if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
    MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytesToBePushed, getPointerTy()));

  // Adjust the return address stack slot.
  SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
  if (FPDiff) {
    MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
    RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
    // Load the "old" return address.
    RetAddrFrIdx =
      DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
    // Calculate the new stack slot for the return address.
    int SlotSize = is64Bit ? 8 : 4;
    int NewReturnAddrFI =
      MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
    NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
    Chain = SDOperand(RetAddrFrIdx.Val, 1);
  }

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;
  SmallVector<SDOperand, 8> MemOpChains2;
  SDOperand FramePtr, StackPtr;
  SDOperand PtrOff;
  SDOperand FIN;
  int FI = 0;

  // Walk the register/memloc assignments, inserting copies/loads. Lower the
  // arguments first to the stack slots they would occupy in a normal
  // (non-tail) function call.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  InFlag = SDOperand();

  // Copy from the caller's stack slots to the stack slots of the tail called
  // function. This needs to be done because if we lowered the arguments
  // directly to their real stack slots we might end up overwriting each
  // other.
  // TODO: To make this more efficient (sometimes saving a store/load) we
  // could analyse the arguments and emit this store/load/store sequence only
  // for arguments which would be overwritten otherwise.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (!VA.isRegLoc()) {
      SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
      unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();

      // Get the source stack slot.
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      // Create the frame index.
      int32_t Offset = VA.getLocMemOffset()+FPDiff;
      uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
      FIN = DAG.getFrameIndex(FI, MVT::i32);
      if (Flags & ISD::ParamFlags::ByVal) {
        // Copy relative to framepointer.
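        // ParamFlags packs the by-value size and log2(alignment) of the
        // argument into one word; e.g. a 16-byte struct with 8-byte alignment
        // decodes to Size == 16 and Align == 8 below, which parameterize the
        // MEMCPY into the tail call's stack slot.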
        unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                               ISD::ParamFlags::ByValAlignOffs);

        unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                        ISD::ParamFlags::ByValSizeOffs;

        SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
        SDOperand SizeNode  = DAG.getConstant(Size, MVT::i32);
        MemOpChains2.push_back(DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, FIN,
                                           PtrOff, SizeNode, AlignNode));
      } else {
        SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL,0);
        // Store relative to framepointer.
        MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
      }
    }
  }

  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains2[0], MemOpChains2.size());

  // Store the return address to the appropriate stack slot.
  if (FPDiff)
    Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);

  // ELF / PIC normally requires the GOT pointer in the EBX register before
  // function calls via the PLT. That does not work with tail calls, since
  // %ebx is not restored correctly by the tail caller.
  // TODO: at least true for x86 - verify for x86-64.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  else {
    assert(Callee.getOpcode() == ISD::LOAD &&
           "Function destination must be loaded into virtual register");
    unsigned Opc = is64Bit ? X86::R9 : X86::ECX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add the register as a live out.
    DAG.getMachineFunction().addLiveOut(Opc);
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;

  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a chain & a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  assert(InFlag.Val &&
         "Flag must be set. Depend on flag being set in LowerRET");
  Chain = DAG.getNode(X86ISD::TAILCALL,
                      Op.Val->getVTList(), &Ops[0], Ops.size());

  return SDOperand(Chain.Val, Op.ResNo);
}

//===----------------------------------------------------------------------===//
// X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg,
                 getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass;  // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
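      // e.g. a v8i8 argument arrives in RDI as a plain i64 copy-from-reg, so
      // bit_convert it back to the MMX vector type recorded in the
      // CCValAssign.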
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    assert(CC != CallingConv::Fast
           && "Var arg not supported with calling convention fastcc");
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);
  // The tail call convention (fastcc) needs the callee to pop its arguments.
  if (CC == CallingConv::Fast && PerformTailCallOpt) {
    BytesToPopOnReturn = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0;  // Callee pops nothing.
    BytesCallerReserves = StackSize;
  }
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    assert(CallingConv::Fast != CC &&
           "Var args not supported with calling convention fastcc");

    // From the AMD64 ABI document:
    //   For calls that may call functions that use varargs or stdargs
    //   (prototype-less calls or calls to functions containing ellipsis (...)
    //   in the declaration) %al is used as hidden argument to specify the
    //   number of SSE registers used. The contents of %al do not need to
    //   match exactly the number of registers, but must be an upper bound on
    //   the number of SSE registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
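    // e.g. a call passing two doubles in XMM0 and XMM1 leaves NumXMMRegs == 2,
    // so AL is set to 2 below; any upper bound up to 8 would equally satisfy
    // the ABI.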
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (getTargetMachine().getCodeModel() != CodeModel::Large
        && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                           getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    if (getTargetMachine().getCodeModel() != CodeModel::Large)
      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);
  int NumBytesForCalleeToPush = 0;
  if (CC == CallingConv::Fast && PerformTailCallOpt) {
    NumBytesForCalleeToPush = NumBytes;  // Callee pops everything.
  } else {
    NumBytesForCalleeToPush = 0;  // Callee pops nothing.
  }
  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
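    // The slot sits just below the incoming stack pointer: a fixed object at
    // offset -8 on 64-bit targets, -4 on 32-bit ones.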
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}



/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code? The current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
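/// These are the same eight condition codes that translateX86CC produces for
/// floating point compares; integer-only codes such as COND_G or COND_L have
/// no fcmov counterpart, which is why callers must check before forming an
/// FP cmov.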
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified half-open
/// range [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
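  // e.g. <2, 1, 0, 3, 4, 5, 6, 7> is a valid PSHUFLW mask; an index of 4 or
  // more in any of the first four slots would pull from the high quadword
  // and is rejected.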
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Elems[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
  if (NumOps != 2 && NumOps != 4) return false;

  unsigned Half = NumOps / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
      return false;
  for (unsigned i = Half; i < NumOps; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumOps))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect elt0 == 6, elt1 == 7, elt2 == 2, elt3 == 3.
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect elt0 == 2, elt1 == 3, elt2 == 2, elt3 == 3.
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
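/// For a 4-element vector this accepts masks like <4, 5, 2, 3>: the low half
/// is taken from V2 in order, and the high half of V1 stays in place.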
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      // Since V2 is a splat, references into it are normalized to its first
      // element.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI  = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      // Since V2 is a splat, references into it are normalized to its first
      // element.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI  = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. x86 movss requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
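/// MOVSHDUP duplicates the odd elements, so the only profitable mask here is
/// <1, 1, 3, 3> (undefs allowed); at least one explicit 3 must be present,
/// otherwise a plain shufps could do the job (see the HasHi check below).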
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit, and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFHW
/// instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with the PSHUFLW
/// instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled; its elements must stay within the low quadword.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the values
/// in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                      SDOperand &V2, SDOperand &Mask,
                                      SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return ISD::isNON_EXTLoad(N);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order). And since V1 will become the source of
/// the MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation; we would rather
  // fold the load into a shufps op.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDOperand SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
        return false;
      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
        return false;
    }
  }
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
static bool isZeroShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDOperand V1 = N->getOperand(0);
  SDOperand V2 = N->getOperand(1);
  SDOperand Mask = N->getOperand(2);
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
      if (Idx < NumElems) {
        unsigned Opc = V1.Val->getOpcode();
        if (Opc == ISD::UNDEF)
          continue;
        if (Opc != ISD::BUILD_VECTOR ||
            !isZeroNode(V1.Val->getOperand(Idx)))
          return false;
      } else if (Idx >= NumElems) {
        unsigned Opc = V2.Val->getOpcode();
        if (Opc == ISD::UNDEF)
          continue;
        if (Opc != ISD::BUILD_VECTOR ||
            !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
          return false;
      }
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
  assert(MVT::isVector(VT) && "Expected a vector type");
  unsigned NumElems = MVT::getVectorNumElements(VT);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  bool isFP = MVT::isFloatingPoint(EVT);
  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
  SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
}

/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);

  bool Changed = false;
  SmallVector<SDOperand, 8> MaskVec;
  unsigned NumElems = Mask.getNumOperands();
  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF) {
      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
      if (Val > NumElems) {
        Arg = DAG.getConstant(NumElems, Arg.getValueType());
        Changed = true;
      }
    }
    MaskVec.push_back(Arg);
  }

  if (Changed)
    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
                       &MaskVec[0], MaskVec.size());
  return Mask;
}

/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);

  SmallVector<SDOperand, 8> MaskVec;
  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
  for (unsigned i = 1; i != NumElems; ++i)
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    MaskVec.push_back(DAG.getConstant(i, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
  MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
  unsigned Half = NumElems/2;
  SmallVector<SDOperand, 8> MaskVec;
  for (unsigned i = 0; i != Half; ++i) {
    MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}

/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
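/// The input is repeatedly unpacked against itself until 32-bit elements
/// remain (two rounds for v16i8, one for v8i16), then the result is
/// bit_convert'ed to v4i32 and splatted with an all-zero shuffle mask.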
2960/// 2961static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2962 SDOperand V1 = Op.getOperand(0); 2963 SDOperand Mask = Op.getOperand(2); 2964 MVT::ValueType VT = Op.getValueType(); 2965 unsigned NumElems = Mask.getNumOperands(); 2966 Mask = getUnpacklMask(NumElems, DAG); 2967 while (NumElems != 4) { 2968 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2969 NumElems >>= 1; 2970 } 2971 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2972 2973 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2974 Mask = getZeroVector(MaskVT, DAG); 2975 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2976 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2977 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2978} 2979 2980/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2981/// vector of zero or undef vector. 2982static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2983 unsigned NumElems, unsigned Idx, 2984 bool isZero, SelectionDAG &DAG) { 2985 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2986 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2987 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2988 SDOperand Zero = DAG.getConstant(0, EVT); 2989 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2990 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2991 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2992 &MaskVec[0], MaskVec.size()); 2993 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2994} 2995 2996/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2997/// 2998static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2999 unsigned NumNonZero, unsigned NumZero, 3000 SelectionDAG &DAG, TargetLowering &TLI) { 3001 if (NumNonZero > 8) 3002 return SDOperand(); 3003 3004 SDOperand V(0, 0); 3005 bool First = true; 3006 for (unsigned i = 0; i < 16; ++i) { 3007 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 3008 if (ThisIsNonZero && First) { 3009 if (NumZero) 3010 V = getZeroVector(MVT::v8i16, DAG); 3011 else 3012 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3013 First = false; 3014 } 3015 3016 if ((i & 1) != 0) { 3017 SDOperand ThisElt(0, 0), LastElt(0, 0); 3018 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 3019 if (LastIsNonZero) { 3020 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 3021 } 3022 if (ThisIsNonZero) { 3023 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 3024 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 3025 ThisElt, DAG.getConstant(8, MVT::i8)); 3026 if (LastIsNonZero) 3027 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 3028 } else 3029 ThisElt = LastElt; 3030 3031 if (ThisElt.Val) 3032 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 3033 DAG.getConstant(i/2, TLI.getPointerTy())); 3034 } 3035 } 3036 3037 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 3038} 3039 3040/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
3041/// 3042static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 3043 unsigned NumNonZero, unsigned NumZero, 3044 SelectionDAG &DAG, TargetLowering &TLI) { 3045 if (NumNonZero > 4) 3046 return SDOperand(); 3047 3048 SDOperand V(0, 0); 3049 bool First = true; 3050 for (unsigned i = 0; i < 8; ++i) { 3051 bool isNonZero = (NonZeros & (1 << i)) != 0; 3052 if (isNonZero) { 3053 if (First) { 3054 if (NumZero) 3055 V = getZeroVector(MVT::v8i16, DAG); 3056 else 3057 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3058 First = false; 3059 } 3060 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 3061 DAG.getConstant(i, TLI.getPointerTy())); 3062 } 3063 } 3064 3065 return V; 3066} 3067 3068SDOperand 3069X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3070 // All zero's are handled with pxor. 3071 if (ISD::isBuildVectorAllZeros(Op.Val)) 3072 return Op; 3073 3074 // All one's are handled with pcmpeqd. 3075 if (ISD::isBuildVectorAllOnes(Op.Val)) 3076 return Op; 3077 3078 MVT::ValueType VT = Op.getValueType(); 3079 MVT::ValueType EVT = MVT::getVectorElementType(VT); 3080 unsigned EVTBits = MVT::getSizeInBits(EVT); 3081 3082 unsigned NumElems = Op.getNumOperands(); 3083 unsigned NumZero = 0; 3084 unsigned NumNonZero = 0; 3085 unsigned NonZeros = 0; 3086 unsigned NumNonZeroImms = 0; 3087 std::set<SDOperand> Values; 3088 for (unsigned i = 0; i < NumElems; ++i) { 3089 SDOperand Elt = Op.getOperand(i); 3090 if (Elt.getOpcode() != ISD::UNDEF) { 3091 Values.insert(Elt); 3092 if (isZeroNode(Elt)) 3093 NumZero++; 3094 else { 3095 NonZeros |= (1 << i); 3096 NumNonZero++; 3097 if (Elt.getOpcode() == ISD::Constant || 3098 Elt.getOpcode() == ISD::ConstantFP) 3099 NumNonZeroImms++; 3100 } 3101 } 3102 } 3103 3104 if (NumNonZero == 0) { 3105 if (NumZero == 0) 3106 // All undef vector. Return an UNDEF. 3107 return DAG.getNode(ISD::UNDEF, VT); 3108 else 3109 // A mix of zero and undef. Return a zero vector. 3110 return getZeroVector(VT, DAG); 3111 } 3112 3113 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3114 if (Values.size() == 1) 3115 return SDOperand(); 3116 3117 // Special case for single non-zero element. 3118 if (NumNonZero == 1) { 3119 unsigned Idx = CountTrailingZeros_32(NonZeros); 3120 SDOperand Item = Op.getOperand(Idx); 3121 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3122 if (Idx == 0) 3123 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 3124 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 3125 NumZero > 0, DAG); 3126 3127 if (EVTBits == 32) { 3128 // Turn it into a shuffle of zero and zero-extended scalar to vector. 3129 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 3130 DAG); 3131 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3132 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 3133 SmallVector<SDOperand, 8> MaskVec; 3134 for (unsigned i = 0; i < NumElems; i++) 3135 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3136 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3137 &MaskVec[0], MaskVec.size()); 3138 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3139 DAG.getNode(ISD::UNDEF, VT), Mask); 3140 } 3141 } 3142 3143 // A vector full of immediates; various special cases are already 3144 // handled, so this is best done with a single constant-pool load. 3145 if (NumNonZero == NumNonZeroImms) 3146 return SDOperand(); 3147 3148 // Let legalizer expand 2-wide build_vectors. 
3149   if (EVTBits == 64)
3150     return SDOperand();
3151 
3152   // If element VT is < 32 bits, convert it to inserts into a zero vector.
3153   if (EVTBits == 8 && NumElems == 16) {
3154     SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
3155                                         *this);
3156     if (V.Val) return V;
3157   }
3158 
3159   if (EVTBits == 16 && NumElems == 8) {
3160     SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
3161                                         *this);
3162     if (V.Val) return V;
3163   }
3164 
3165   // If element VT is == 32 bits, turn it into a number of shuffles.
3166   SmallVector<SDOperand, 8> V;
3167   V.resize(NumElems);
3168   if (NumElems == 4 && NumZero > 0) {
3169     for (unsigned i = 0; i < 4; ++i) {
3170       bool isZero = !(NonZeros & (1 << i));
3171       if (isZero)
3172         V[i] = getZeroVector(VT, DAG);
3173       else
3174         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3175     }
3176 
3177     for (unsigned i = 0; i < 2; ++i) {
3178       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
3179         default: break;
3180         case 0:
3181           V[i] = V[i*2];  // Must be a zero vector.
3182           break;
3183         case 1:
3184           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
3185                              getMOVLMask(NumElems, DAG));
3186           break;
3187         case 2:
3188           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3189                              getMOVLMask(NumElems, DAG));
3190           break;
3191         case 3:
3192           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3193                              getUnpacklMask(NumElems, DAG));
3194           break;
3195       }
3196     }
3197 
3198     // Take advantage of the fact that a GR32 to VR128 scalar_to_vector (i.e.
3199     // movd) clears the upper bits.
3200     // FIXME: we can do the same for the v4f32 case when we know both parts of
3201     // the lower half come from scalar_to_vector (loadf32). We should do
3202     // that in the post-legalizer dag combiner with target specific hooks.
3203     if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
3204       return V[0];
3205     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3206     MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
3207     SmallVector<SDOperand, 8> MaskVec;
3208     bool Reverse = (NonZeros & 0x3) == 2;
3209     for (unsigned i = 0; i < 2; ++i)
3210       if (Reverse)
3211         MaskVec.push_back(DAG.getConstant(1-i, EVT));
3212       else
3213         MaskVec.push_back(DAG.getConstant(i, EVT));
3214     Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
3215     for (unsigned i = 0; i < 2; ++i)
3216       if (Reverse)
3217         MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
3218       else
3219         MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
3220     SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3221                                      &MaskVec[0], MaskVec.size());
3222     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
3223   }
3224 
3225   if (Values.size() > 2) {
3226     // Expand into a number of unpckl*.
3227     // e.g. for v4f32
3228     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
3229     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
3230     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
3231     SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
3232     for (unsigned i = 0; i < NumElems; ++i)
3233       V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3234     NumElems >>= 1;
3235     while (NumElems != 0) {
3236       for (unsigned i = 0; i < NumElems; ++i)
3237         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
3238                            UnpckMask);
3239       NumElems >>= 1;
3240     }
3241     return V[0];
3242   }
3243 
3244   return SDOperand();
3245 }
3246 
3247 SDOperand
3248 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
3249   SDOperand V1 = Op.getOperand(0);
3250   SDOperand V2 = Op.getOperand(1);
3251   SDOperand PermMask = Op.getOperand(2);
3252   MVT::ValueType VT = Op.getValueType();
3253   unsigned NumElems = PermMask.getNumOperands();
3254   bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
3255   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
3256   bool V1IsSplat = false;
3257   bool V2IsSplat = false;
3258 
3259   if (isUndefShuffle(Op.Val))
3260     return DAG.getNode(ISD::UNDEF, VT);
3261 
3262   if (isZeroShuffle(Op.Val))
3263     return getZeroVector(VT, DAG);
3264 
3265   if (isIdentityMask(PermMask.Val))
3266     return V1;
3267   else if (isIdentityMask(PermMask.Val, true))
3268     return V2;
3269 
3270   if (isSplatMask(PermMask.Val)) {
3271     if (NumElems <= 4) return Op;
3272     // Promote it to a v4i32 splat.
3273     return PromoteSplat(Op, DAG);
3274   }
3275 
3276   if (X86::isMOVLMask(PermMask.Val))
3277     return (V1IsUndef) ? V2 : Op;
3278 
3279   if (X86::isMOVSHDUPMask(PermMask.Val) ||
3280       X86::isMOVSLDUPMask(PermMask.Val) ||
3281       X86::isMOVHLPSMask(PermMask.Val) ||
3282       X86::isMOVHPMask(PermMask.Val) ||
3283       X86::isMOVLPMask(PermMask.Val))
3284     return Op;
3285 
3286   if (ShouldXformToMOVHLPS(PermMask.Val) ||
3287       ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
3288     return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3289 
3290   bool Commuted = false;
3291   V1IsSplat = isSplatVector(V1.Val);
3292   V2IsSplat = isSplatVector(V2.Val);
3293   if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
3294     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3295     std::swap(V1IsSplat, V2IsSplat);
3296     std::swap(V1IsUndef, V2IsUndef);
3297     Commuted = true;
3298   }
3299 
3300   if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3301     if (V2IsUndef) return V1;
3302     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3303     if (V2IsSplat) {
3304       // V2 is a splat, so the mask may be malformed. That is, it may point
3305       // to any V2 element. The instruction selector won't like this. Get
3306       // a corrected mask and commute to form a proper MOVS{S|D}.
3307       SDOperand NewMask = getMOVLMask(NumElems, DAG);
3308       if (NewMask.Val != PermMask.Val)
3309         Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3310     }
3311     return Op;
3312   }
3313 
3314   if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3315       X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3316       X86::isUNPCKLMask(PermMask.Val) ||
3317       X86::isUNPCKHMask(PermMask.Val))
3318     return Op;
3319 
3320   if (V2IsSplat) {
3321     // Normalize the mask so all entries that point to V2 point to its first
3322     // element, then try to match unpck{h|l} again. If a match is found,
3323     // return a new vector_shuffle with the corrected mask.
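    // (NormalizeMask rewrites every mask entry that selects from V2 to
    // NumElems, i.e. to V2's first element; this is safe precisely because
    // V2 is a splat.)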
3324     SDOperand NewMask = NormalizeMask(PermMask, DAG);
3325     if (NewMask.Val != PermMask.Val) {
3326       if (X86::isUNPCKLMask(PermMask.Val, true)) {
3327         SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3328         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3329       } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3330         SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3331         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3332       }
3333     }
3334   }
3335 
3336   // Normalize the node to match x86 shuffle ops if needed.
3337   if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
3338     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3339 
3340   if (Commuted) {
3341     // Commute it back and try unpck* again.
3342     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3343     if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3344         X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3345         X86::isUNPCKLMask(PermMask.Val) ||
3346         X86::isUNPCKHMask(PermMask.Val))
3347       return Op;
3348   }
3349 
3350   // If VT is integer, try PSHUF* first, then SHUFP*.
3351   if (MVT::isInteger(VT)) {
3352     // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
3353     // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
3354     if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
3355          X86::isPSHUFDMask(PermMask.Val)) ||
3356         X86::isPSHUFHWMask(PermMask.Val) ||
3357         X86::isPSHUFLWMask(PermMask.Val)) {
3358       if (V2.getOpcode() != ISD::UNDEF)
3359         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3360                            DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
3361       return Op;
3362     }
3363 
3364     if (X86::isSHUFPMask(PermMask.Val) &&
3365         MVT::getSizeInBits(VT) != 64)   // Don't do this for MMX.
3366       return Op;
3367 
3368     // Handle v8i16 shuffles that need a shuffle-high / shuffle-low node pair.
3369     if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
3370       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3371       MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
3372       SmallVector<SDOperand, 8> MaskVec;
3373       for (unsigned i = 0; i != 4; ++i)
3374         MaskVec.push_back(PermMask.getOperand(i));
3375       for (unsigned i = 4; i != 8; ++i)
3376         MaskVec.push_back(DAG.getConstant(i, BaseVT));
3377       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3378                                    &MaskVec[0], MaskVec.size());
3379       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3380       MaskVec.clear();
3381       for (unsigned i = 0; i != 4; ++i)
3382         MaskVec.push_back(DAG.getConstant(i, BaseVT));
3383       for (unsigned i = 4; i != 8; ++i)
3384         MaskVec.push_back(PermMask.getOperand(i));
3385       Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3386       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3387     }
3388   } else {
3389     // Floating point cases in the other order.
3390     if (X86::isSHUFPMask(PermMask.Val))
3391       return Op;
3392     if (X86::isPSHUFDMask(PermMask.Val) ||
3393         X86::isPSHUFHWMask(PermMask.Val) ||
3394         X86::isPSHUFLWMask(PermMask.Val)) {
3395       if (V2.getOpcode() != ISD::UNDEF)
3396         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3397                            DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
3398       return Op;
3399     }
3400   }
3401 
3402   if (NumElems == 4 &&
3403       // Don't do this for MMX.
3404       MVT::getSizeInBits(VT) != 64) {
3405     MVT::ValueType MaskVT = PermMask.getValueType();
3406     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3407     SmallVector<std::pair<int, int>, 8> Locs;
3408     Locs.reserve(NumElems);
3409     SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3410     SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3411     unsigned NumHi = 0;
3412     unsigned NumLo = 0;
3413     // If no more than two elements come from either vector, this can be
3414     // implemented with two shuffles. The first shuffle gathers the elements;
3415     // the second shuffle, which takes the first shuffle as both of its
3416     // vector operands, puts the elements into the right order.
3417     for (unsigned i = 0; i != NumElems; ++i) {
3418       SDOperand Elt = PermMask.getOperand(i);
3419       if (Elt.getOpcode() == ISD::UNDEF) {
3420         Locs[i] = std::make_pair(-1, -1);
3421       } else {
3422         unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3423         if (Val < NumElems) {
3424           Locs[i] = std::make_pair(0, NumLo);
3425           Mask1[NumLo] = Elt;
3426           NumLo++;
3427         } else {
3428           Locs[i] = std::make_pair(1, NumHi);
3429           if (2+NumHi < NumElems)
3430             Mask1[2+NumHi] = Elt;
3431           NumHi++;
3432         }
3433       }
3434     }
3435     if (NumLo <= 2 && NumHi <= 2) {
3436       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3437                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3438                                    &Mask1[0], Mask1.size()));
3439       for (unsigned i = 0; i != NumElems; ++i) {
3440         if (Locs[i].first == -1)
3441           continue;
3442         else {
3443           unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3444           Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3445           Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3446         }
3447       }
3448 
3449       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3450                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3451                                      &Mask2[0], Mask2.size()));
3452     }
3453 
3454     // Break it into (shuffle shuffle_hi, shuffle_lo).
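    // (Each half of the result is first gathered by its own full-width
    // shuffle of V1 and V2, LoShuffle and HiShuffle below; a third shuffle
    // of <LoShuffle, HiShuffle> then moves every element into its final
    // position.)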
3455     Locs.clear();
3456     SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3457     SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3458     SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3459     unsigned MaskIdx = 0;
3460     unsigned LoIdx = 0;
3461     unsigned HiIdx = NumElems/2;
3462     for (unsigned i = 0; i != NumElems; ++i) {
3463       if (i == NumElems/2) {
3464         MaskPtr = &HiMask;
3465         MaskIdx = 1;
3466         LoIdx = 0;
3467         HiIdx = NumElems/2;
3468       }
3469       SDOperand Elt = PermMask.getOperand(i);
3470       if (Elt.getOpcode() == ISD::UNDEF) {
3471         Locs[i] = std::make_pair(-1, -1);
3472       } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3473         Locs[i] = std::make_pair(MaskIdx, LoIdx);
3474         (*MaskPtr)[LoIdx] = Elt;
3475         LoIdx++;
3476       } else {
3477         Locs[i] = std::make_pair(MaskIdx, HiIdx);
3478         (*MaskPtr)[HiIdx] = Elt;
3479         HiIdx++;
3480       }
3481     }
3482 
3483     SDOperand LoShuffle =
3484       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3485                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3486                               &LoMask[0], LoMask.size()));
3487     SDOperand HiShuffle =
3488       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3489                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3490                               &HiMask[0], HiMask.size()));
3491     SmallVector<SDOperand, 8> MaskOps;
3492     for (unsigned i = 0; i != NumElems; ++i) {
3493       if (Locs[i].first == -1) {
3494         MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3495       } else {
3496         unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3497         MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3498       }
3499     }
3500     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3501                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3502                                    &MaskOps[0], MaskOps.size()));
3503   }
3504 
3505   return SDOperand();
3506 }
3507 
3508 SDOperand
3509 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3510   if (!isa<ConstantSDNode>(Op.getOperand(1)))
3511     return SDOperand();
3512 
3513   MVT::ValueType VT = Op.getValueType();
3514   // TODO: handle v16i8.
3515   if (MVT::getSizeInBits(VT) == 16) {
3516     // Transform it so it matches pextrw, which produces a 32-bit result.
3517     MVT::ValueType EVT = (MVT::ValueType)(VT+1);  // i16 -> i32
3518     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3519                                     Op.getOperand(0), Op.getOperand(1));
3520     SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
3521                                    DAG.getValueType(VT));
3522     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3523   } else if (MVT::getSizeInBits(VT) == 32) {
3524     SDOperand Vec = Op.getOperand(0);
3525     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3526     if (Idx == 0)
3527       return Op;
3528     // SHUFPS the element to the lowest double word, then movss.
3529     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3530     SmallVector<SDOperand, 8> IdxVec;
3531     IdxVec.push_back(
3532         DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3533     IdxVec.push_back(
3534         DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3535     IdxVec.push_back(
3536         DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3537     IdxVec.push_back(
        DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3539     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3540                                  &IdxVec[0], IdxVec.size());
3541     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3542                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3543     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3544                        DAG.getConstant(0, getPointerTy()));
3545   } else if (MVT::getSizeInBits(VT) == 64) {
3546     SDOperand Vec = Op.getOperand(0);
3547     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3548     if (Idx == 0)
3549       return Op;
3550 
3551     // UNPCKHPD the element to the lowest double word, then movsd.
3552     // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
3553     // to a f64mem, the whole operation is folded into a single MOVHPDmr.
3554     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3555     SmallVector<SDOperand, 8> IdxVec;
3556     IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
3557     IdxVec.push_back(
3558         DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3559     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3560                                  &IdxVec[0], IdxVec.size());
3561     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3562                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3563     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3564                        DAG.getConstant(0, getPointerTy()));
3565   }
3566 
3567   return SDOperand();
3568 }
3569 
3570 SDOperand
3571 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3572   // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3573   // as its second argument.
3574   MVT::ValueType VT = Op.getValueType();
3575   MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
3576   SDOperand N0 = Op.getOperand(0);
3577   SDOperand N1 = Op.getOperand(1);
3578   SDOperand N2 = Op.getOperand(2);
3579   if (MVT::getSizeInBits(BaseVT) == 16) {
3580     if (N1.getValueType() != MVT::i32)
3581       N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3582     if (N2.getValueType() != MVT::i32)
3583       N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), getPointerTy());
3584     return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3585   } else if (MVT::getSizeInBits(BaseVT) == 32) {
3586     unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3587     if (Idx == 0) {
3588       // Use a movss.
3589       N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3590       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3591       MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
3592       SmallVector<SDOperand, 8> MaskVec;
3593       MaskVec.push_back(DAG.getConstant(4, BaseVT));
3594       for (unsigned i = 1; i <= 3; ++i)
3595         MaskVec.push_back(DAG.getConstant(i, BaseVT));
3596       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3597                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3598                                      &MaskVec[0], MaskVec.size()));
3599     } else {
3600       // Use two pinsrw instructions to insert a 32-bit value.
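      // (e.g. an insert into v4i32 element 2 becomes pinsrw into v8i16
      // elements 4 and 5: first the low 16 bits, then the value shifted
      // right by 16.)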
3601       Idx <<= 1;
3602       if (MVT::isFloatingPoint(N1.getValueType())) {
3603         N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3604         N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3605         N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3606                          DAG.getConstant(0, getPointerTy()));
3607       }
3608       N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3609       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3610                        DAG.getConstant(Idx, getPointerTy()));
3611       N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3612       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3613                        DAG.getConstant(Idx+1, getPointerTy()));
3614       return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3615     }
3616   }
3617 
3618   return SDOperand();
3619 }
3620 
3621 SDOperand
3622 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3623   SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3624   return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3625 }
3626 
3627 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3628 // their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
3629 // one of the above-mentioned nodes. It has to be wrapped because otherwise
3630 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3631 // be used to form addressing modes. These wrapped nodes will be selected
3632 // into MOV32ri.
3633 SDOperand
3634 X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3635   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3636   SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3637                                                getPointerTy(),
3638                                                CP->getAlignment());
3639   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3640   // With PIC, the address is actually $g + Offset.
3641   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3642       !Subtarget->isPICStyleRIPRel()) {
3643     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3644                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3645                          Result);
3646   }
3647 
3648   return Result;
3649 }
3650 
3651 SDOperand
3652 X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3653   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3654   SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3655   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3656   // With PIC, the address is actually $g + Offset.
3657   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3658       !Subtarget->isPICStyleRIPRel()) {
3659     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3660                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3661                          Result);
3662   }
3663 
3664   // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
3665   // load the value at address GV, not the value of GV itself. This means that
3666   // the GlobalAddress must be in the base or index register of the address, not
3667   // the GV offset field. (The platform check is inside GVRequiresExtraLoad().)
3668   // The same applies for external symbols during PIC codegen.
3669   if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
3670     Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3671 
3672   return Result;
3673 }
3674 
3675 // Lower ISD::GlobalTLSAddress using the "general dynamic" model.
3676 static SDOperand
3677 LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3678                               const MVT::ValueType PtrVT) {
3679   SDOperand InFlag;
3680   SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
3681                                      DAG.getNode(X86ISD::GlobalBaseReg,
3682                                                  PtrVT), InFlag);
3683   InFlag = Chain.getValue(1);
3684 
3685   // emit leal symbol@TLSGD(,%ebx,1), %eax
3686   SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
3687   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3688                                              GA->getValueType(0),
3689                                              GA->getOffset());
3690   SDOperand Ops[] = { Chain, TGA, InFlag };
3691   SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
3692   InFlag = Result.getValue(2);
3693   Chain = Result.getValue(1);
3694 
3695   // call ___tls_get_addr. This function receives its argument in
3696   // the register EAX.
3697   Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
3698   InFlag = Chain.getValue(1);
3699 
3700   NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
3701   SDOperand Ops1[] = { Chain,
3702                        DAG.getTargetExternalSymbol("___tls_get_addr",
3703                                                    PtrVT),
3704                        DAG.getRegister(X86::EAX, PtrVT),
3705                        DAG.getRegister(X86::EBX, PtrVT),
3706                        InFlag };
3707   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
3708   InFlag = Chain.getValue(1);
3709 
3710   return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
3711 }
3712 
3713 // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
3714 // "local exec" model.
3715 static SDOperand
3716 LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3717                     const MVT::ValueType PtrVT) {
3718   // Get the Thread Pointer.
3719   SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
3720   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
3721   // (initial exec)
3722   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3723                                              GA->getValueType(0),
3724                                              GA->getOffset());
3725   SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
3726 
3727   if (GA->getGlobal()->isDeclaration())   // initial exec TLS model
3728     Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
3729 
3730   // The address of the thread local variable is the add of the thread
3731   // pointer with the offset of the variable.
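  // (For local exec the wrapped TGA above is the @ntpoff displacement
  // itself; for initial exec it was first loaded from the @indntpoff GOT
  // slot.)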
3732   return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
3733 }
3734 
3735 SDOperand
3736 X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
3737   // TODO: implement the "local dynamic" model
3738   // TODO: implement the "initial exec" model for pic executables
3739   assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
3740          "TLS not implemented for non-ELF and 64-bit targets");
3741   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3742   // If the relocation model is PIC, use the "General Dynamic" TLS model;
3743   // otherwise use the "Local Exec" TLS model.
3744   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3745     return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
3746   else
3747     return LowerToTLSExecModel(GA, DAG, getPointerTy());
3748 }
3749 
3750 SDOperand
3751 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3752   const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3753   SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3754   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3755   // With PIC, the address is actually $g + Offset.
3756   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3757       !Subtarget->isPICStyleRIPRel()) {
3758     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3759                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3760                          Result);
3761   }
3762 
3763   return Result;
3764 }
3765 
3766 SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3767   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3768   SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3769   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3770   // With PIC, the address is actually $g + Offset.
3771   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3772       !Subtarget->isPICStyleRIPRel()) {
3773     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3774                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3775                          Result);
3776   }
3777 
3778   return Result;
3779 }
3780 
3781 /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
3782 /// take a 2 x i32 value to shift plus a shift amount.
3783 SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3784   assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3785          "Not an i64 shift!");
3786   bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3787   SDOperand ShOpLo = Op.getOperand(0);
3788   SDOperand ShOpHi = Op.getOperand(1);
3789   SDOperand ShAmt  = Op.getOperand(2);
3790   SDOperand Tmp1 = isSRA ?
3791     DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3792     DAG.getConstant(0, MVT::i32);
3793 
3794   SDOperand Tmp2, Tmp3;
3795   if (Op.getOpcode() == ISD::SHL_PARTS) {
3796     Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3797     Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3798   } else {
3799     Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3800     Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3801 } 3802 3803 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3804 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3805 DAG.getConstant(32, MVT::i8)); 3806 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 3807 AndNode, DAG.getConstant(0, MVT::i8)); 3808 3809 SDOperand Hi, Lo; 3810 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3811 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3812 SmallVector<SDOperand, 4> Ops; 3813 if (Op.getOpcode() == ISD::SHL_PARTS) { 3814 Ops.push_back(Tmp2); 3815 Ops.push_back(Tmp3); 3816 Ops.push_back(CC); 3817 Ops.push_back(Cond); 3818 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3819 3820 Ops.clear(); 3821 Ops.push_back(Tmp3); 3822 Ops.push_back(Tmp1); 3823 Ops.push_back(CC); 3824 Ops.push_back(Cond); 3825 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3826 } else { 3827 Ops.push_back(Tmp2); 3828 Ops.push_back(Tmp3); 3829 Ops.push_back(CC); 3830 Ops.push_back(Cond); 3831 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3832 3833 Ops.clear(); 3834 Ops.push_back(Tmp3); 3835 Ops.push_back(Tmp1); 3836 Ops.push_back(CC); 3837 Ops.push_back(Cond); 3838 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3839 } 3840 3841 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3842 Ops.clear(); 3843 Ops.push_back(Lo); 3844 Ops.push_back(Hi); 3845 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3846} 3847 3848SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3849 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3850 Op.getOperand(0).getValueType() >= MVT::i16 && 3851 "Unknown SINT_TO_FP to lower!"); 3852 3853 SDOperand Result; 3854 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3855 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3856 MachineFunction &MF = DAG.getMachineFunction(); 3857 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3858 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3859 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3860 StackSlot, NULL, 0); 3861 3862 // These are really Legal; caller falls through into that case. 3863 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32) 3864 return Result; 3865 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64) 3866 return Result; 3867 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 3868 Subtarget->is64Bit()) 3869 return Result; 3870 3871 // Build the FILD 3872 SDVTList Tys; 3873 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) || 3874 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64); 3875 if (useSSE) 3876 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3877 else 3878 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3879 SmallVector<SDOperand, 8> Ops; 3880 Ops.push_back(Chain); 3881 Ops.push_back(StackSlot); 3882 Ops.push_back(DAG.getValueType(SrcVT)); 3883 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3884 Tys, &Ops[0], Ops.size()); 3885 3886 if (useSSE) { 3887 Chain = Result.getValue(1); 3888 SDOperand InFlag = Result.getValue(2); 3889 3890 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3891 // shouldn't be necessary except that RFP cannot be live across 3892 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
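  // (i.e. the x87 FILD result is spilled right here with an FST and reloaded
  // below as an SSE value, so no RFP register is live across a block
  // boundary.)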
3893 MachineFunction &MF = DAG.getMachineFunction(); 3894 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3895 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3896 Tys = DAG.getVTList(MVT::Other); 3897 SmallVector<SDOperand, 8> Ops; 3898 Ops.push_back(Chain); 3899 Ops.push_back(Result); 3900 Ops.push_back(StackSlot); 3901 Ops.push_back(DAG.getValueType(Op.getValueType())); 3902 Ops.push_back(InFlag); 3903 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3904 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3905 } 3906 3907 return Result; 3908} 3909 3910SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3911 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3912 "Unknown FP_TO_SINT to lower!"); 3913 SDOperand Result; 3914 3915 // These are really Legal. 3916 if (Op.getValueType() == MVT::i32 && 3917 X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) 3918 return Result; 3919 if (Op.getValueType() == MVT::i32 && 3920 X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64) 3921 return Result; 3922 if (Subtarget->is64Bit() && 3923 Op.getValueType() == MVT::i64 && 3924 Op.getOperand(0).getValueType() != MVT::f80) 3925 return Result; 3926 3927 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3928 // stack slot. 3929 MachineFunction &MF = DAG.getMachineFunction(); 3930 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3931 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3932 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3933 unsigned Opc; 3934 switch (Op.getValueType()) { 3935 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3936 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3937 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3938 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3939 } 3940 3941 SDOperand Chain = DAG.getEntryNode(); 3942 SDOperand Value = Op.getOperand(0); 3943 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) || 3944 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) { 3945 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3946 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3947 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3948 SDOperand Ops[] = { 3949 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3950 }; 3951 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3952 Chain = Value.getValue(1); 3953 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3954 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3955 } 3956 3957 // Build the FP_TO_INT*_IN_MEM 3958 SDOperand Ops[] = { Chain, Value, StackSlot }; 3959 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3960 3961 // Load the result. If this is an i64 load on an x86-32 host, expand the 3962 // load. 
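  // (On x86-32 the i64 case is split into an i32 load of the low half at the
  // slot and of the high half at slot+4, then glued with BUILD_PAIR.)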
3963   if (Op.getValueType() != MVT::i64 || Subtarget->is64Bit())
3964     return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
3965 
3966   SDOperand Lo = DAG.getLoad(MVT::i32, FIST, StackSlot, NULL, 0);
3967   StackSlot = DAG.getNode(ISD::ADD, StackSlot.getValueType(), StackSlot,
3968                           DAG.getConstant(4, StackSlot.getValueType()));
3969   SDOperand Hi = DAG.getLoad(MVT::i32, FIST, StackSlot, NULL, 0);
3970 
3971 
3972   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
3973 }
3974 
3975 SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
3976   MVT::ValueType VT = Op.getValueType();
3977   MVT::ValueType EltVT = VT;
3978   if (MVT::isVector(VT))
3979     EltVT = MVT::getVectorElementType(VT);
3980   const Type *OpNTy = MVT::getTypeForValueType(EltVT);
3981   std::vector<Constant*> CV;
3982   if (EltVT == MVT::f64) {
3983     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
3984     CV.push_back(C);
3985     CV.push_back(C);
3986   } else {
3987     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
3988     CV.push_back(C);
3989     CV.push_back(C);
3990     CV.push_back(C);
3991     CV.push_back(C);
3992   }
3993   Constant *C = ConstantVector::get(CV);
3994   SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
3995   SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
3996                                false, 16);
3997   return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3998 }
3999 
4000 SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
4001   MVT::ValueType VT = Op.getValueType();
4002   MVT::ValueType EltVT = VT;
4003   unsigned EltNum = 1;
4004   if (MVT::isVector(VT)) {
4005     EltVT = MVT::getVectorElementType(VT);
4006     EltNum = MVT::getVectorNumElements(VT);
4007   }
4008   const Type *OpNTy = MVT::getTypeForValueType(EltVT);
4009   std::vector<Constant*> CV;
4010   if (EltVT == MVT::f64) {
4011     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
4012     CV.push_back(C);
4013     CV.push_back(C);
4014   } else {
4015     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
4016     CV.push_back(C);
4017     CV.push_back(C);
4018     CV.push_back(C);
4019     CV.push_back(C);
4020   }
4021   Constant *C = ConstantVector::get(CV);
4022   SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
4023   SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
4024                                false, 16);
4025   if (MVT::isVector(VT)) {
4026     return DAG.getNode(ISD::BIT_CONVERT, VT,
4027                        DAG.getNode(ISD::XOR, MVT::v2i64,
4028                            DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
4029                            DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
4030   } else {
4031     return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
4032   }
4033 }
4034 
4035 SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
4036   SDOperand Op0 = Op.getOperand(0);
4037   SDOperand Op1 = Op.getOperand(1);
4038   MVT::ValueType VT = Op.getValueType();
4039   MVT::ValueType SrcVT = Op1.getValueType();
4040   const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
4041 
4042   // If the second operand is smaller, extend it first.
4043   if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
4044     Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
4045     SrcVT = VT;
4046     SrcTy = MVT::getTypeForValueType(SrcVT);
4047   }
4048   // And if it is bigger, shrink it first.
4049 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4050 Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1); 4051 SrcVT = VT; 4052 SrcTy = MVT::getTypeForValueType(SrcVT); 4053 } 4054 4055 // At this point the operands and the result should have the same 4056 // type, and that won't be f80 since that is not custom lowered. 4057 4058 // First get the sign bit of second operand. 4059 std::vector<Constant*> CV; 4060 if (SrcVT == MVT::f64) { 4061 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 4062 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4063 } else { 4064 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 4065 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4066 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4067 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4068 } 4069 Constant *C = ConstantVector::get(CV); 4070 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4071 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 4072 false, 16); 4073 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 4074 4075 // Shift sign bit right or left if the two operands have different types. 4076 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 4077 // Op0 is MVT::f32, Op1 is MVT::f64. 4078 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 4079 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 4080 DAG.getConstant(32, MVT::i32)); 4081 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 4082 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 4083 DAG.getConstant(0, getPointerTy())); 4084 } 4085 4086 // Clear first operand sign bit. 4087 CV.clear(); 4088 if (VT == MVT::f64) { 4089 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 4090 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 4091 } else { 4092 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 4093 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4094 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4095 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 4096 } 4097 C = ConstantVector::get(CV); 4098 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 4099 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 4100 false, 16); 4101 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 4102 4103 // Or the value with the sign bit. 
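  // (Net effect: copysign(x, y) == (x & ~sign_mask) | (y & sign_mask),
  // computed with FAND / FOR so the values never leave the FP domain.)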
4104 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 4105} 4106 4107SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 4108 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 4109 SDOperand Cond; 4110 SDOperand Op0 = Op.getOperand(0); 4111 SDOperand Op1 = Op.getOperand(1); 4112 SDOperand CC = Op.getOperand(2); 4113 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4114 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 4115 unsigned X86CC; 4116 4117 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 4118 Op0, Op1, DAG)) { 4119 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4120 return DAG.getNode(X86ISD::SETCC, MVT::i8, 4121 DAG.getConstant(X86CC, MVT::i8), Cond); 4122 } 4123 4124 assert(isFP && "Illegal integer SetCC!"); 4125 4126 Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); 4127 switch (SetCCOpcode) { 4128 default: assert(false && "Illegal floating point SetCC!"); 4129 case ISD::SETOEQ: { // !PF & ZF 4130 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4131 DAG.getConstant(X86::COND_NP, MVT::i8), Cond); 4132 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4133 DAG.getConstant(X86::COND_E, MVT::i8), Cond); 4134 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 4135 } 4136 case ISD::SETUNE: { // PF | !ZF 4137 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4138 DAG.getConstant(X86::COND_P, MVT::i8), Cond); 4139 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 4140 DAG.getConstant(X86::COND_NE, MVT::i8), Cond); 4141 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 4142 } 4143 } 4144} 4145 4146 4147SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 4148 bool addTest = true; 4149 SDOperand Cond = Op.getOperand(0); 4150 SDOperand CC; 4151 4152 if (Cond.getOpcode() == ISD::SETCC) 4153 Cond = LowerSETCC(Cond, DAG); 4154 4155 // If condition flag is set by a X86ISD::CMP, then use it as the condition 4156 // setting operand in place of the X86ISD::SETCC. 4157 if (Cond.getOpcode() == X86ISD::SETCC) { 4158 CC = Cond.getOperand(0); 4159 4160 SDOperand Cmp = Cond.getOperand(1); 4161 unsigned Opc = Cmp.getOpcode(); 4162 MVT::ValueType VT = Op.getValueType(); 4163 bool IllegalFPCMov = false; 4164 if (VT == MVT::f32 && !X86ScalarSSEf32) 4165 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4166 else if (VT == MVT::f64 && !X86ScalarSSEf64) 4167 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4168 else if (VT == MVT::f80) 4169 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 4170 if ((Opc == X86ISD::CMP || 4171 Opc == X86ISD::COMI || 4172 Opc == X86ISD::UCOMI) && !IllegalFPCMov) { 4173 Cond = Cmp; 4174 addTest = false; 4175 } 4176 } 4177 4178 if (addTest) { 4179 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 4180 Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 4181 } 4182 4183 const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(), 4184 MVT::Flag); 4185 SmallVector<SDOperand, 4> Ops; 4186 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 4187 // condition is true. 
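  // (So the SELECT's false value (operand 2) is pushed first and its true
  // value (operand 1) second, followed by the condition code and the
  // flag-producing compare.)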
4188   Ops.push_back(Op.getOperand(2));
4189   Ops.push_back(Op.getOperand(1));
4190   Ops.push_back(CC);
4191   Ops.push_back(Cond);
4192   return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
4193 }
4194 
4195 SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
4196   bool addTest = true;
4197   SDOperand Chain = Op.getOperand(0);
4198   SDOperand Cond  = Op.getOperand(1);
4199   SDOperand Dest  = Op.getOperand(2);
4200   SDOperand CC;
4201 
4202   if (Cond.getOpcode() == ISD::SETCC)
4203     Cond = LowerSETCC(Cond, DAG);
4204 
4205   // If condition flag is set by an X86ISD::CMP, then use it as the condition
4206   // setting operand in place of the X86ISD::SETCC.
4207   if (Cond.getOpcode() == X86ISD::SETCC) {
4208     CC = Cond.getOperand(0);
4209 
4210     SDOperand Cmp = Cond.getOperand(1);
4211     unsigned Opc = Cmp.getOpcode();
4212     if (Opc == X86ISD::CMP ||
4213         Opc == X86ISD::COMI ||
4214         Opc == X86ISD::UCOMI) {
4215       Cond = Cmp;
4216       addTest = false;
4217     }
4218   }
4219 
4220   if (addTest) {
4221     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4222     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
4223   }
4224   return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
4225                      Chain, Dest, CC, Cond);
4226 }
4227 
4228 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
4229   unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
4230   bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
4231 
4232   if (Subtarget->is64Bit()) {
4233     if (CallingConv == CallingConv::Fast && isTailCall && PerformTailCallOpt)
4234       return LowerX86_TailCallTo(Op, DAG, CallingConv);
4235     else
4236       return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
4237   } else
4238     switch (CallingConv) {
4239     default:
4240       assert(0 && "Unsupported calling convention");
4241     case CallingConv::Fast:
4242       if (isTailCall && PerformTailCallOpt)
4243         return LowerX86_TailCallTo(Op, DAG, CallingConv);
4244       else
4245         return LowerCCCCallTo(Op, DAG, CallingConv);
4246     case CallingConv::C:
4247     case CallingConv::X86_StdCall:
4248       return LowerCCCCallTo(Op, DAG, CallingConv);
4249     case CallingConv::X86_FastCall:
4250       return LowerFastCCCallTo(Op, DAG, CallingConv);
4251     }
4252 }
4253 
4254 
4255 // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
4256 // Calls to _alloca are needed to probe the stack when allocating more than 4K
4257 // bytes in one go. Touching the stack at 4K increments is necessary to ensure
4258 // that the guard pages used by the OS virtual memory manager are allocated in
4259 // correct sequence.
4260 SDOperand
4261 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
4262                                            SelectionDAG &DAG) {
4263   assert(Subtarget->isTargetCygMing() &&
4264          "This should be used only on Cygwin/Mingw targets");
4265 
4266   // Get the inputs.
4267   SDOperand Chain = Op.getOperand(0);
4268   SDOperand Size  = Op.getOperand(1);
4269   // FIXME: Ensure alignment here
4270 
4271   SDOperand Flag;
4272 
4273   MVT::ValueType IntPtr = getPointerTy();
4274   MVT::ValueType SPTy = (Subtarget->is64Bit() ?
    MVT::i64 : MVT::i32);
4275 
4276   Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
4277   Flag = Chain.getValue(1);
4278 
4279   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
4280   SDOperand Ops[] = { Chain,
4281                       DAG.getTargetExternalSymbol("_alloca", IntPtr),
4282                       DAG.getRegister(X86::EAX, IntPtr),
4283                       Flag };
4284   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
4285   Flag = Chain.getValue(1);
4286 
4287   Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
4288 
4289   std::vector<MVT::ValueType> Tys;
4290   Tys.push_back(SPTy);
4291   Tys.push_back(MVT::Other);
4292   SDOperand Ops1[2] = { Chain.getValue(0), Chain };
4293   return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
4294 }
4295 
4296 SDOperand
4297 X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
4298   MachineFunction &MF = DAG.getMachineFunction();
4299   const Function* Fn = MF.getFunction();
4300   if (Fn->hasExternalLinkage() &&
4301       Subtarget->isTargetCygMing() &&
4302       Fn->getName() == "main")
4303     MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
4304 
4305   unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
4306   if (Subtarget->is64Bit())
4307     return LowerX86_64CCCArguments(Op, DAG);
4308   else
4309     switch (CC) {
4310     default:
4311       assert(0 && "Unsupported calling convention");
4312     case CallingConv::Fast:
4313       return LowerCCCArguments(Op, DAG, true);
4314 
4315     case CallingConv::C:
4316       return LowerCCCArguments(Op, DAG);
4317     case CallingConv::X86_StdCall:
4318       MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
4319       return LowerCCCArguments(Op, DAG, true);
4320     case CallingConv::X86_FastCall:
4321       MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
4322       return LowerFastCCArguments(Op, DAG);
4323     }
4324 }
4325 
4326 SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
4327   SDOperand InFlag(0, 0);
4328   SDOperand Chain = Op.getOperand(0);
4329   unsigned Align =
4330     (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4331   if (Align == 0) Align = 1;
4332 
4333   ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4334   // If not DWORD aligned or size is more than the threshold, call memset.
4335   // The libc version is likely to be faster for these cases. It can use the
4336   // address value and run time information about the CPU.
4337   if ((Align & 3) != 0 ||
4338       (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
4339     MVT::ValueType IntPtr = getPointerTy();
4340     const Type *IntPtrTy = getTargetData()->getIntPtrType();
4341     TargetLowering::ArgListTy Args;
4342     TargetLowering::ArgListEntry Entry;
4343     Entry.Node = Op.getOperand(1);
4344     Entry.Ty = IntPtrTy;
4345     Args.push_back(Entry);
4346     // Extend the unsigned i8 argument to be an int value for the call.
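    // (The C prototype is void *memset(void *, int, size_t); the fill byte
    // is passed as an int, hence the zero_extend to i32 here.)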
4347 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 4348 Entry.Ty = IntPtrTy; 4349 Args.push_back(Entry); 4350 Entry.Node = Op.getOperand(3); 4351 Args.push_back(Entry); 4352 std::pair<SDOperand,SDOperand> CallResult = 4353 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 4354 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 4355 return CallResult.second; 4356 } 4357 4358 MVT::ValueType AVT; 4359 SDOperand Count; 4360 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4361 unsigned BytesLeft = 0; 4362 bool TwoRepStos = false; 4363 if (ValC) { 4364 unsigned ValReg; 4365 uint64_t Val = ValC->getValue() & 255; 4366 4367 // If the value is a constant, then we can potentially use larger sets. 4368 switch (Align & 3) { 4369 case 2: // WORD aligned 4370 AVT = MVT::i16; 4371 ValReg = X86::AX; 4372 Val = (Val << 8) | Val; 4373 break; 4374 case 0: // DWORD aligned 4375 AVT = MVT::i32; 4376 ValReg = X86::EAX; 4377 Val = (Val << 8) | Val; 4378 Val = (Val << 16) | Val; 4379 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 4380 AVT = MVT::i64; 4381 ValReg = X86::RAX; 4382 Val = (Val << 32) | Val; 4383 } 4384 break; 4385 default: // Byte aligned 4386 AVT = MVT::i8; 4387 ValReg = X86::AL; 4388 Count = Op.getOperand(3); 4389 break; 4390 } 4391 4392 if (AVT > MVT::i8) { 4393 if (I) { 4394 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4395 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4396 BytesLeft = I->getValue() % UBytes; 4397 } else { 4398 assert(AVT >= MVT::i32 && 4399 "Do not use rep;stos if not at least DWORD aligned"); 4400 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4401 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4402 TwoRepStos = true; 4403 } 4404 } 4405 4406 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 4407 InFlag); 4408 InFlag = Chain.getValue(1); 4409 } else { 4410 AVT = MVT::i8; 4411 Count = Op.getOperand(3); 4412 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 4413 InFlag = Chain.getValue(1); 4414 } 4415 4416 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4417 Count, InFlag); 4418 InFlag = Chain.getValue(1); 4419 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4420 Op.getOperand(1), InFlag); 4421 InFlag = Chain.getValue(1); 4422 4423 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4424 SmallVector<SDOperand, 8> Ops; 4425 Ops.push_back(Chain); 4426 Ops.push_back(DAG.getValueType(AVT)); 4427 Ops.push_back(InFlag); 4428 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4429 4430 if (TwoRepStos) { 4431 InFlag = Chain.getValue(1); 4432 Count = Op.getOperand(3); 4433 MVT::ValueType CVT = Count.getValueType(); 4434 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4435 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4436 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4437 Left, InFlag); 4438 InFlag = Chain.getValue(1); 4439 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4440 Ops.clear(); 4441 Ops.push_back(Chain); 4442 Ops.push_back(DAG.getValueType(MVT::i8)); 4443 Ops.push_back(InFlag); 4444 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4445 } else if (BytesLeft) { 4446 // Issue stores for the last 1 - 7 bytes. 
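    // (At most one i32, one i16 and one i8 store are needed, e.g. 7 trailing
    // bytes are written as 4 + 2 + 1.)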
4447 SDOperand Value; 4448 unsigned Val = ValC->getValue() & 255; 4449 unsigned Offset = I->getValue() - BytesLeft; 4450 SDOperand DstAddr = Op.getOperand(1); 4451 MVT::ValueType AddrVT = DstAddr.getValueType(); 4452 if (BytesLeft >= 4) { 4453 Val = (Val << 8) | Val; 4454 Val = (Val << 16) | Val; 4455 Value = DAG.getConstant(Val, MVT::i32); 4456 Chain = DAG.getStore(Chain, Value, 4457 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4458 DAG.getConstant(Offset, AddrVT)), 4459 NULL, 0); 4460 BytesLeft -= 4; 4461 Offset += 4; 4462 } 4463 if (BytesLeft >= 2) { 4464 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4465 Chain = DAG.getStore(Chain, Value, 4466 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4467 DAG.getConstant(Offset, AddrVT)), 4468 NULL, 0); 4469 BytesLeft -= 2; 4470 Offset += 2; 4471 } 4472 if (BytesLeft == 1) { 4473 Value = DAG.getConstant(Val, MVT::i8); 4474 Chain = DAG.getStore(Chain, Value, 4475 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4476 DAG.getConstant(Offset, AddrVT)), 4477 NULL, 0); 4478 } 4479 } 4480 4481 return Chain; 4482} 4483 4484SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 4485 SDOperand ChainOp = Op.getOperand(0); 4486 SDOperand DestOp = Op.getOperand(1); 4487 SDOperand SourceOp = Op.getOperand(2); 4488 SDOperand CountOp = Op.getOperand(3); 4489 SDOperand AlignOp = Op.getOperand(4); 4490 SDOperand AlwaysInlineOp = Op.getOperand(5); 4491 4492 bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)->getValue(); 4493 unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue(); 4494 if (Align == 0) Align = 1; 4495 4496 // If size is unknown, call memcpy. 4497 ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp); 4498 if (!I) { 4499 assert(!AlwaysInline && "Cannot inline copy of unknown size"); 4500 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); 4501 } 4502 4503 // If not DWORD aligned or if size is more than threshold, then call memcpy. 4504 // The libc version is likely to be faster for the following cases. It can 4505 // use the address value and run time information about the CPU. 
4506 // With glibc 2.6.1 on a Core 2, copying an array of 100M longs was 30% faster with the libc call.
4507 unsigned Size = I->getValue();
4508 if (AlwaysInline ||
4509 (Size <= Subtarget->getMaxInlineSizeThreshold() &&
4510 (Align & 3) == 0))
4511 return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
4512 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
4513}
4514
4515SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
4516 SDOperand Dest,
4517 SDOperand Source,
4518 SDOperand Count,
4519 SelectionDAG &DAG) {
4520 MVT::ValueType IntPtr = getPointerTy();
4521 TargetLowering::ArgListTy Args;
4522 TargetLowering::ArgListEntry Entry;
4523 Entry.Ty = getTargetData()->getIntPtrType();
4524 Entry.Node = Dest; Args.push_back(Entry);
4525 Entry.Node = Source; Args.push_back(Entry);
4526 Entry.Node = Count; Args.push_back(Entry);
4527 std::pair<SDOperand,SDOperand> CallResult =
4528 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
4529 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
4530 return CallResult.second;
4531}
4532
4533SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
4534 SDOperand Dest,
4535 SDOperand Source,
4536 unsigned Size,
4537 unsigned Align,
4538 SelectionDAG &DAG) {
4539 MVT::ValueType AVT;
4540 unsigned BytesLeft = 0;
4541 switch (Align & 3) {
4542 case 2: // WORD aligned
4543 AVT = MVT::i16;
4544 break;
4545 case 0: // DWORD aligned
4546 AVT = MVT::i32;
4547 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
4548 AVT = MVT::i64;
4549 break;
4550 default: // Byte aligned
4551 AVT = MVT::i8;
4552 break;
4553 }
4554
4555 unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4556 SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
4557 BytesLeft = Size % UBytes;
4558
4559 SDOperand InFlag(0, 0);
4560 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4561 Count, InFlag);
4562 InFlag = Chain.getValue(1);
4563 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4564 Dest, InFlag);
4565 InFlag = Chain.getValue(1);
4566 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
4567 Source, InFlag);
4568 InFlag = Chain.getValue(1);
4569
4570 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4571 SmallVector<SDOperand, 8> Ops;
4572 Ops.push_back(Chain);
4573 Ops.push_back(DAG.getValueType(AVT));
4574 Ops.push_back(InFlag);
4575 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
4576
4577 if (BytesLeft) {
4578 // Issue loads and stores for the last 1 - 7 bytes.
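// E.g. a DWORD-aligned copy of Size = 15 moves 3 dwords via rep;movs,
// then the code below copies the trailing 3 bytes with one i16 and one
// i8 load/store pair at offsets 12 and 14.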
4579 unsigned Offset = Size - BytesLeft;
4580 SDOperand DstAddr = Dest;
4581 MVT::ValueType DstVT = DstAddr.getValueType();
4582 SDOperand SrcAddr = Source;
4583 MVT::ValueType SrcVT = SrcAddr.getValueType();
4584 SDOperand Value;
4585 if (BytesLeft >= 4) {
4586 Value = DAG.getLoad(MVT::i32, Chain,
4587 DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4588 DAG.getConstant(Offset, SrcVT)),
4589 NULL, 0);
4590 Chain = Value.getValue(1);
4591 Chain = DAG.getStore(Chain, Value,
4592 DAG.getNode(ISD::ADD, DstVT, DstAddr,
4593 DAG.getConstant(Offset, DstVT)),
4594 NULL, 0);
4595 BytesLeft -= 4;
4596 Offset += 4;
4597 }
4598 if (BytesLeft >= 2) {
4599 Value = DAG.getLoad(MVT::i16, Chain,
4600 DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4601 DAG.getConstant(Offset, SrcVT)),
4602 NULL, 0);
4603 Chain = Value.getValue(1);
4604 Chain = DAG.getStore(Chain, Value,
4605 DAG.getNode(ISD::ADD, DstVT, DstAddr,
4606 DAG.getConstant(Offset, DstVT)),
4607 NULL, 0);
4608 BytesLeft -= 2;
4609 Offset += 2;
4610 }
4611
4612 if (BytesLeft == 1) {
4613 Value = DAG.getLoad(MVT::i8, Chain,
4614 DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4615 DAG.getConstant(Offset, SrcVT)),
4616 NULL, 0);
4617 Chain = Value.getValue(1);
4618 Chain = DAG.getStore(Chain, Value,
4619 DAG.getNode(ISD::ADD, DstVT, DstAddr,
4620 DAG.getConstant(Offset, DstVT)),
4621 NULL, 0);
4622 }
4623 }
4624
4625 return Chain;
4626}
4627
4628SDOperand
4629X86TargetLowering::LowerREADCYCLECOUNTER(SDOperand Op, SelectionDAG &DAG) {
4630 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4631 SDOperand TheOp = Op.getOperand(0);
4632 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
4633 if (Subtarget->is64Bit()) {
4634 SDOperand Copy1 =
4635 DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
4636 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
4637 MVT::i64, Copy1.getValue(2));
4638 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
4639 DAG.getConstant(32, MVT::i8));
4640 SDOperand Ops[] = {
4641 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
4642 };
4643
4644 Tys = DAG.getVTList(MVT::i64, MVT::Other);
4645 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
4646 }
4647
4648 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
4649 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
4650 MVT::i32, Copy1.getValue(2));
4651 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
4652 Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
4653 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
4654}
4655
4656SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
4657 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
4658
4659 if (!Subtarget->is64Bit()) {
4660 // vastart just stores the address of the VarArgsFrameIndex slot into the
4661 // memory location argument.
4662 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4663 return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
4664 SV->getOffset());
4665 }
4666
4667 // __va_list_tag:
4668 // gp_offset (0 .. 6*8)
4669 // fp_offset (48 .. 48 + 8*16)
4670 // overflow_arg_area (points to parameters passed in memory).
4671 // reg_save_area 4672 SmallVector<SDOperand, 8> MemOps; 4673 SDOperand FIN = Op.getOperand(1); 4674 // Store gp_offset 4675 SDOperand Store = DAG.getStore(Op.getOperand(0), 4676 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4677 FIN, SV->getValue(), SV->getOffset()); 4678 MemOps.push_back(Store); 4679 4680 // Store fp_offset 4681 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4682 DAG.getConstant(4, getPointerTy())); 4683 Store = DAG.getStore(Op.getOperand(0), 4684 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4685 FIN, SV->getValue(), SV->getOffset()); 4686 MemOps.push_back(Store); 4687 4688 // Store ptr to overflow_arg_area 4689 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4690 DAG.getConstant(4, getPointerTy())); 4691 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4692 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4693 SV->getOffset()); 4694 MemOps.push_back(Store); 4695 4696 // Store ptr to reg_save_area. 4697 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4698 DAG.getConstant(8, getPointerTy())); 4699 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4700 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4701 SV->getOffset()); 4702 MemOps.push_back(Store); 4703 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4704} 4705 4706SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4707 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4708 SDOperand Chain = Op.getOperand(0); 4709 SDOperand DstPtr = Op.getOperand(1); 4710 SDOperand SrcPtr = Op.getOperand(2); 4711 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4712 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4713 4714 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4715 SrcSV->getValue(), SrcSV->getOffset()); 4716 Chain = SrcPtr.getValue(1); 4717 for (unsigned i = 0; i < 3; ++i) { 4718 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4719 SrcSV->getValue(), SrcSV->getOffset()); 4720 Chain = Val.getValue(1); 4721 Chain = DAG.getStore(Chain, Val, DstPtr, 4722 DstSV->getValue(), DstSV->getOffset()); 4723 if (i == 2) 4724 break; 4725 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4726 DAG.getConstant(8, getPointerTy())); 4727 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4728 DAG.getConstant(8, getPointerTy())); 4729 } 4730 return Chain; 4731} 4732 4733SDOperand 4734X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4735 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4736 switch (IntNo) { 4737 default: return SDOperand(); // Don't custom lower most intrinsics. 4738 // Comparison intrinsics. 
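// Each comi/ucomi intrinsic below becomes an X86ISD::COMI or UCOMI node
// that sets EFLAGS, followed by an X86ISD::SETCC on the translated
// condition code, extended back to the i32 intrinsic result.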
4739 case Intrinsic::x86_sse_comieq_ss: 4740 case Intrinsic::x86_sse_comilt_ss: 4741 case Intrinsic::x86_sse_comile_ss: 4742 case Intrinsic::x86_sse_comigt_ss: 4743 case Intrinsic::x86_sse_comige_ss: 4744 case Intrinsic::x86_sse_comineq_ss: 4745 case Intrinsic::x86_sse_ucomieq_ss: 4746 case Intrinsic::x86_sse_ucomilt_ss: 4747 case Intrinsic::x86_sse_ucomile_ss: 4748 case Intrinsic::x86_sse_ucomigt_ss: 4749 case Intrinsic::x86_sse_ucomige_ss: 4750 case Intrinsic::x86_sse_ucomineq_ss: 4751 case Intrinsic::x86_sse2_comieq_sd: 4752 case Intrinsic::x86_sse2_comilt_sd: 4753 case Intrinsic::x86_sse2_comile_sd: 4754 case Intrinsic::x86_sse2_comigt_sd: 4755 case Intrinsic::x86_sse2_comige_sd: 4756 case Intrinsic::x86_sse2_comineq_sd: 4757 case Intrinsic::x86_sse2_ucomieq_sd: 4758 case Intrinsic::x86_sse2_ucomilt_sd: 4759 case Intrinsic::x86_sse2_ucomile_sd: 4760 case Intrinsic::x86_sse2_ucomigt_sd: 4761 case Intrinsic::x86_sse2_ucomige_sd: 4762 case Intrinsic::x86_sse2_ucomineq_sd: { 4763 unsigned Opc = 0; 4764 ISD::CondCode CC = ISD::SETCC_INVALID; 4765 switch (IntNo) { 4766 default: break; 4767 case Intrinsic::x86_sse_comieq_ss: 4768 case Intrinsic::x86_sse2_comieq_sd: 4769 Opc = X86ISD::COMI; 4770 CC = ISD::SETEQ; 4771 break; 4772 case Intrinsic::x86_sse_comilt_ss: 4773 case Intrinsic::x86_sse2_comilt_sd: 4774 Opc = X86ISD::COMI; 4775 CC = ISD::SETLT; 4776 break; 4777 case Intrinsic::x86_sse_comile_ss: 4778 case Intrinsic::x86_sse2_comile_sd: 4779 Opc = X86ISD::COMI; 4780 CC = ISD::SETLE; 4781 break; 4782 case Intrinsic::x86_sse_comigt_ss: 4783 case Intrinsic::x86_sse2_comigt_sd: 4784 Opc = X86ISD::COMI; 4785 CC = ISD::SETGT; 4786 break; 4787 case Intrinsic::x86_sse_comige_ss: 4788 case Intrinsic::x86_sse2_comige_sd: 4789 Opc = X86ISD::COMI; 4790 CC = ISD::SETGE; 4791 break; 4792 case Intrinsic::x86_sse_comineq_ss: 4793 case Intrinsic::x86_sse2_comineq_sd: 4794 Opc = X86ISD::COMI; 4795 CC = ISD::SETNE; 4796 break; 4797 case Intrinsic::x86_sse_ucomieq_ss: 4798 case Intrinsic::x86_sse2_ucomieq_sd: 4799 Opc = X86ISD::UCOMI; 4800 CC = ISD::SETEQ; 4801 break; 4802 case Intrinsic::x86_sse_ucomilt_ss: 4803 case Intrinsic::x86_sse2_ucomilt_sd: 4804 Opc = X86ISD::UCOMI; 4805 CC = ISD::SETLT; 4806 break; 4807 case Intrinsic::x86_sse_ucomile_ss: 4808 case Intrinsic::x86_sse2_ucomile_sd: 4809 Opc = X86ISD::UCOMI; 4810 CC = ISD::SETLE; 4811 break; 4812 case Intrinsic::x86_sse_ucomigt_ss: 4813 case Intrinsic::x86_sse2_ucomigt_sd: 4814 Opc = X86ISD::UCOMI; 4815 CC = ISD::SETGT; 4816 break; 4817 case Intrinsic::x86_sse_ucomige_ss: 4818 case Intrinsic::x86_sse2_ucomige_sd: 4819 Opc = X86ISD::UCOMI; 4820 CC = ISD::SETGE; 4821 break; 4822 case Intrinsic::x86_sse_ucomineq_ss: 4823 case Intrinsic::x86_sse2_ucomineq_sd: 4824 Opc = X86ISD::UCOMI; 4825 CC = ISD::SETNE; 4826 break; 4827 } 4828 4829 unsigned X86CC; 4830 SDOperand LHS = Op.getOperand(1); 4831 SDOperand RHS = Op.getOperand(2); 4832 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4833 4834 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 4835 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 4836 DAG.getConstant(X86CC, MVT::i8), Cond); 4837 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4838 } 4839 } 4840} 4841 4842SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4843 // Depths > 0 not supported yet! 
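// Depth 0 is handled by loading the saved return address straight from
// the slot computed by getReturnAddressFrameIndex; walking the frame
// chain for deeper frames is not implemented.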
4844 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4845 return SDOperand(); 4846 4847 // Just load the return address 4848 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4849 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4850} 4851 4852SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4853 // Depths > 0 not supported yet! 4854 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4855 return SDOperand(); 4856 4857 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4858 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4859 DAG.getConstant(4, getPointerTy())); 4860} 4861 4862SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4863 SelectionDAG &DAG) { 4864 // Is not yet supported on x86-64 4865 if (Subtarget->is64Bit()) 4866 return SDOperand(); 4867 4868 return DAG.getConstant(8, getPointerTy()); 4869} 4870 4871SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4872{ 4873 assert(!Subtarget->is64Bit() && 4874 "Lowering of eh_return builtin is not supported yet on x86-64"); 4875 4876 MachineFunction &MF = DAG.getMachineFunction(); 4877 SDOperand Chain = Op.getOperand(0); 4878 SDOperand Offset = Op.getOperand(1); 4879 SDOperand Handler = Op.getOperand(2); 4880 4881 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4882 getPointerTy()); 4883 4884 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4885 DAG.getConstant(-4UL, getPointerTy())); 4886 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4887 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4888 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4889 MF.addLiveOut(X86::ECX); 4890 4891 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4892 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4893} 4894 4895SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4896 SelectionDAG &DAG) { 4897 SDOperand Root = Op.getOperand(0); 4898 SDOperand Trmp = Op.getOperand(1); // trampoline 4899 SDOperand FPtr = Op.getOperand(2); // nested function 4900 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4901 4902 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4903 4904 if (Subtarget->is64Bit()) { 4905 return SDOperand(); // not yet supported 4906 } else { 4907 Function *Func = (Function *) 4908 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4909 unsigned CC = Func->getCallingConv(); 4910 unsigned NestReg; 4911 4912 switch (CC) { 4913 default: 4914 assert(0 && "Unsupported calling convention"); 4915 case CallingConv::C: 4916 case CallingConv::X86_StdCall: { 4917 // Pass 'nest' parameter in ECX. 4918 // Must be kept in sync with X86CallingConv.td 4919 NestReg = X86::ECX; 4920 4921 // Check that ECX wasn't needed by an 'inreg' parameter. 4922 const FunctionType *FTy = Func->getFunctionType(); 4923 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4924 4925 if (Attrs && !Func->isVarArg()) { 4926 unsigned InRegCount = 0; 4927 unsigned Idx = 1; 4928 4929 for (FunctionType::param_iterator I = FTy->param_begin(), 4930 E = FTy->param_end(); I != E; ++I, ++Idx) 4931 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4932 // FIXME: should only count parameters that are lowered to integers. 
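// Each inreg parameter consumes ceil(bits/32) integer registers; with
// ECX reserved for 'nest', presumably only EAX and EDX remain, hence
// the InRegCount > 2 check below.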
4933 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
4934
4935 if (InRegCount > 2) {
4936 cerr << "Nest register in use - reduce number of inreg parameters!\n";
4937 abort();
4938 }
4939 }
4940 break;
4941 }
4942 case CallingConv::X86_FastCall:
4943 // Pass 'nest' parameter in EAX.
4944 // Must be kept in sync with X86CallingConv.td
4945 NestReg = X86::EAX;
4946 break;
4947 }
4948
4949 const X86InstrInfo *TII =
4950 ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
4951
4952 SDOperand OutChains[4];
4953 SDOperand Addr, Disp;
4954
4955 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
4956 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
4957
4958 unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
4959 unsigned char N86Reg = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
4960 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
4961 Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
4962
4963 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
4964 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
4965 TrmpSV->getOffset() + 1, false, 1);
4966
4967 unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
4968 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
4969 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
4970 TrmpSV->getValue(), TrmpSV->getOffset() + 5);
4971
4972 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
4973 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
4974 TrmpSV->getOffset() + 6, false, 1);
4975
4976 SDOperand Ops[] =
4977 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
4978 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
4979 }
4980}
4981
4982/// LowerOperation - Provide custom lowering hooks for some operations.
4983///
4984SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
4985 switch (Op.getOpcode()) {
4986 default: assert(0 && "Should not custom lower this!");
4987 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
4988 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
4989 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4990 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
4991 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
4992 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
4993 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
4994 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
4995 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
4996 case ISD::SHL_PARTS:
4997 case ISD::SRA_PARTS:
4998 case ISD::SRL_PARTS: return LowerShift(Op, DAG);
4999 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
5000 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
5001 case ISD::FABS: return LowerFABS(Op, DAG);
5002 case ISD::FNEG: return LowerFNEG(Op, DAG);
5003 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
5004 case ISD::SETCC: return LowerSETCC(Op, DAG);
5005 case ISD::SELECT: return LowerSELECT(Op, DAG);
5006 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
5007 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
5008 case ISD::CALL: return LowerCALL(Op, DAG);
5009 case ISD::RET: return LowerRET(Op, DAG);
5010 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
5011 case ISD::MEMSET: return LowerMEMSET(Op, DAG);
5012 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
5013 case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
5014 case ISD::VASTART: return LowerVASTART(Op, DAG);
5015 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
5016 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5017 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
5018 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
5019 case ISD::FRAME_TO_ARGS_OFFSET:
5020 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
5021 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
5022 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
5023 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
5024 }
5025 return SDOperand();
5026}
5027
5028const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
5029 switch (Opcode) {
5030 default: return NULL;
5031 case X86ISD::SHLD: return "X86ISD::SHLD";
5032 case X86ISD::SHRD: return "X86ISD::SHRD";
5033 case X86ISD::FAND: return "X86ISD::FAND";
5034 case X86ISD::FOR: return "X86ISD::FOR";
5035 case X86ISD::FXOR: return "X86ISD::FXOR";
5036 case X86ISD::FSRL: return "X86ISD::FSRL";
5037 case X86ISD::FILD: return "X86ISD::FILD";
5038 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
5039 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
5040 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
5041 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
5042 case X86ISD::FLD: return "X86ISD::FLD";
5043 case X86ISD::FST: return "X86ISD::FST";
5044 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
5045 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
5046 case X86ISD::CALL: return "X86ISD::CALL";
5047 case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
5048 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
5049 case X86ISD::CMP: return "X86ISD::CMP";
5050 case X86ISD::COMI: return "X86ISD::COMI";
"X86ISD::COMI"; 5051 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 5052 case X86ISD::SETCC: return "X86ISD::SETCC"; 5053 case X86ISD::CMOV: return "X86ISD::CMOV"; 5054 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 5055 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 5056 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 5057 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 5058 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 5059 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 5060 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 5061 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 5062 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 5063 case X86ISD::FMAX: return "X86ISD::FMAX"; 5064 case X86ISD::FMIN: return "X86ISD::FMIN"; 5065 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 5066 case X86ISD::FRCP: return "X86ISD::FRCP"; 5067 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 5068 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 5069 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 5070 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 5071 } 5072} 5073 5074// isLegalAddressingMode - Return true if the addressing mode represented 5075// by AM is legal for this target, for a load/store of the specified type. 5076bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 5077 const Type *Ty) const { 5078 // X86 supports extremely general addressing modes. 5079 5080 // X86 allows a sign-extended 32-bit immediate field as a displacement. 5081 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 5082 return false; 5083 5084 if (AM.BaseGV) { 5085 // We can only fold this if we don't need an extra load. 5086 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 5087 return false; 5088 5089 // X86-64 only supports addr of globals in small code model. 5090 if (Subtarget->is64Bit()) { 5091 if (getTargetMachine().getCodeModel() != CodeModel::Small) 5092 return false; 5093 // If lower 4G is not available, then we must use rip-relative addressing. 5094 if (AM.BaseOffs || AM.Scale > 1) 5095 return false; 5096 } 5097 } 5098 5099 switch (AM.Scale) { 5100 case 0: 5101 case 1: 5102 case 2: 5103 case 4: 5104 case 8: 5105 // These scales always work. 5106 break; 5107 case 3: 5108 case 5: 5109 case 9: 5110 // These scales are formed with basereg+scalereg. Only accept if there is 5111 // no basereg yet. 5112 if (AM.HasBaseReg) 5113 return false; 5114 break; 5115 default: // Other stuff never works. 5116 return false; 5117 } 5118 5119 return true; 5120} 5121 5122 5123bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { 5124 if (!Ty1->isInteger() || !Ty2->isInteger()) 5125 return false; 5126 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); 5127 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); 5128 if (NumBits1 <= NumBits2) 5129 return false; 5130 return Subtarget->is64Bit() || NumBits1 < 64; 5131} 5132 5133bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1, 5134 MVT::ValueType VT2) const { 5135 if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2)) 5136 return false; 5137 unsigned NumBits1 = MVT::getSizeInBits(VT1); 5138 unsigned NumBits2 = MVT::getSizeInBits(VT2); 5139 if (NumBits1 <= NumBits2) 5140 return false; 5141 return Subtarget->is64Bit() || NumBits1 < 64; 5142} 5143 5144/// isShuffleMaskLegal - Targets can use this to indicate that they only 5145/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 
5146/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
5147/// are assumed to be legal.
5148bool
5149X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
5150 // Only do shuffles on 128-bit vector types for now.
5151 if (MVT::getSizeInBits(VT) == 64) return false;
5152 return (Mask.Val->getNumOperands() <= 4 ||
5153 isIdentityMask(Mask.Val) ||
5154 isIdentityMask(Mask.Val, true) ||
5155 isSplatMask(Mask.Val) ||
5156 isPSHUFHW_PSHUFLWMask(Mask.Val) ||
5157 X86::isUNPCKLMask(Mask.Val) ||
5158 X86::isUNPCKHMask(Mask.Val) ||
5159 X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
5160 X86::isUNPCKH_v_undef_Mask(Mask.Val));
5161}
5162
5163bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
5164 MVT::ValueType EVT,
5165 SelectionDAG &DAG) const {
5166 unsigned NumElts = BVOps.size();
5167 // Only do shuffles on 128-bit vector types for now.
5168 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
5169 if (NumElts == 2) return true;
5170 if (NumElts == 4) {
5171 return (isMOVLMask(&BVOps[0], 4) ||
5172 isCommutedMOVL(&BVOps[0], 4, true) ||
5173 isSHUFPMask(&BVOps[0], 4) ||
5174 isCommutedSHUFP(&BVOps[0], 4));
5175 }
5176 return false;
5177}
5178
5179//===----------------------------------------------------------------------===//
5180// X86 Scheduler Hooks
5181//===----------------------------------------------------------------------===//
5182
5183MachineBasicBlock *
5184X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
5185 MachineBasicBlock *BB) {
5186 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5187 switch (MI->getOpcode()) {
5188 default: assert(false && "Unexpected instr type to insert");
5189 case X86::CMOV_FR32:
5190 case X86::CMOV_FR64:
5191 case X86::CMOV_V4F32:
5192 case X86::CMOV_V2F64:
5193 case X86::CMOV_V2I64: {
5194 // To "insert" a SELECT_CC instruction, we actually have to insert the
5195 // diamond control-flow pattern. The incoming instruction knows the
5196 // destination vreg to set, the condition code register to branch on, the
5197 // true/false values to select between, and a branch opcode to use.
5198 const BasicBlock *LLVM_BB = BB->getBasicBlock();
5199 ilist<MachineBasicBlock>::iterator It = BB;
5200 ++It;
5201
5202 // thisMBB:
5203 // ...
5204 // TrueVal = ...
5205 // cmpTY ccX, r1, r2
5206 // bCC sinkMBB
5207 // fallthrough --> copy0MBB
5208 MachineBasicBlock *thisMBB = BB;
5209 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
5210 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
5211 unsigned Opc =
5212 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
5213 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
5214 MachineFunction *F = BB->getParent();
5215 F->getBasicBlockList().insert(It, copy0MBB);
5216 F->getBasicBlockList().insert(It, sinkMBB);
5217 // Update machine-CFG edges by first adding all successors of the current
5218 // block to the new block which will contain the Phi node for the select.
5219 for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
5220 e = BB->succ_end(); i != e; ++i)
5221 sinkMBB->addSuccessor(*i);
5222 // Next, remove all successors of the current block, and add the true
5223 // and fallthrough blocks as its successors.
5224 while (!BB->succ_empty())
5225 BB->removeSuccessor(BB->succ_begin());
5226 BB->addSuccessor(copy0MBB);
5227 BB->addSuccessor(sinkMBB);
5228
5229 // copy0MBB:
5230 // %FalseValue = ...
5231 // # fallthrough to sinkMBB
5232 BB = copy0MBB;
5233
5234 // Update machine-CFG edges
5235 BB->addSuccessor(sinkMBB);
5236
5237 // sinkMBB:
5238 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
5239 // ...
5240 BB = sinkMBB;
5241 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
5242 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
5243 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
5244
5245 delete MI; // The pseudo instruction is gone now.
5246 return BB;
5247 }
5248
5249 case X86::FP32_TO_INT16_IN_MEM:
5250 case X86::FP32_TO_INT32_IN_MEM:
5251 case X86::FP32_TO_INT64_IN_MEM:
5252 case X86::FP64_TO_INT16_IN_MEM:
5253 case X86::FP64_TO_INT32_IN_MEM:
5254 case X86::FP64_TO_INT64_IN_MEM:
5255 case X86::FP80_TO_INT16_IN_MEM:
5256 case X86::FP80_TO_INT32_IN_MEM:
5257 case X86::FP80_TO_INT64_IN_MEM: {
5258 // Change the floating point control register to use "round towards zero"
5259 // mode when truncating to an integer value.
5260 MachineFunction *F = BB->getParent();
5261 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
5262 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
5263
5264 // Load the old value of the control word...
5265 unsigned OldCW =
5266 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
5267 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
5268
5269 // Set the high part to be round to zero...
5270 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
5271 .addImm(0xC7F);
5272
5273 // Reload the modified control word now...
5274 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
5275
5276 // Restore the memory image of the control word to its original value
5277 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
5278 .addReg(OldCW);
5279
5280 // Get the X86 opcode to use.
5281 unsigned Opc;
5282 switch (MI->getOpcode()) {
5283 default: assert(0 && "illegal opcode!");
5284 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
5285 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
5286 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
5287 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
5288 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
5289 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
5290 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
5291 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
5292 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
5293 }
5294
5295 X86AddressMode AM;
5296 MachineOperand &Op = MI->getOperand(0);
5297 if (Op.isRegister()) {
5298 AM.BaseType = X86AddressMode::RegBase;
5299 AM.Base.Reg = Op.getReg();
5300 } else {
5301 AM.BaseType = X86AddressMode::FrameIndexBase;
5302 AM.Base.FrameIndex = Op.getFrameIndex();
5303 }
5304 Op = MI->getOperand(1);
5305 if (Op.isImmediate())
5306 AM.Scale = Op.getImm();
5307 Op = MI->getOperand(2);
5308 if (Op.isImmediate())
5309 AM.IndexReg = Op.getImm();
5310 Op = MI->getOperand(3);
5311 if (Op.isGlobalAddress()) {
5312 AM.GV = Op.getGlobal();
5313 } else {
5314 AM.Disp = Op.getImm();
5315 }
5316 addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
5317 .addReg(MI->getOperand(4).getReg());
5318
5319 // Reload the original control word now.
5320 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
5321
5322 delete MI; // The pseudo instruction is gone now.
5323 return BB; 5324 } 5325 } 5326} 5327 5328//===----------------------------------------------------------------------===// 5329// X86 Optimization Hooks 5330//===----------------------------------------------------------------------===// 5331 5332void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 5333 uint64_t Mask, 5334 uint64_t &KnownZero, 5335 uint64_t &KnownOne, 5336 const SelectionDAG &DAG, 5337 unsigned Depth) const { 5338 unsigned Opc = Op.getOpcode(); 5339 assert((Opc >= ISD::BUILTIN_OP_END || 5340 Opc == ISD::INTRINSIC_WO_CHAIN || 5341 Opc == ISD::INTRINSIC_W_CHAIN || 5342 Opc == ISD::INTRINSIC_VOID) && 5343 "Should use MaskedValueIsZero if you don't know whether Op" 5344 " is a target node!"); 5345 5346 KnownZero = KnownOne = 0; // Don't know anything. 5347 switch (Opc) { 5348 default: break; 5349 case X86ISD::SETCC: 5350 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 5351 break; 5352 } 5353} 5354 5355/// getShuffleScalarElt - Returns the scalar element that will make up the ith 5356/// element of the result of the vector shuffle. 5357static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 5358 MVT::ValueType VT = N->getValueType(0); 5359 SDOperand PermMask = N->getOperand(2); 5360 unsigned NumElems = PermMask.getNumOperands(); 5361 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 5362 i %= NumElems; 5363 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5364 return (i == 0) 5365 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5366 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 5367 SDOperand Idx = PermMask.getOperand(i); 5368 if (Idx.getOpcode() == ISD::UNDEF) 5369 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5370 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 5371 } 5372 return SDOperand(); 5373} 5374 5375/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 5376/// node is a GlobalAddress + an offset. 5377static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 5378 unsigned Opc = N->getOpcode(); 5379 if (Opc == X86ISD::Wrapper) { 5380 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 5381 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 5382 return true; 5383 } 5384 } else if (Opc == ISD::ADD) { 5385 SDOperand N1 = N->getOperand(0); 5386 SDOperand N2 = N->getOperand(1); 5387 if (isGAPlusOffset(N1.Val, GA, Offset)) { 5388 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 5389 if (V) { 5390 Offset += V->getSignExtended(); 5391 return true; 5392 } 5393 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 5394 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 5395 if (V) { 5396 Offset += V->getSignExtended(); 5397 return true; 5398 } 5399 } 5400 } 5401 return false; 5402} 5403 5404/// isConsecutiveLoad - Returns true if N is loading from an address of Base 5405/// + Dist * Size. 
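/// E.g. with Size = 4, a load from Base's address plus 8 is consecutive to
/// Base at Dist = 2. Both frame-index and global+offset address forms are
/// recognized.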
5406static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 5407 MachineFrameInfo *MFI) { 5408 if (N->getOperand(0).Val != Base->getOperand(0).Val) 5409 return false; 5410 5411 SDOperand Loc = N->getOperand(1); 5412 SDOperand BaseLoc = Base->getOperand(1); 5413 if (Loc.getOpcode() == ISD::FrameIndex) { 5414 if (BaseLoc.getOpcode() != ISD::FrameIndex) 5415 return false; 5416 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 5417 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 5418 int FS = MFI->getObjectSize(FI); 5419 int BFS = MFI->getObjectSize(BFI); 5420 if (FS != BFS || FS != Size) return false; 5421 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 5422 } else { 5423 GlobalValue *GV1 = NULL; 5424 GlobalValue *GV2 = NULL; 5425 int64_t Offset1 = 0; 5426 int64_t Offset2 = 0; 5427 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 5428 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 5429 if (isGA1 && isGA2 && GV1 == GV2) 5430 return Offset1 == (Offset2 + Dist*Size); 5431 } 5432 5433 return false; 5434} 5435 5436static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 5437 const X86Subtarget *Subtarget) { 5438 GlobalValue *GV; 5439 int64_t Offset; 5440 if (isGAPlusOffset(Base, GV, Offset)) 5441 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 5442 else { 5443 assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); 5444 int BFI = cast<FrameIndexSDNode>(Base)->getIndex(); 5445 if (BFI < 0) 5446 // Fixed objects do not specify alignment, however the offsets are known. 5447 return ((Subtarget->getStackAlignment() % 16) == 0 && 5448 (MFI->getObjectOffset(BFI) % 16) == 0); 5449 else 5450 return MFI->getObjectAlignment(BFI) >= 16; 5451 } 5452 return false; 5453} 5454 5455 5456/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 5457/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 5458/// if the load addresses are consecutive, non-overlapping, and in the right 5459/// order. 5460static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 5461 const X86Subtarget *Subtarget) { 5462 MachineFunction &MF = DAG.getMachineFunction(); 5463 MachineFrameInfo *MFI = MF.getFrameInfo(); 5464 MVT::ValueType VT = N->getValueType(0); 5465 MVT::ValueType EVT = MVT::getVectorElementType(VT); 5466 SDOperand PermMask = N->getOperand(2); 5467 int NumElems = (int)PermMask.getNumOperands(); 5468 SDNode *Base = NULL; 5469 for (int i = 0; i < NumElems; ++i) { 5470 SDOperand Idx = PermMask.getOperand(i); 5471 if (Idx.getOpcode() == ISD::UNDEF) { 5472 if (!Base) return SDOperand(); 5473 } else { 5474 SDOperand Arg = 5475 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 5476 if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val)) 5477 return SDOperand(); 5478 if (!Base) 5479 Base = Arg.Val; 5480 else if (!isConsecutiveLoad(Arg.Val, Base, 5481 i, MVT::getSizeInBits(EVT)/8,MFI)) 5482 return SDOperand(); 5483 } 5484 } 5485 5486 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 5487 LoadSDNode *LD = cast<LoadSDNode>(Base); 5488 if (isAlign16) { 5489 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5490 LD->getSrcValueOffset(), LD->isVolatile()); 5491 } else { 5492 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5493 LD->getSrcValueOffset(), LD->isVolatile(), 5494 LD->getAlignment()); 5495 } 5496} 5497 5498/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 
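/// For example, (select (setlt X, Y), X, Y) on f32/f64 becomes X86ISD::FMIN
/// and the mirrored pattern becomes X86ISD::FMAX. The non-strict orderings
/// (e.g. SETLE) are only combined under UnsafeFPMath, since the SSE min/max
/// instructions do not handle NaNs and signed zeros the way those
/// comparisons require.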
5499static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
5500 const X86Subtarget *Subtarget) {
5501 SDOperand Cond = N->getOperand(0);
5502
5503 // If we have SSE2 support, try to form min/max nodes.
5504 if (Subtarget->hasSSE2() &&
5505 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
5506 if (Cond.getOpcode() == ISD::SETCC) {
5507 // Get the LHS/RHS of the select.
5508 SDOperand LHS = N->getOperand(1);
5509 SDOperand RHS = N->getOperand(2);
5510 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
5511
5512 unsigned Opcode = 0;
5513 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
5514 switch (CC) {
5515 default: break;
5516 case ISD::SETOLE: // (X <= Y) ? X : Y -> min
5517 case ISD::SETULE:
5518 case ISD::SETLE:
5519 if (!UnsafeFPMath) break;
5520 // FALL THROUGH.
5521 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
5522 case ISD::SETLT:
5523 Opcode = X86ISD::FMIN;
5524 break;
5525
5526 case ISD::SETOGT: // (X > Y) ? X : Y -> max
5527 case ISD::SETUGT:
5528 case ISD::SETGT:
5529 if (!UnsafeFPMath) break;
5530 // FALL THROUGH.
5531 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
5532 case ISD::SETGE:
5533 Opcode = X86ISD::FMAX;
5534 break;
5535 }
5536 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
5537 switch (CC) {
5538 default: break;
5539 case ISD::SETOGT: // (X > Y) ? Y : X -> min
5540 case ISD::SETUGT:
5541 case ISD::SETGT:
5542 if (!UnsafeFPMath) break;
5543 // FALL THROUGH.
5544 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
5545 case ISD::SETGE:
5546 Opcode = X86ISD::FMIN;
5547 break;
5548
5549 case ISD::SETOLE: // (X <= Y) ? Y : X -> max
5550 case ISD::SETULE:
5551 case ISD::SETLE:
5552 if (!UnsafeFPMath) break;
5553 // FALL THROUGH.
5554 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
5555 case ISD::SETLT:
5556 Opcode = X86ISD::FMAX;
5557 break;
5558 }
5559 }
5560
5561 if (Opcode)
5562 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
5563 }
5564
5565 }
5566
5567 return SDOperand();
5568}
5569
5570
5571SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
5572 DAGCombinerInfo &DCI) const {
5573 SelectionDAG &DAG = DCI.DAG;
5574 switch (N->getOpcode()) {
5575 default: break;
5576 case ISD::VECTOR_SHUFFLE:
5577 return PerformShuffleCombine(N, DAG, Subtarget);
5578 case ISD::SELECT:
5579 return PerformSELECTCombine(N, DAG, Subtarget);
5580 }
5581
5582 return SDOperand();
5583}
5584
5585//===----------------------------------------------------------------------===//
5586// X86 Inline Assembly Support
5587//===----------------------------------------------------------------------===//
5588
5589/// getConstraintType - Given a constraint letter, return the type of
5590/// constraint it is for this target.
5591X86TargetLowering::ConstraintType
5592X86TargetLowering::getConstraintType(const std::string &Constraint) const {
5593 if (Constraint.size() == 1) {
5594 switch (Constraint[0]) {
5595 case 'A':
5596 case 'r':
5597 case 'R':
5598 case 'l':
5599 case 'q':
5600 case 'Q':
5601 case 'x':
5602 case 'Y':
5603 return C_RegisterClass;
5604 default:
5605 break;
5606 }
5607 }
5608 return TargetLowering::getConstraintType(Constraint);
5609}
5610
5611/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5612/// vector. If it is invalid, don't add anything to Ops.
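/// For the letters handled here, 'I' only accepts immediates in [0, 31]
/// (e.g. shift counts), 'N' accepts [0, 255] (e.g. in/out port numbers),
/// and 'i' accepts literal immediates or, in non-PIC mode, a global
/// address plus an optional constant offset.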
5613void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
5614 char Constraint,
5615 std::vector<SDOperand>&Ops,
5616 SelectionDAG &DAG) {
5617 SDOperand Result(0, 0);
5618
5619 switch (Constraint) {
5620 default: break;
5621 case 'I':
5622 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
5623 if (C->getValue() <= 31) {
5624 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
5625 break;
5626 }
5627 }
5628 return;
5629 case 'N':
5630 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
5631 if (C->getValue() <= 255) {
5632 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
5633 break;
5634 }
5635 }
5636 return;
5637 case 'i': {
5638 // Literal immediates are always ok.
5639 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
5640 Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
5641 break;
5642 }
5643
5644 // If we are in non-pic codegen mode, we allow the address of a global (with
5645 // an optional displacement) to be used with 'i'.
5646 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
5647 int64_t Offset = 0;
5648
5649 // Match (GA), (GA+C) or (C+GA).
5650 if (GA) {
5651 Offset = GA->getOffset();
5652 } else if (Op.getOpcode() == ISD::ADD) {
5653 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5654 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
5655 if (C && GA) {
5656 Offset = GA->getOffset()+C->getValue();
5657 } else {
5658 C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
5659 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
5660 if (C && GA)
5661 Offset = GA->getOffset()+C->getValue();
5662 else
5663 C = 0, GA = 0;
5664 }
5665 }
5666
5667 if (GA) {
5668 // If addressing this global requires a load (e.g. in PIC mode), we can't
5669 // match.
5670 if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
5671 false))
5672 return;
5673
5674 Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
5675 Offset);
5676 Result = Op;
5677 break;
5678 }
5679
5680 // Otherwise, not valid for this mode.
5681 return;
5682 }
5683 }
5684
5685 if (Result.Val) {
5686 Ops.push_back(Result);
5687 return;
5688 }
5689 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5690}
5691
5692std::vector<unsigned> X86TargetLowering::
5693getRegClassForInlineAsmConstraint(const std::string &Constraint,
5694 MVT::ValueType VT) const {
5695 if (Constraint.size() == 1) {
5696 // FIXME: not handling fp-stack yet!
5697 switch (Constraint[0]) { // GCC X86 Constraint Letters
5698 default: break; // Unknown constraint letter
5699 case 'A': // EAX/EDX
5700 if (VT == MVT::i32 || VT == MVT::i64)
5701 return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
5702 break;
5703 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
5704 case 'Q': // Q_REGS
5705 if (VT == MVT::i32)
5706 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
5707 else if (VT == MVT::i16)
5708 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
5709 else if (VT == MVT::i8)
5710 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
5711 break;
5712 }
5713 }
5714
5715 return std::vector<unsigned>();
5716}
5717
5718std::pair<unsigned, const TargetRegisterClass*>
5719X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
5720 MVT::ValueType VT) const {
5721 // First, see if this is a constraint that directly corresponds to an LLVM
5722 // register class.
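// E.g. 'r' with VT = i32 yields GR32, 'y' requires MMX, and 'x'/'Y' map
// scalar and vector SSE types onto FR32/FR64/VR128.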
5723 if (Constraint.size() == 1) { 5724 // GCC Constraint Letters 5725 switch (Constraint[0]) { 5726 default: break; 5727 case 'r': // GENERAL_REGS 5728 case 'R': // LEGACY_REGS 5729 case 'l': // INDEX_REGS 5730 if (VT == MVT::i64 && Subtarget->is64Bit()) 5731 return std::make_pair(0U, X86::GR64RegisterClass); 5732 if (VT == MVT::i32) 5733 return std::make_pair(0U, X86::GR32RegisterClass); 5734 else if (VT == MVT::i16) 5735 return std::make_pair(0U, X86::GR16RegisterClass); 5736 else if (VT == MVT::i8) 5737 return std::make_pair(0U, X86::GR8RegisterClass); 5738 break; 5739 case 'y': // MMX_REGS if MMX allowed. 5740 if (!Subtarget->hasMMX()) break; 5741 return std::make_pair(0U, X86::VR64RegisterClass); 5742 break; 5743 case 'Y': // SSE_REGS if SSE2 allowed 5744 if (!Subtarget->hasSSE2()) break; 5745 // FALL THROUGH. 5746 case 'x': // SSE_REGS if SSE1 allowed 5747 if (!Subtarget->hasSSE1()) break; 5748 5749 switch (VT) { 5750 default: break; 5751 // Scalar SSE types. 5752 case MVT::f32: 5753 case MVT::i32: 5754 return std::make_pair(0U, X86::FR32RegisterClass); 5755 case MVT::f64: 5756 case MVT::i64: 5757 return std::make_pair(0U, X86::FR64RegisterClass); 5758 // Vector types. 5759 case MVT::v16i8: 5760 case MVT::v8i16: 5761 case MVT::v4i32: 5762 case MVT::v2i64: 5763 case MVT::v4f32: 5764 case MVT::v2f64: 5765 return std::make_pair(0U, X86::VR128RegisterClass); 5766 } 5767 break; 5768 } 5769 } 5770 5771 // Use the default implementation in TargetLowering to convert the register 5772 // constraint into a member of a register class. 5773 std::pair<unsigned, const TargetRegisterClass*> Res; 5774 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5775 5776 // Not found as a standard register? 5777 if (Res.second == 0) { 5778 // GCC calls "st(0)" just plain "st". 5779 if (StringsEqualNoCase("{st}", Constraint)) { 5780 Res.first = X86::ST0; 5781 Res.second = X86::RFP80RegisterClass; 5782 } 5783 5784 return Res; 5785 } 5786 5787 // Otherwise, check to see if this is a register class of the wrong value 5788 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to 5789 // turn into {ax},{dx}. 5790 if (Res.second->hasType(VT)) 5791 return Res; // Correct type already, nothing to do. 5792 5793 // All of the single-register GCC register classes map their values onto 5794 // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we 5795 // really want an 8-bit or 32-bit register, map to the appropriate register 5796 // class and return the appropriate register. 
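// E.g. the constraint "{ax}" with VT = i32 is remapped to EAX in GR32,
// with VT = i8 to AL in GR8, and with VT = i64 to RAX in GR64.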
5797 if (Res.second != X86::GR16RegisterClass)
5798 return Res;
5799
5800 if (VT == MVT::i8) {
5801 unsigned DestReg = 0;
5802 switch (Res.first) {
5803 default: break;
5804 case X86::AX: DestReg = X86::AL; break;
5805 case X86::DX: DestReg = X86::DL; break;
5806 case X86::CX: DestReg = X86::CL; break;
5807 case X86::BX: DestReg = X86::BL; break;
5808 }
5809 if (DestReg) {
5810 Res.first = DestReg;
5811 Res.second = X86::GR8RegisterClass;
5812 }
5813 } else if (VT == MVT::i32) {
5814 unsigned DestReg = 0;
5815 switch (Res.first) {
5816 default: break;
5817 case X86::AX: DestReg = X86::EAX; break;
5818 case X86::DX: DestReg = X86::EDX; break;
5819 case X86::CX: DestReg = X86::ECX; break;
5820 case X86::BX: DestReg = X86::EBX; break;
5821 case X86::SI: DestReg = X86::ESI; break;
5822 case X86::DI: DestReg = X86::EDI; break;
5823 case X86::BP: DestReg = X86::EBP; break;
5824 case X86::SP: DestReg = X86::ESP; break;
5825 }
5826 if (DestReg) {
5827 Res.first = DestReg;
5828 Res.second = X86::GR32RegisterClass;
5829 }
5830 } else if (VT == MVT::i64) {
5831 unsigned DestReg = 0;
5832 switch (Res.first) {
5833 default: break;
5834 case X86::AX: DestReg = X86::RAX; break;
5835 case X86::DX: DestReg = X86::RDX; break;
5836 case X86::CX: DestReg = X86::RCX; break;
5837 case X86::BX: DestReg = X86::RBX; break;
5838 case X86::SI: DestReg = X86::RSI; break;
5839 case X86::DI: DestReg = X86::RDI; break;
5840 case X86::BP: DestReg = X86::RBP; break;
5841 case X86::SP: DestReg = X86::RSP; break;
5842 }
5843 if (DestReg) {
5844 Res.first = DestReg;
5845 Res.second = X86::GR64RegisterClass;
5846 }
5847 }
5848
5849 return Res;
5850}
5851