X86ISelLowering.cpp revision 7ef1a4bf0401d54ce88d10bcb4d1a2e98663a843
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  }

  // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
  setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
    setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
  }

  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part
  // is not needed.
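  // Illustrative example (hypothetical IR, for exposition only): with SDIV
  // and SREM expanded to the two-result form, a pair such as
  //   %q = sdiv i32 %x, %y
  //   %r = srem i32 %x, %y
  // becomes two identical two-result divide nodes that CSE merges, so a
  // single 'idiv' yields both the quotient (EAX) and the remainder (EDX).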
  setOperationAction(ISD::MUL , MVT::i8 , Expand);
  setOperationAction(ISD::MULHS , MVT::i8 , Expand);
  setOperationAction(ISD::MULHU , MVT::i8 , Expand);
  setOperationAction(ISD::SDIV , MVT::i8 , Expand);
  setOperationAction(ISD::UDIV , MVT::i8 , Expand);
  setOperationAction(ISD::SREM , MVT::i8 , Expand);
  setOperationAction(ISD::UREM , MVT::i8 , Expand);
  setOperationAction(ISD::MUL , MVT::i16 , Expand);
  setOperationAction(ISD::MULHS , MVT::i16 , Expand);
  setOperationAction(ISD::MULHU , MVT::i16 , Expand);
  setOperationAction(ISD::SDIV , MVT::i16 , Expand);
  setOperationAction(ISD::UDIV , MVT::i16 , Expand);
  setOperationAction(ISD::SREM , MVT::i16 , Expand);
  setOperationAction(ISD::UREM , MVT::i16 , Expand);
  setOperationAction(ISD::MUL , MVT::i32 , Expand);
  setOperationAction(ISD::MULHS , MVT::i32 , Expand);
  setOperationAction(ISD::MULHU , MVT::i32 , Expand);
  setOperationAction(ISD::SDIV , MVT::i32 , Expand);
  setOperationAction(ISD::UDIV , MVT::i32 , Expand);
  setOperationAction(ISD::SREM , MVT::i32 , Expand);
  setOperationAction(ISD::UREM , MVT::i32 , Expand);
  setOperationAction(ISD::MUL , MVT::i64 , Expand);
  setOperationAction(ISD::MULHS , MVT::i64 , Expand);
  setOperationAction(ISD::MULHU , MVT::i64 , Expand);
  setOperationAction(ISD::SDIV , MVT::i64 , Expand);
  setOperationAction(ISD::UDIV , MVT::i64 , Expand);
  setOperationAction(ISD::SREM , MVT::i64 , Expand);
  setOperationAction(ISD::UREM , MVT::i64 , Expand);

  setOperationAction(ISD::BR_JT , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);

  setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
    setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  setOperationAction(ISD::SELECT , MVT::i8 , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SELECT , MVT::f80 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::f80 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC , MVT::i64 , Custom);
  }
  // The X86 ret instruction may pop the stack.
  setOperationAction(ISD::RET , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY , MVT::Other, Custom);

  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE->x87 conversions go through memory.
    setConvertAction(MVT::f32, MVT::f64, Expand);
    setConvertAction(MVT::f32, MVT::f80, Expand);

    // x87->SSE truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    // And x87->x87 truncations also.
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating-point truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN , MVT::f80 , Expand);
    setOperationAction(ISD::FCOS , MVT::f80 , Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW , MVT::f32 , Expand);
  setOperationAction(ISD::FPOW , MVT::f64 , Expand);
  setOperationAction(ISD::FPOW , MVT::f80 , Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node if
/// it exists, skipping a possible ISD::TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode()==X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode()==ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode()==X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);

  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
           "Expecting a global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. The last operand is a flag so it is not
    // copied.
    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When the stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention seems to be the standard for many Windows
//  API routines and elsewhere. It differs from the C calling convention only
//  slightly: the callee should clean up the stack, not the caller. Symbols
//  should also be decorated in some fancy way :) It doesn't support any vector
//  arguments. For info on the fast calling convention (tail call) see the
//  implementation in LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live-in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// Align stack arguments according to the platform alignment needed for tail
// calls.
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg,
                 getTargetMachine(), ArgLocs);
  // Check for the possible tail call calling convention.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align the stack specially for tail calls.
  if (CC==CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize,DAG);

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  // The tail call calling convention (CallingConv::Fast) does not support
  // varargs.
  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "CallingConv::Fast does not support varargs.");

  if (isStdCall && !isVarArg &&
      (CC==CallingConv::Fast && PerformTailCallOpt || CC!=CallingConv::Fast)) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0;          // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC==CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC==CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall ||
      (CC == CallingConv::Fast && PerformTailCallOpt)) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
    assert(!(isVarArg && CC==CallingConv::Fast) &&
           "CallingConv::Fast does not support varargs.");
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return-value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee-pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
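//
// Worked example (illustrative, not from the original source): a fastcall
// callee with 16 bytes of stack arguments (8n, n=2) gets 4 filler bytes
// added by the lowering below, so it pops 20 = 8*2+4 bytes; together with
// the 4-byte return address the total stack adjustment is a multiple of 8
// across the call.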
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the frame takes 8n+4 bytes so that the start of the arguments,
    // after the return address has been pushed, is aligned.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);

    return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
                       AlignNode);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the frame takes 8n+4 bytes so that the start of the arguments,
    // after the return address has been pushed, is aligned.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  assert(isTailCall==false && "no tail call here");
  Chain = DAG.getNode(X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

// Like the stdcall convention, the callee cleans up the pushed arguments,
// except that ECX is reserved for storing the address of the tail called
// function. Only two registers are free for argument passing (inreg). Tail
// call optimization is performed provided:
//   * tailcallopt is enabled
//   * caller/callee are fastcc
//   * elf/pic is disabled OR
//   * elf/pic is enabled + the callee is in the module + the callee has
//     protected or hidden visibility
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld
// for example.)
// If the tail called callee has more arguments than the caller, the caller
// needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after
// the original RETADDR, but before the saved frame pointer or the spilled
// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
// Stack layout:
//   arg1
//   arg2
//   RETADDR
//   [ new RETADDR
//     move area ]
//   (possible EBP)
//   ESI
//   EDI
//   local1 ..

/// GetAlignedArgumentStackSize - Round up the stack size so that, together
/// with the return address slot, it satisfies the stack alignment, e.g.
/// 16n + 12 for a 16 byte alignment requirement.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  if (PerformTailCallOpt) {
    MachineFunction &MF = DAG.getMachineFunction();
    const TargetMachine &TM = MF.getTarget();
    const TargetFrameInfo &TFI = *TM.getFrameInfo();
    unsigned StackAlignment = TFI.getStackAlignment();
    uint64_t AlignMask = StackAlignment - 1;
    int64_t Offset = StackSize;
    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
      // The remainder already fits below StackAlignment - SlotSize (e.g. 12),
      // so just pad up to that boundary.
      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
    } else {
      // Mask out the lower bits, then add the stack alignment once plus the
      // StackAlignment - SlotSize (e.g. 12) bytes.
      Offset = ((~AlignMask) & Offset) + StackAlignment +
               (StackAlignment - SlotSize);
    }
    StackSize = Offset;
  }
  return StackSize;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if the
/// caller/callee calling conventions match (currently only fastcc supports
/// tail calls) and the CALL is immediately followed by a RET.
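// For illustration (this fragment is not from the source): in a fastcc
// caller, a sequence such as
//   %res = tail call fastcc i32 @callee(i32 %a)
//   ret i32 %res
// is eligible, while any instruction between the CALL and the RET makes it
// ineligible.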
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                          SDOperand Ret,
                                                          SelectionDAG& DAG) const {
  bool IsEligible = false;

  // Check whether the CALL node immediately precedes the RET node and whether
  // the return uses the result of the node or is a void return.
  if ((Ret.getNumOperands() == 1 &&
       (Ret.getOperand(0) == SDOperand(Call.Val, 1) ||
        Ret.getOperand(0) == SDOperand(Call.Val, 0))) ||
      (Ret.getOperand(0) == SDOperand(Call.Val, Call.Val->getNumValues()-1) &&
       Ret.getOperand(1) == SDOperand(Call.Val, 0))) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDOperand Callee = Call.getOperand(4);
      // On elf/pic %ebx needs to be livein.
      if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
          Subtarget->isPICStyleGOT()) {
        // Can only do local tail calls with PIC.
        GlobalValue *GV = 0;
        GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
        if (G != 0 &&
            (GV = G->getGlobal()) &&
            (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
          IsEligible = true;
      } else {
        IsEligible = true;
      }
    }
  }
  return IsEligible;
}

SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
                                                 SelectionDAG &DAG,
                                                 unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  bool is64Bit = Subtarget->is64Bit();

  assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (is64Bit)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);

  // Lower arguments at fp - stackoffset + fpdiff.
  MachineFunction &MF = DAG.getMachineFunction();

  unsigned NumBytesToBePushed =
    GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);

  unsigned NumBytesCallerPushed =
    MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
  int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;

  // Record the delta by which the return address stack slot has to move, but
  // only if this delta is smaller (more negative) than any previously
  // recorded one.
  if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
    MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytesToBePushed, getPointerTy()));

  // Adjust the Return address stack slot.
  SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
  if (FPDiff) {
    MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
    RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
    // Load the "old" Return address.
    RetAddrFrIdx = DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
    // Calculate the new stack slot for the return address.
    int SlotSize = is64Bit ?
8 : 4; 1537 int NewReturnAddrFI = 1538 MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); 1539 NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); 1540 Chain = SDOperand(RetAddrFrIdx.Val, 1); 1541 } 1542 1543 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 1544 SmallVector<SDOperand, 8> MemOpChains; 1545 SmallVector<SDOperand, 8> MemOpChains2; 1546 SDOperand FramePtr, StackPtr; 1547 SDOperand PtrOff; 1548 SDOperand FIN; 1549 int FI = 0; 1550 1551 // Walk the register/memloc assignments, inserting copies/loads. Lower 1552 // arguments first to the stack slot where they would normally - in case of a 1553 // normal function call - be. 1554 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1555 CCValAssign &VA = ArgLocs[i]; 1556 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1557 1558 // Promote the value if needed. 1559 switch (VA.getLocInfo()) { 1560 default: assert(0 && "Unknown loc info!"); 1561 case CCValAssign::Full: break; 1562 case CCValAssign::SExt: 1563 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1564 break; 1565 case CCValAssign::ZExt: 1566 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1567 break; 1568 case CCValAssign::AExt: 1569 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1570 break; 1571 } 1572 1573 if (VA.isRegLoc()) { 1574 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1575 } else { 1576 assert(VA.isMemLoc()); 1577 if (StackPtr.Val == 0) 1578 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 1579 1580 MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain, 1581 Arg)); 1582 } 1583 } 1584 1585 if (!MemOpChains.empty()) 1586 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1587 &MemOpChains[0], MemOpChains.size()); 1588 1589 // Build a sequence of copy-to-reg nodes chained together with token chain 1590 // and flag operands which copy the outgoing args into registers. 1591 SDOperand InFlag; 1592 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1593 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1594 InFlag); 1595 InFlag = Chain.getValue(1); 1596 } 1597 InFlag = SDOperand(); 1598 1599 // Copy from stack slots to stack slot of a tail called function. This needs 1600 // to be done because if we would lower the arguments directly to their real 1601 // stack slot we might end up overwriting each other. 1602 // TODO: To make this more efficient (sometimes saving a store/load) we could 1603 // analyse the arguments and emit this store/load/store sequence only for 1604 // arguments which would be overwritten otherwise. 1605 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1606 CCValAssign &VA = ArgLocs[i]; 1607 if (!VA.isRegLoc()) { 1608 SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo()); 1609 unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue(); 1610 1611 // Get source stack slot. 1612 SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); 1613 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1614 // Create frame index. 1615 int32_t Offset = VA.getLocMemOffset()+FPDiff; 1616 uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8; 1617 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); 1618 FIN = DAG.getFrameIndex(FI, MVT::i32); 1619 if (Flags & ISD::ParamFlags::ByVal) { 1620 // Copy relative to framepointer. 
        unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                               ISD::ParamFlags::ByValAlignOffs);

        unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                        ISD::ParamFlags::ByValSizeOffs;

        SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
        SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
        // Copy relative to framepointer.
        MemOpChains2.push_back(DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, FIN,
                                           PtrOff, SizeNode, AlignNode));
      } else {
        SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL, 0);
        // Store relative to framepointer.
        MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
      }
    }
  }

  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains2[0], MemOpChains2.size());

  // Store the return address to the appropriate stack slot.
  if (FPDiff)
    Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);

  // ELF / PIC normally requires the GOT pointer to be in the EBX register
  // before function calls via the PLT. That does not work with tail calls,
  // since EBX is not restored correctly by the tail caller.
  // TODO: true at least for x86 - verify for x86-64.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  else {
    assert(Callee.getOpcode() == ISD::LOAD &&
           "Function destination must be loaded into virtual register");
    unsigned Opc = is64Bit ? X86::R9 : X86::ECX;

    Chain = DAG.getCopyToReg(Chain,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add the register as live out.
    DAG.getMachineFunction().addLiveOut(Opc);
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;

  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a chain & a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
  assert(InFlag.Val &&
         "Flag must be set.
Depend on flag being set in LowerRET"); 1702 Chain = DAG.getNode(X86ISD::TAILCALL, 1703 Op.Val->getVTList(), &Ops[0], Ops.size()); 1704 1705 return SDOperand(Chain.Val, Op.ResNo); 1706} 1707 1708//===----------------------------------------------------------------------===// 1709// X86-64 C Calling Convention implementation 1710//===----------------------------------------------------------------------===// 1711 1712SDOperand 1713X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { 1714 MachineFunction &MF = DAG.getMachineFunction(); 1715 MachineFrameInfo *MFI = MF.getFrameInfo(); 1716 SDOperand Root = Op.getOperand(0); 1717 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1718 unsigned CC= MF.getFunction()->getCallingConv(); 1719 1720 static const unsigned GPR64ArgRegs[] = { 1721 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 1722 }; 1723 static const unsigned XMMArgRegs[] = { 1724 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1725 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1726 }; 1727 1728 1729 // Assign locations to all of the incoming arguments. 1730 SmallVector<CCValAssign, 16> ArgLocs; 1731 CCState CCInfo(CC, isVarArg, 1732 getTargetMachine(), ArgLocs); 1733 if (CC == CallingConv::Fast && PerformTailCallOpt) 1734 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall); 1735 else 1736 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); 1737 1738 SmallVector<SDOperand, 8> ArgValues; 1739 unsigned LastVal = ~0U; 1740 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1741 CCValAssign &VA = ArgLocs[i]; 1742 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 1743 // places. 1744 assert(VA.getValNo() != LastVal && 1745 "Don't support value assigned to multiple locs yet"); 1746 LastVal = VA.getValNo(); 1747 1748 if (VA.isRegLoc()) { 1749 MVT::ValueType RegVT = VA.getLocVT(); 1750 TargetRegisterClass *RC; 1751 if (RegVT == MVT::i32) 1752 RC = X86::GR32RegisterClass; 1753 else if (RegVT == MVT::i64) 1754 RC = X86::GR64RegisterClass; 1755 else if (RegVT == MVT::f32) 1756 RC = X86::FR32RegisterClass; 1757 else if (RegVT == MVT::f64) 1758 RC = X86::FR64RegisterClass; 1759 else { 1760 assert(MVT::isVector(RegVT)); 1761 if (MVT::getSizeInBits(RegVT) == 64) { 1762 RC = X86::GR64RegisterClass; // MMX values are passed in GPRs. 1763 RegVT = MVT::i64; 1764 } else 1765 RC = X86::VR128RegisterClass; 1766 } 1767 1768 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 1769 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 1770 1771 // If this is an 8 or 16-bit value, it is really passed promoted to 32 1772 // bits. Insert an assert[sz]ext to capture this, then truncate to the 1773 // right size. 1774 if (VA.getLocInfo() == CCValAssign::SExt) 1775 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 1776 DAG.getValueType(VA.getValVT())); 1777 else if (VA.getLocInfo() == CCValAssign::ZExt) 1778 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 1779 DAG.getValueType(VA.getValVT())); 1780 1781 if (VA.getLocInfo() != CCValAssign::Full) 1782 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 1783 1784 // Handle MMX values passed in GPRs. 
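      // e.g. a v2i32 argument arrives in a 64-bit GPR as an i64 and is
      // bit_convert'ed back to the expected MMX vector type here.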
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    assert(CC != CallingConv::Fast &&
           "Var arg not supported with calling convention fastcc");
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);
  // Tail call convention (fastcc) needs callee pop.
  if (CC == CallingConv::Fast && PerformTailCallOpt) {
    BytesToPopOnReturn = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0;  // Callee pops nothing.
    BytesCallerReserves = StackSize;
  }
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
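  // ArgValues holds one value per formal argument plus the updated chain
  // (Root) at the end; MERGE_VALUES rebundles them to match the value list of
  // the FORMAL_ARGUMENTS node being lowered.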
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
  else
    CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    assert(CallingConv::Fast != CC &&
           "Var args not supported with calling convention fastcc");

    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used, in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
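    // Illustrative example (not from the source): for a call like
    //   printf("%f %f\n", x, y)
    // two XMM registers carry FP arguments, so AL is set to 2; any upper
    // bound up to 8 would also satisfy the ABI.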
1950 static const unsigned XMMArgRegs[] = { 1951 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1952 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1953 }; 1954 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1955 1956 Chain = DAG.getCopyToReg(Chain, X86::AL, 1957 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1958 InFlag = Chain.getValue(1); 1959 } 1960 1961 // If the callee is a GlobalAddress node (quite common, every direct call is) 1962 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1963 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1964 // We should use extra load for direct calls to dllimported functions in 1965 // non-JIT mode. 1966 if (getTargetMachine().getCodeModel() != CodeModel::Large 1967 && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1968 getTargetMachine(), true)) 1969 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1970 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1971 if (getTargetMachine().getCodeModel() != CodeModel::Large) 1972 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1973 1974 // Returns a chain & a flag for retval copy to use. 1975 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1976 SmallVector<SDOperand, 8> Ops; 1977 Ops.push_back(Chain); 1978 Ops.push_back(Callee); 1979 1980 // Add argument registers to the end of the list so that they are known live 1981 // into the call. 1982 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1983 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1984 RegsToPass[i].second.getValueType())); 1985 1986 if (InFlag.Val) 1987 Ops.push_back(InFlag); 1988 1989 Chain = DAG.getNode(X86ISD::CALL, 1990 NodeTys, &Ops[0], Ops.size()); 1991 InFlag = Chain.getValue(1); 1992 int NumBytesForCalleeToPush = 0; 1993 if (CC==CallingConv::Fast && PerformTailCallOpt) { 1994 NumBytesForCalleeToPush = NumBytes; // Callee pops everything 1995 } else { 1996 NumBytesForCalleeToPush = 0; // Callee pops nothing. 1997 } 1998 // Returns a flag for retval copy to use. 1999 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 2000 Ops.clear(); 2001 Ops.push_back(Chain); 2002 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 2003 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); 2004 Ops.push_back(InFlag); 2005 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 2006 InFlag = Chain.getValue(1); 2007 2008 // Handle result values, copying them out of physregs into vregs that we 2009 // return. 2010 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 2011} 2012 2013 2014//===----------------------------------------------------------------------===// 2015// Other Lowering Hooks 2016//===----------------------------------------------------------------------===// 2017 2018 2019SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 2020 MachineFunction &MF = DAG.getMachineFunction(); 2021 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 2022 int ReturnAddrIndex = FuncInfo->getRAIndex(); 2023 2024 if (ReturnAddrIndex == 0) { 2025 // Set up a frame object for the return address. 
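    // The slot lives immediately below the incoming stack pointer: 8 bytes at
    // offset -8 on x86-64, 4 bytes at offset -4 on x86-32.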
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);

    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}



/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        X86CC = X86::COND_S;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_LE;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    //  ZF  PF  CF   op
    //   0 | 0 | 0 | X > Y
    //   0 | 0 | 1 | X < Y
    //   1 | 0 | 0 | X == Y
    //   1 | 1 | 1 | unordered
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT:  X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE:  X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETUO:  X86CC = X86::COND_P;  break;
    case ISD::SETO:   X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. The current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
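/// These are exactly the unsigned and parity conditions produced by
/// translateX86CC for FP compares above; signed conditions such as
/// X86::COND_G have no fcmov form.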
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the half-open range
/// [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value equals the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
    return false;

  // Check if the value doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
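  // Each of elements 0-3 may be any index within the low quadword (0-3).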
2216 for (unsigned i = 0; i != 4; ++i) 2217 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 2218 return false; 2219 2220 return true; 2221} 2222 2223/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 2224/// specifies a shuffle of elements that is suitable for input to SHUFP*. 2225static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 2226 if (NumElems != 2 && NumElems != 4) return false; 2227 2228 unsigned Half = NumElems / 2; 2229 for (unsigned i = 0; i < Half; ++i) 2230 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 2231 return false; 2232 for (unsigned i = Half; i < NumElems; ++i) 2233 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 2234 return false; 2235 2236 return true; 2237} 2238 2239bool X86::isSHUFPMask(SDNode *N) { 2240 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2241 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 2242} 2243 2244/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 2245/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 2246/// half elements to come from vector 1 (which would equal the dest.) and 2247/// the upper half to come from vector 2. 2248static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 2249 if (NumOps != 2 && NumOps != 4) return false; 2250 2251 unsigned Half = NumOps / 2; 2252 for (unsigned i = 0; i < Half; ++i) 2253 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 2254 return false; 2255 for (unsigned i = Half; i < NumOps; ++i) 2256 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 2257 return false; 2258 return true; 2259} 2260 2261static bool isCommutedSHUFP(SDNode *N) { 2262 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2263 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 2264} 2265 2266/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 2267/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 2268bool X86::isMOVHLPSMask(SDNode *N) { 2269 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2270 2271 if (N->getNumOperands() != 4) 2272 return false; 2273 2274 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 2275 return isUndefOrEqual(N->getOperand(0), 6) && 2276 isUndefOrEqual(N->getOperand(1), 7) && 2277 isUndefOrEqual(N->getOperand(2), 2) && 2278 isUndefOrEqual(N->getOperand(3), 3); 2279} 2280 2281/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 2282/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 2283/// <2, 3, 2, 3> 2284bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 2285 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2286 2287 if (N->getNumOperands() != 4) 2288 return false; 2289 2290 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 2291 return isUndefOrEqual(N->getOperand(0), 2) && 2292 isUndefOrEqual(N->getOperand(1), 3) && 2293 isUndefOrEqual(N->getOperand(2), 2) && 2294 isUndefOrEqual(N->getOperand(3), 3); 2295} 2296 2297/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 2298/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 
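/// For example (illustrative), for v4f32 the expected mask is <4, 5, 2, 3>:
/// the low half comes from V2 and the high half is preserved from V1.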
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      // V2 is a splat, so the odd elements must point at its first element.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
                         bool V2IsSplat = false) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
    SDOperand BitI = Elts[i];
    SDOperand BitI1 = Elts[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      // V2 is a splat, so the odd elements must point at its first element.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
    SDOperand BitI = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i+1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    SDOperand BitI = N->getOperand(i);
    SDOperand BitI1 = N->getOperand(i + 1);

    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }

  return true;
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  if (!isUndefOrEqual(Elts[0], NumElts))
    return false;

  for (unsigned i = 1; i < NumElts; ++i) {
    if (!isUndefOrEqual(Elts[i], i))
      return false;
  }

  return true;
}

bool X86::isMOVLMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants: the lowest element must be the lowest element of
/// vector 2, and the other elements must come from vector 1 in order.
static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
                           bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Ops[0], 0))
    return false;

  for (unsigned i = 1; i < NumOps; ++i) {
    SDOperand Arg = Ops[i];
    if (!(isUndefOrEqual(Arg, i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
      return false;
  }

  return true;
}

static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
                        V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
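/// MOVSHDUP duplicates the odd source elements: the expected mask for v4f32
/// is <1, 1, 3, 3>.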
bool X86::isMOVSHDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 1) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 3) return false;
    HasHi = true;
  }

  // Don't use movshdup if it can be done with a shufps.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies an identity operation on the LHS or RHS.
static bool isIdentityMask(SDNode *N, bool RHS = false) {
  unsigned NumElems = N->getNumOperands();
  for (unsigned i = 0; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
      return false;
  return true;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned NumElems = N->getNumOperands();
  SDOperand ElementBase;
  unsigned i = 0;
  for (; i != NumElems; ++i) {
    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
      ElementBase = Elt;
      break;
    }
  }

  if (!ElementBase.Val)
    return false;

  for (; i != NumElems; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (Arg != ElementBase) return false;
  }

  // Make sure it is a splat of the first vector operand.
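  // A mask value >= NumElems would mean the splatted element comes from the
  // second vector operand instead.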
  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
}

/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // We can only splat 64-bit, and 32-bit quantities with a single instruction.
  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
    return false;
  return ::isSplatMask(N);
}

/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of the zeroth element.
bool X86::isSplatLoMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
    if (!isUndefOrEqual(N->getOperand(i), 0))
      return false;
  return true;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  unsigned NumOperands = N->getNumOperands();
  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (unsigned i = 0; i < NumOperands; ++i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(NumOperands-i-1);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }

  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
/// instructions.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    // Treat an undef element as index 4 so it contributes zero bits; starting
    // from 0 would wrap around when 4 is subtracted below.
    unsigned Val = 4;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }

  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    unsigned Val = 0;
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() != ISD::UNDEF)
      Val = cast<ConstantSDNode>(Arg)->getValue();
    Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }

  return Mask;
}

/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
/// PSHUFHW and PSHUFLW.
static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
      return false;
  }

  // Upper quadword shuffled.
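  // Each of elements 4-7 must stay within the high quadword (indices 4-7).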
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the values
/// in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                      SDOperand &V2, SDOperand &Mask,
                                      SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType MaskVT = Mask.getValueType();
  MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
  unsigned NumElems = Mask.getNumOperands();
  SmallVector<SDOperand, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    SDOperand Arg = Mask.getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) {
      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
      continue;
    }
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < NumElems)
      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
    else
      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
  }

  std::swap(V1, V2);
  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}

/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
  unsigned NumElems = Mask->getNumOperands();
  if (NumElems != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
    N = N->getOperand(0).Val;
    return ISD::isNON_EXTLoad(N);
  }
  return false;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order). And since V1 will become the source of
/// the MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation. We will try to fold
  // the load into a shufps op instead.
2781 if (ISD::isNON_EXTLoad(V2)) 2782 return false; 2783 2784 unsigned NumElems = Mask->getNumOperands(); 2785 if (NumElems != 2 && NumElems != 4) 2786 return false; 2787 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2788 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2789 return false; 2790 for (unsigned i = NumElems/2; i != NumElems; ++i) 2791 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2792 return false; 2793 return true; 2794} 2795 2796/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2797/// all the same. 2798static bool isSplatVector(SDNode *N) { 2799 if (N->getOpcode() != ISD::BUILD_VECTOR) 2800 return false; 2801 2802 SDOperand SplatValue = N->getOperand(0); 2803 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2804 if (N->getOperand(i) != SplatValue) 2805 return false; 2806 return true; 2807} 2808 2809/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2810/// to an undef. 2811static bool isUndefShuffle(SDNode *N) { 2812 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2813 return false; 2814 2815 SDOperand V1 = N->getOperand(0); 2816 SDOperand V2 = N->getOperand(1); 2817 SDOperand Mask = N->getOperand(2); 2818 unsigned NumElems = Mask.getNumOperands(); 2819 for (unsigned i = 0; i != NumElems; ++i) { 2820 SDOperand Arg = Mask.getOperand(i); 2821 if (Arg.getOpcode() != ISD::UNDEF) { 2822 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2823 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2824 return false; 2825 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2826 return false; 2827 } 2828 } 2829 return true; 2830} 2831 2832/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2833/// constant +0.0. 2834static inline bool isZeroNode(SDOperand Elt) { 2835 return ((isa<ConstantSDNode>(Elt) && 2836 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2837 (isa<ConstantFPSDNode>(Elt) && 2838 cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); 2839} 2840 2841/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2842/// to an zero vector. 2843static bool isZeroShuffle(SDNode *N) { 2844 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2845 return false; 2846 2847 SDOperand V1 = N->getOperand(0); 2848 SDOperand V2 = N->getOperand(1); 2849 SDOperand Mask = N->getOperand(2); 2850 unsigned NumElems = Mask.getNumOperands(); 2851 for (unsigned i = 0; i != NumElems; ++i) { 2852 SDOperand Arg = Mask.getOperand(i); 2853 if (Arg.getOpcode() != ISD::UNDEF) { 2854 unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); 2855 if (Idx < NumElems) { 2856 unsigned Opc = V1.Val->getOpcode(); 2857 if (Opc == ISD::UNDEF) 2858 continue; 2859 if (Opc != ISD::BUILD_VECTOR || 2860 !isZeroNode(V1.Val->getOperand(Idx))) 2861 return false; 2862 } else if (Idx >= NumElems) { 2863 unsigned Opc = V2.Val->getOpcode(); 2864 if (Opc == ISD::UNDEF) 2865 continue; 2866 if (Opc != ISD::BUILD_VECTOR || 2867 !isZeroNode(V2.Val->getOperand(Idx - NumElems))) 2868 return false; 2869 } 2870 } 2871 } 2872 return true; 2873} 2874 2875/// getZeroVector - Returns a vector of specified type with all zero elements. 2876/// 2877static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2878 assert(MVT::isVector(VT) && "Expected a vector type"); 2879 unsigned NumElems = MVT::getVectorNumElements(VT); 2880 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2881 bool isFP = MVT::isFloatingPoint(EVT); 2882 SDOperand Zero = isFP ? 
DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2883 SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero); 2884 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2885} 2886 2887/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2888/// that point to V2 points to its first element. 2889static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2890 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2891 2892 bool Changed = false; 2893 SmallVector<SDOperand, 8> MaskVec; 2894 unsigned NumElems = Mask.getNumOperands(); 2895 for (unsigned i = 0; i != NumElems; ++i) { 2896 SDOperand Arg = Mask.getOperand(i); 2897 if (Arg.getOpcode() != ISD::UNDEF) { 2898 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2899 if (Val > NumElems) { 2900 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2901 Changed = true; 2902 } 2903 } 2904 MaskVec.push_back(Arg); 2905 } 2906 2907 if (Changed) 2908 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2909 &MaskVec[0], MaskVec.size()); 2910 return Mask; 2911} 2912 2913/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2914/// operation of specified width. 2915static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2916 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2917 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2918 2919 SmallVector<SDOperand, 8> MaskVec; 2920 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2921 for (unsigned i = 1; i != NumElems; ++i) 2922 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2923 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2924} 2925 2926/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2927/// of specified width. 2928static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2929 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2930 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2931 SmallVector<SDOperand, 8> MaskVec; 2932 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2933 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2934 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2935 } 2936 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2937} 2938 2939/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2940/// of specified width. 2941static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2942 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2943 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2944 unsigned Half = NumElems/2; 2945 SmallVector<SDOperand, 8> MaskVec; 2946 for (unsigned i = 0; i != Half; ++i) { 2947 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2948 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2949 } 2950 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2951} 2952 2953/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
2954/// 2955static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2956 SDOperand V1 = Op.getOperand(0); 2957 SDOperand Mask = Op.getOperand(2); 2958 MVT::ValueType VT = Op.getValueType(); 2959 unsigned NumElems = Mask.getNumOperands(); 2960 Mask = getUnpacklMask(NumElems, DAG); 2961 while (NumElems != 4) { 2962 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2963 NumElems >>= 1; 2964 } 2965 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2966 2967 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2968 Mask = getZeroVector(MaskVT, DAG); 2969 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2970 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2971 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2972} 2973 2974/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2975/// vector of zero or undef vector. 2976static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2977 unsigned NumElems, unsigned Idx, 2978 bool isZero, SelectionDAG &DAG) { 2979 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2980 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2981 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2982 SDOperand Zero = DAG.getConstant(0, EVT); 2983 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2984 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2985 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2986 &MaskVec[0], MaskVec.size()); 2987 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2988} 2989 2990/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2991/// 2992static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2993 unsigned NumNonZero, unsigned NumZero, 2994 SelectionDAG &DAG, TargetLowering &TLI) { 2995 if (NumNonZero > 8) 2996 return SDOperand(); 2997 2998 SDOperand V(0, 0); 2999 bool First = true; 3000 for (unsigned i = 0; i < 16; ++i) { 3001 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 3002 if (ThisIsNonZero && First) { 3003 if (NumZero) 3004 V = getZeroVector(MVT::v8i16, DAG); 3005 else 3006 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3007 First = false; 3008 } 3009 3010 if ((i & 1) != 0) { 3011 SDOperand ThisElt(0, 0), LastElt(0, 0); 3012 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 3013 if (LastIsNonZero) { 3014 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 3015 } 3016 if (ThisIsNonZero) { 3017 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 3018 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 3019 ThisElt, DAG.getConstant(8, MVT::i8)); 3020 if (LastIsNonZero) 3021 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 3022 } else 3023 ThisElt = LastElt; 3024 3025 if (ThisElt.Val) 3026 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 3027 DAG.getConstant(i/2, TLI.getPointerTy())); 3028 } 3029 } 3030 3031 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 3032} 3033 3034/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
3035/// 3036static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 3037 unsigned NumNonZero, unsigned NumZero, 3038 SelectionDAG &DAG, TargetLowering &TLI) { 3039 if (NumNonZero > 4) 3040 return SDOperand(); 3041 3042 SDOperand V(0, 0); 3043 bool First = true; 3044 for (unsigned i = 0; i < 8; ++i) { 3045 bool isNonZero = (NonZeros & (1 << i)) != 0; 3046 if (isNonZero) { 3047 if (First) { 3048 if (NumZero) 3049 V = getZeroVector(MVT::v8i16, DAG); 3050 else 3051 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 3052 First = false; 3053 } 3054 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 3055 DAG.getConstant(i, TLI.getPointerTy())); 3056 } 3057 } 3058 3059 return V; 3060} 3061 3062SDOperand 3063X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3064 // All zero's are handled with pxor. 3065 if (ISD::isBuildVectorAllZeros(Op.Val)) 3066 return Op; 3067 3068 // All one's are handled with pcmpeqd. 3069 if (ISD::isBuildVectorAllOnes(Op.Val)) 3070 return Op; 3071 3072 MVT::ValueType VT = Op.getValueType(); 3073 MVT::ValueType EVT = MVT::getVectorElementType(VT); 3074 unsigned EVTBits = MVT::getSizeInBits(EVT); 3075 3076 unsigned NumElems = Op.getNumOperands(); 3077 unsigned NumZero = 0; 3078 unsigned NumNonZero = 0; 3079 unsigned NonZeros = 0; 3080 unsigned NumNonZeroImms = 0; 3081 std::set<SDOperand> Values; 3082 for (unsigned i = 0; i < NumElems; ++i) { 3083 SDOperand Elt = Op.getOperand(i); 3084 if (Elt.getOpcode() != ISD::UNDEF) { 3085 Values.insert(Elt); 3086 if (isZeroNode(Elt)) 3087 NumZero++; 3088 else { 3089 NonZeros |= (1 << i); 3090 NumNonZero++; 3091 if (Elt.getOpcode() == ISD::Constant || 3092 Elt.getOpcode() == ISD::ConstantFP) 3093 NumNonZeroImms++; 3094 } 3095 } 3096 } 3097 3098 if (NumNonZero == 0) { 3099 if (NumZero == 0) 3100 // All undef vector. Return an UNDEF. 3101 return DAG.getNode(ISD::UNDEF, VT); 3102 else 3103 // A mix of zero and undef. Return a zero vector. 3104 return getZeroVector(VT, DAG); 3105 } 3106 3107 // Splat is obviously ok. Let legalizer expand it to a shuffle. 3108 if (Values.size() == 1) 3109 return SDOperand(); 3110 3111 // Special case for single non-zero element. 3112 if (NumNonZero == 1) { 3113 unsigned Idx = CountTrailingZeros_32(NonZeros); 3114 SDOperand Item = Op.getOperand(Idx); 3115 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 3116 if (Idx == 0) 3117 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 3118 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 3119 NumZero > 0, DAG); 3120 3121 if (EVTBits == 32) { 3122 // Turn it into a shuffle of zero and zero-extended scalar to vector. 3123 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 3124 DAG); 3125 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 3126 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 3127 SmallVector<SDOperand, 8> MaskVec; 3128 for (unsigned i = 0; i < NumElems; i++) 3129 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 3130 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3131 &MaskVec[0], MaskVec.size()); 3132 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 3133 DAG.getNode(ISD::UNDEF, VT), Mask); 3134 } 3135 } 3136 3137 // A vector full of immediates; various special cases are already 3138 // handled, so this is best done with a single constant-pool load. 3139 if (NumNonZero == NumNonZeroImms) 3140 return SDOperand(); 3141 3142 // Let legalizer expand 2-wide build_vectors. 
3142   // Let the legalizer expand 2-wide build_vectors.
3143   if (EVTBits == 64)
3144     return SDOperand();
3145
3146   // If the element VT is < 32 bits, convert it to inserts into a zero vector.
3147   if (EVTBits == 8 && NumElems == 16) {
3148     SDOperand V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
3149                                         *this);
3150     if (V.Val) return V;
3151   }
3152
3153   if (EVTBits == 16 && NumElems == 8) {
3154     SDOperand V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
3155                                         *this);
3156     if (V.Val) return V;
3157   }
3158
3159   // If the element VT is == 32 bits, turn it into a number of shuffles.
3160   SmallVector<SDOperand, 8> V;
3161   V.resize(NumElems);
3162   if (NumElems == 4 && NumZero > 0) {
3163     for (unsigned i = 0; i < 4; ++i) {
3164       bool isZero = !(NonZeros & (1 << i));
3165       if (isZero)
3166         V[i] = getZeroVector(VT, DAG);
3167       else
3168         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3169     }
3170
3171     for (unsigned i = 0; i < 2; ++i) {
3172       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
3173       default: break;
3174       case 0:
3175         V[i] = V[i*2];  // Must be a zero vector.
3176         break;
3177       case 1:
3178         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
3179                            getMOVLMask(NumElems, DAG));
3180         break;
3181       case 2:
3182         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3183                            getMOVLMask(NumElems, DAG));
3184         break;
3185       case 3:
3186         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3187                            getUnpacklMask(NumElems, DAG));
3188         break;
3189       }
3190     }
3191
3192     // Take advantage of the fact that a GR32-to-VR128 scalar_to_vector
3193     // (i.e. movd) clears the upper bits.
3194     // FIXME: we can do the same for the v4f32 case when we know both parts of
3195     // the lower half come from scalar_to_vector (loadf32). We should do
3196     // that in the post-legalizer dag combiner with target specific hooks.
3197     if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
3198       return V[0];
3199     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3200     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3201     SmallVector<SDOperand, 8> MaskVec;
3202     bool Reverse = (NonZeros & 0x3) == 2;
3203     for (unsigned i = 0; i < 2; ++i)
3204       if (Reverse)
3205         MaskVec.push_back(DAG.getConstant(1-i, MaskEVT));
3206       else
3207         MaskVec.push_back(DAG.getConstant(i, MaskEVT));
3208     Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
3209     for (unsigned i = 0; i < 2; ++i)
3210       if (Reverse)
3211         MaskVec.push_back(DAG.getConstant(1-i+NumElems, MaskEVT));
3212       else
3213         MaskVec.push_back(DAG.getConstant(i+NumElems, MaskEVT));
3214     SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3215                                      &MaskVec[0], MaskVec.size());
3216     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
3217   }
3218
3219   if (Values.size() > 2) {
3220     // Expand into a number of unpckl*.
3221     // e.g. for v4f32:
3222     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
3223     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
3224     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
3225     SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
3226     for (unsigned i = 0; i < NumElems; ++i)
3227       V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3228     NumElems >>= 1;
3229     while (NumElems != 0) {
3230       for (unsigned i = 0; i < NumElems; ++i)
3231         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
3232                            UnpckMask);
3233       NumElems >>= 1;
3234     }
3235     return V[0];
3236   }
3237
3238   return SDOperand();
3239 }
3240
3241 SDOperand
3242 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
3243   SDOperand V1 = Op.getOperand(0);
3244   SDOperand V2 = Op.getOperand(1);
3245   SDOperand PermMask = Op.getOperand(2);
3246   MVT::ValueType VT = Op.getValueType();
3247   unsigned NumElems = PermMask.getNumOperands();
3248   bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
3249   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
3250   bool V1IsSplat = false;
3251   bool V2IsSplat = false;
3252
3253   if (isUndefShuffle(Op.Val))
3254     return DAG.getNode(ISD::UNDEF, VT);
3255
3256   if (isZeroShuffle(Op.Val))
3257     return getZeroVector(VT, DAG);
3258
3259   if (isIdentityMask(PermMask.Val))
3260     return V1;
3261   else if (isIdentityMask(PermMask.Val, true))
3262     return V2;
3263
3264   if (isSplatMask(PermMask.Val)) {
3265     if (NumElems <= 4) return Op;
3266     // Promote it to a v4i32 splat.
3267     return PromoteSplat(Op, DAG);
3268   }
3269
3270   if (X86::isMOVLMask(PermMask.Val))
3271     return (V1IsUndef) ? V2 : Op;
3272
3273   if (X86::isMOVSHDUPMask(PermMask.Val) ||
3274       X86::isMOVSLDUPMask(PermMask.Val) ||
3275       X86::isMOVHLPSMask(PermMask.Val) ||
3276       X86::isMOVHPMask(PermMask.Val) ||
3277       X86::isMOVLPMask(PermMask.Val))
3278     return Op;
3279
3280   if (ShouldXformToMOVHLPS(PermMask.Val) ||
3281       ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
3282     return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3283
3284   bool Commuted = false;
3285   V1IsSplat = isSplatVector(V1.Val);
3286   V2IsSplat = isSplatVector(V2.Val);
3287   if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
3288     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3289     std::swap(V1IsSplat, V2IsSplat);
3290     std::swap(V1IsUndef, V2IsUndef);
3291     Commuted = true;
3292   }
3293
3294   if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3295     if (V2IsUndef) return V1;
3296     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3297     if (V2IsSplat) {
3298       // V2 is a splat, so the mask may be malformed. That is, it may point
3299       // to any V2 element. The instruction selector won't like this. Get
3300       // a corrected mask and commute to form a proper MOVS{S|D}.
3301       SDOperand NewMask = getMOVLMask(NumElems, DAG);
3302       if (NewMask.Val != PermMask.Val)
3303         Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3304     }
3305     return Op;
3306   }
3307
3308   if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3309       X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3310       X86::isUNPCKLMask(PermMask.Val) ||
3311       X86::isUNPCKHMask(PermMask.Val))
3312     return Op;
3313
3314   if (V2IsSplat) {
3315     // Normalize the mask so all entries that point to V2 point to its first
3316     // element, then try to match unpck{h|l} again. If it matches, return a
3317     // new vector_shuffle with the corrected mask.
3318     SDOperand NewMask = NormalizeMask(PermMask, DAG);
3319     if (NewMask.Val != PermMask.Val) {
3320       if (X86::isUNPCKLMask(PermMask.Val, true)) {
3321         SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
3322         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, UnpckMask);
3323       } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3324         SDOperand UnpckMask = getUnpackhMask(NumElems, DAG);
3325         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, UnpckMask);
3326       }
3327     }
3328   }
3329
3330   // Normalize the node to match x86 shuffle ops if needed.
3331   if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
3332     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3333
3334   if (Commuted) {
3335     // Commute it back and try unpck* again.
3336     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3337     if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3338         X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
3339         X86::isUNPCKLMask(PermMask.Val) ||
3340         X86::isUNPCKHMask(PermMask.Val))
3341       return Op;
3342   }
3343
3344   // If VT is integer, try PSHUF* first, then SHUFP*.
3345   if (MVT::isInteger(VT)) {
3346     // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
3347     // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
3348     if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
3349          X86::isPSHUFDMask(PermMask.Val)) ||
3350         X86::isPSHUFHWMask(PermMask.Val) ||
3351         X86::isPSHUFLWMask(PermMask.Val)) {
3352       if (V2.getOpcode() != ISD::UNDEF)
3353         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3354                            DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
3355       return Op;
3356     }
3357
3358     if (X86::isSHUFPMask(PermMask.Val) &&
3359         MVT::getSizeInBits(VT) != 64)  // Don't do this for MMX.
3360       return Op;
3361
3362     // Handle a v8i16 shuffle with a PSHUFHW / PSHUFLW node pair.
3363     if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
3364       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3365       MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
3366       SmallVector<SDOperand, 8> MaskVec;
3367       for (unsigned i = 0; i != 4; ++i)
3368         MaskVec.push_back(PermMask.getOperand(i));
3369       for (unsigned i = 4; i != 8; ++i)
3370         MaskVec.push_back(DAG.getConstant(i, BaseVT));
3371       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3372                                    &MaskVec[0], MaskVec.size());
3373       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3374       MaskVec.clear();
3375       for (unsigned i = 0; i != 4; ++i)
3376         MaskVec.push_back(DAG.getConstant(i, BaseVT));
3377       for (unsigned i = 4; i != 8; ++i)
3378         MaskVec.push_back(PermMask.getOperand(i));
3379       Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3380       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3381     }
3382   } else {
3383     // Floating point cases in the other order.
3384     if (X86::isSHUFPMask(PermMask.Val))
3385       return Op;
3386     if (X86::isPSHUFDMask(PermMask.Val) ||
3387         X86::isPSHUFHWMask(PermMask.Val) ||
3388         X86::isPSHUFLWMask(PermMask.Val)) {
3389       if (V2.getOpcode() != ISD::UNDEF)
3390         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3391                            DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask);
3392       return Op;
3393     }
3394   }
3395
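// A worked example of the 4-element case below: for PermMask <0, 4, 2, 5>
// the first shuffle gathers the V1 elements into the low half and the V2
// elements into the high half,
//   T = shuffle(V1, V2, <0, 2, 4, 5>)     ; <V1[0], V1[2], V2[0], V2[1]>
// and the second shuffle, with T as both of its operands, reorders them:
//   result = shuffle(T, T, <0, 2, 5, 7>)  ; <V1[0], V2[0], V1[2], V2[1]>
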
3396   if (NumElems == 4 &&
3397       // Don't do this for MMX.
3398       MVT::getSizeInBits(VT) != 64) {
3399     MVT::ValueType MaskVT = PermMask.getValueType();
3400     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3401     SmallVector<std::pair<int, int>, 8> Locs;
3402     Locs.resize(NumElems);
3403     SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3404     SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3405     unsigned NumHi = 0;
3406     unsigned NumLo = 0;
3407     // If no more than two elements come from either vector, this can be
3408     // implemented with two shuffles. The first shuffle gathers the elements;
3409     // the second shuffle, which takes the first shuffle as both of its
3410     // vector operands, puts the elements into the right order.
3411     for (unsigned i = 0; i != NumElems; ++i) {
3412       SDOperand Elt = PermMask.getOperand(i);
3413       if (Elt.getOpcode() == ISD::UNDEF) {
3414         Locs[i] = std::make_pair(-1, -1);
3415       } else {
3416         unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3417         if (Val < NumElems) {
3418           Locs[i] = std::make_pair(0, NumLo);
3419           Mask1[NumLo] = Elt;
3420           NumLo++;
3421         } else {
3422           Locs[i] = std::make_pair(1, NumHi);
3423           if (2+NumHi < NumElems)
3424             Mask1[2+NumHi] = Elt;
3425           NumHi++;
3426         }
3427       }
3428     }
3429     if (NumLo <= 2 && NumHi <= 2) {
3430       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3431                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3432                                    &Mask1[0], Mask1.size()));
3433       for (unsigned i = 0; i != NumElems; ++i) {
3434         if (Locs[i].first == -1)
3435           continue;
3436         unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3437         Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3438         Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3439       }
3440
3441       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3442                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3443                                      &Mask2[0], Mask2.size()));
3444     }
3445
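// For example, the hi/lo split below turns PermMask <4, 0, 6, 2> into
//   LoShuffle = shuffle(V1, V2, <0, u, 4, u>)   ; <V1[0], u, V2[0], u>
//   HiShuffle = shuffle(V1, V2, <2, u, 6, u>)   ; <V1[2], u, V2[2], u>
//   result    = shuffle(LoShuffle, HiShuffle, <2, 0, 6, 4>)
// which yields <V2[0], V1[0], V2[2], V1[2]>, as requested.
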
3448     // Break it into (shuffle shuffle_hi, shuffle_lo).
3449     Locs.clear(); Locs.resize(NumElems);
3450     SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3451     SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3452     SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3453     unsigned MaskIdx = 0;
3454     unsigned LoIdx = 0;
3455     unsigned HiIdx = NumElems/2;
3456     for (unsigned i = 0; i != NumElems; ++i) {
3457       if (i == NumElems/2) {
3458         MaskPtr = &HiMask;
3459         MaskIdx = 1;
3460         LoIdx = 0;
3461         HiIdx = NumElems/2;
3462       }
3463       SDOperand Elt = PermMask.getOperand(i);
3464       if (Elt.getOpcode() == ISD::UNDEF) {
3465         Locs[i] = std::make_pair(-1, -1);
3466       } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3467         Locs[i] = std::make_pair(MaskIdx, LoIdx);
3468         (*MaskPtr)[LoIdx] = Elt;
3469         LoIdx++;
3470       } else {
3471         Locs[i] = std::make_pair(MaskIdx, HiIdx);
3472         (*MaskPtr)[HiIdx] = Elt;
3473         HiIdx++;
3474       }
3475     }
3476
3477     SDOperand LoShuffle =
3478       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3479                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3480                               &LoMask[0], LoMask.size()));
3481     SDOperand HiShuffle =
3482       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3483                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3484                               &HiMask[0], HiMask.size()));
3485     SmallVector<SDOperand, 8> MaskOps;
3486     for (unsigned i = 0; i != NumElems; ++i) {
3487       if (Locs[i].first == -1) {
3488         MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3489       } else {
3490         unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3491         MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3492       }
3493     }
3494     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3495                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3496                                    &MaskOps[0], MaskOps.size()));
3497   }
3498
3499   return SDOperand();
3500 }
3501
3502 SDOperand
3503 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3504   if (!isa<ConstantSDNode>(Op.getOperand(1)))
3505     return SDOperand();
3506
3507   MVT::ValueType VT = Op.getValueType();
3508   // TODO: handle v16i8.
3509   if (MVT::getSizeInBits(VT) == 16) {
3510     // Transform it so it matches pextrw, which produces a 32-bit result.
3511     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3512     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3513                                     Op.getOperand(0), Op.getOperand(1));
3514     SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
3515                                    DAG.getValueType(VT));
3516     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3517   } else if (MVT::getSizeInBits(VT) == 32) {
3518     SDOperand Vec = Op.getOperand(0);
3519     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3520     if (Idx == 0)
3521       return Op;
3522     // SHUFPS the element to the lowest double word, then movss.
3523     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3524     SmallVector<SDOperand, 8> IdxVec;
3525     IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3527     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3529     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3531     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3533     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3534                                  &IdxVec[0], IdxVec.size());
3535     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3536                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3537     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3538                        DAG.getConstant(0, getPointerTy()));
3539   } else if (MVT::getSizeInBits(VT) == 64) {
3540     SDOperand Vec = Op.getOperand(0);
3541     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3542     if (Idx == 0)
3543       return Op;
3544
3545     // UNPCKHPD the element to the lowest double word, then movsd.
3546     // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
3547     // to a f64mem, the whole operation is folded into a single MOVHPDmr.
3548     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
3549     SmallVector<SDOperand, 8> IdxVec;
3550     IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
3551     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3553     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3554                                  &IdxVec[0], IdxVec.size());
3555     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3556                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3557     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3558                        DAG.getConstant(0, getPointerTy()));
3559   }
3560
3561   return SDOperand();
3562 }
3563
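// As an illustration of the lowering above: extracting element 2 of a v4f32
// becomes a shuffle with mask <2, u, u, u> (a single shufps) followed by an
// extract of element 0, which the selector can fold into a movss or a direct
// register use; extracting element 1 of a v2f64 similarly becomes an unpckhpd
// of the vector with itself followed by an extract of element 0.
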
3564 SDOperand
3565 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3566   // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3567   // as its second argument.
3568   MVT::ValueType VT = Op.getValueType();
3569   MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
3570   SDOperand N0 = Op.getOperand(0);
3571   SDOperand N1 = Op.getOperand(1);
3572   SDOperand N2 = Op.getOperand(2);
3573   if (MVT::getSizeInBits(BaseVT) == 16) {
3574     if (N1.getValueType() != MVT::i32)
3575       N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3576     if (N2.getValueType() != MVT::i32)
3577       N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), getPointerTy());
3578     return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3579   } else if (MVT::getSizeInBits(BaseVT) == 32) {
3580     unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3581     if (Idx == 0) {
3582       // Use a movss.
3583       N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3584       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3585       MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3586       SmallVector<SDOperand, 8> MaskVec;
3587       MaskVec.push_back(DAG.getConstant(4, MaskEVT));
3588       for (unsigned i = 1; i <= 3; ++i)
3589         MaskVec.push_back(DAG.getConstant(i, MaskEVT));
3590       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3591                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3592                                      &MaskVec[0], MaskVec.size()));
3593     } else {
3594       // Use two pinsrw instructions to insert a 32-bit value.
3595       Idx <<= 1;
3596       if (MVT::isFloatingPoint(N1.getValueType())) {
3597         N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3598         N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3599         N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3600                          DAG.getConstant(0, getPointerTy()));
3601       }
3602       N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3603       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3604                        DAG.getConstant(Idx, getPointerTy()));
3605       N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3606       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3607                        DAG.getConstant(Idx+1, getPointerTy()));
3608       return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3609     }
3610   }
3611
3612   return SDOperand();
3613 }
3614
3615 SDOperand
3616 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3617   SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3618   return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3619 }
3620
3621 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3622 // their target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
3623 // one of the above mentioned nodes. It has to be wrapped because otherwise
3624 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3625 // be used to form addressing modes. These wrapped nodes will be selected
3626 // into MOV32ri.
3627 SDOperand
3628 X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3629   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3630   SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3631                                                getPointerTy(),
3632                                                CP->getAlignment());
3633   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3634   // With PIC, the address is actually $g + Offset.
3635   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3636       !Subtarget->isPICStyleRIPRel()) {
3637     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3638                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3639                          Result);
3640   }
3641
3642   return Result;
3643 }
3644
3645 SDOperand
3646 X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3647   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3648   SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3649   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3650   // With PIC, the address is actually $g + Offset.
3651   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3652       !Subtarget->isPICStyleRIPRel()) {
3653     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3654                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3655                          Result);
3656   }
3657
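// At this point the address is either Wrapper(TGA) or, for PIC,
// GlobalBaseReg + Wrapper(TGA); on IA-32 this typically selects to something
// like "leal _GV@GOTOFF(%ebx), %eax" (the exact form depends on the
// relocation style).
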
3658   // For Darwin & Mingw32, external and weak symbols are indirect, so we want
3659   // to load the value at address GV, not the value of GV itself. This means
3660   // the GlobalAddress must be in the base or index register of the address,
3661   // not in the GV offset field; the platform check is inside the
3662   // GVRequiresExtraLoad() call. The same applies to external symbols during PIC codegen.
3663   if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
3664     Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3665
3666   return Result;
3667 }
3668
3669 // Lower ISD::GlobalTLSAddress using the "general dynamic" model.
3670 static SDOperand
3671 LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3672                               const MVT::ValueType PtrVT) {
3673   SDOperand InFlag;
3674   SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
3675                                      DAG.getNode(X86ISD::GlobalBaseReg,
3676                                                  PtrVT), InFlag);
3677   InFlag = Chain.getValue(1);
3678
3679   // emit leal symbol@TLSGD(,%ebx,1), %eax
3680   SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
3681   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3682                                              GA->getValueType(0),
3683                                              GA->getOffset());
3684   SDOperand Ops[] = { Chain, TGA, InFlag };
3685   SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
3686   InFlag = Result.getValue(2);
3687   Chain = Result.getValue(1);
3688
3689   // call ___tls_get_addr. This function receives its argument in
3690   // the register EAX.
3691   Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
3692   InFlag = Chain.getValue(1);
3693
3694   NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
3695   SDOperand Ops1[] = { Chain,
3696                        DAG.getTargetExternalSymbol("___tls_get_addr",
3697                                                    PtrVT),
3698                        DAG.getRegister(X86::EAX, PtrVT),
3699                        DAG.getRegister(X86::EBX, PtrVT),
3700                        InFlag };
3701   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
3702   InFlag = Chain.getValue(1);
3703
3704   return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
3705 }
3706
3707 // Lower ISD::GlobalTLSAddress using the "initial exec" (for non-PIC) or
3708 // "local exec" model.
3709 static SDOperand
3710 LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3711                     const MVT::ValueType PtrVT) {
3712   // Get the thread pointer.
3713   SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
3714   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
3715   // (initial exec)
3716   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3717                                              GA->getValueType(0),
3718                                              GA->getOffset());
3719   SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
3720
3721   if (GA->getGlobal()->isDeclaration())  // initial exec TLS model
3722     Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
3723
3724   // The address of the thread-local variable is the add of the thread
3725   // pointer with the offset of the variable.
3726   return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
3727 }
3728
3729 SDOperand
3730 X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
3731   // TODO: implement the "local dynamic" model
3732   // TODO: implement the "initial exec" model for PIC executables
3733   assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
3734          "TLS not implemented for 64-bit or non-ELF targets");
3735   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3736   // If the relocation model is PIC, use the "general dynamic" TLS model;
3737   // otherwise use the "local exec" TLS model.
3738   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3739     return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
3740   else
3741     return LowerToTLSExecModel(GA, DAG, getPointerTy());
3742 }
3743
3744 SDOperand
3745 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3746   const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3747   SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3748   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3749   // With PIC, the address is actually $g + Offset.
3750   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3751       !Subtarget->isPICStyleRIPRel()) {
3752     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3753                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3754                          Result);
3755   }
3756
3757   return Result;
3758 }
3759
3760 SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3761   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3762   SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3763   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3764   // With PIC, the address is actually $g + Offset.
3765   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3766       !Subtarget->isPICStyleRIPRel()) {
3767     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3768                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3769                          Result);
3770   }
3771
3772   return Result;
3773 }
3774
3775 /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
3776 /// take a 2 x i32 value to shift plus a shift amount.
3777 SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3778   assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3779          "Not an i64 shift!");
3780   bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3781   SDOperand ShOpLo = Op.getOperand(0);
3782   SDOperand ShOpHi = Op.getOperand(1);
3783   SDOperand ShAmt  = Op.getOperand(2);
3784   SDOperand Tmp1 = isSRA ?
3785     DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3786     DAG.getConstant(0, MVT::i32);
3787
3788   SDOperand Tmp2, Tmp3;
3789   if (Op.getOpcode() == ISD::SHL_PARTS) {
3790     Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3791     Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3792   } else {
3793     Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3794     Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3795 } 3796 3797 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3798 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3799 DAG.getConstant(32, MVT::i8)); 3800 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 3801 AndNode, DAG.getConstant(0, MVT::i8)); 3802 3803 SDOperand Hi, Lo; 3804 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3805 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3806 SmallVector<SDOperand, 4> Ops; 3807 if (Op.getOpcode() == ISD::SHL_PARTS) { 3808 Ops.push_back(Tmp2); 3809 Ops.push_back(Tmp3); 3810 Ops.push_back(CC); 3811 Ops.push_back(Cond); 3812 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3813 3814 Ops.clear(); 3815 Ops.push_back(Tmp3); 3816 Ops.push_back(Tmp1); 3817 Ops.push_back(CC); 3818 Ops.push_back(Cond); 3819 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3820 } else { 3821 Ops.push_back(Tmp2); 3822 Ops.push_back(Tmp3); 3823 Ops.push_back(CC); 3824 Ops.push_back(Cond); 3825 Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3826 3827 Ops.clear(); 3828 Ops.push_back(Tmp3); 3829 Ops.push_back(Tmp1); 3830 Ops.push_back(CC); 3831 Ops.push_back(Cond); 3832 Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); 3833 } 3834 3835 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3836 Ops.clear(); 3837 Ops.push_back(Lo); 3838 Ops.push_back(Hi); 3839 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3840} 3841 3842SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3843 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3844 Op.getOperand(0).getValueType() >= MVT::i16 && 3845 "Unknown SINT_TO_FP to lower!"); 3846 3847 SDOperand Result; 3848 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3849 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3850 MachineFunction &MF = DAG.getMachineFunction(); 3851 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3852 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3853 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3854 StackSlot, NULL, 0); 3855 3856 // These are really Legal; caller falls through into that case. 3857 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32) 3858 return Result; 3859 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64) 3860 return Result; 3861 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 3862 Subtarget->is64Bit()) 3863 return Result; 3864 3865 // Build the FILD 3866 SDVTList Tys; 3867 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) || 3868 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64); 3869 if (useSSE) 3870 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3871 else 3872 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3873 SmallVector<SDOperand, 8> Ops; 3874 Ops.push_back(Chain); 3875 Ops.push_back(StackSlot); 3876 Ops.push_back(DAG.getValueType(SrcVT)); 3877 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3878 Tys, &Ops[0], Ops.size()); 3879 3880 if (useSSE) { 3881 Chain = Result.getValue(1); 3882 SDOperand InFlag = Result.getValue(2); 3883 3884 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3885 // shouldn't be necessary except that RFP cannot be live across 3886 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
3887 MachineFunction &MF = DAG.getMachineFunction(); 3888 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3889 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3890 Tys = DAG.getVTList(MVT::Other); 3891 SmallVector<SDOperand, 8> Ops; 3892 Ops.push_back(Chain); 3893 Ops.push_back(Result); 3894 Ops.push_back(StackSlot); 3895 Ops.push_back(DAG.getValueType(Op.getValueType())); 3896 Ops.push_back(InFlag); 3897 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3898 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3899 } 3900 3901 return Result; 3902} 3903 3904SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3905 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3906 "Unknown FP_TO_SINT to lower!"); 3907 SDOperand Result; 3908 3909 // These are really Legal. 3910 if (Op.getValueType() == MVT::i32 && 3911 X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) 3912 return Result; 3913 if (Op.getValueType() == MVT::i32 && 3914 X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64) 3915 return Result; 3916 if (Subtarget->is64Bit() && 3917 Op.getValueType() == MVT::i64 && 3918 Op.getOperand(0).getValueType() != MVT::f80) 3919 return Result; 3920 3921 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3922 // stack slot. 3923 MachineFunction &MF = DAG.getMachineFunction(); 3924 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3925 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3926 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3927 unsigned Opc; 3928 switch (Op.getValueType()) { 3929 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3930 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3931 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3932 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3933 } 3934 3935 SDOperand Chain = DAG.getEntryNode(); 3936 SDOperand Value = Op.getOperand(0); 3937 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) || 3938 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) { 3939 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3940 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3941 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3942 SDOperand Ops[] = { 3943 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3944 }; 3945 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3946 Chain = Value.getValue(1); 3947 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3948 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3949 } 3950 3951 // Build the FP_TO_INT*_IN_MEM 3952 SDOperand Ops[] = { Chain, Value, StackSlot }; 3953 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3954 3955 // Load the result. If this is an i64 load on an x86-32 host, expand the 3956 // load. 
3957   if (Op.getValueType() != MVT::i64 || Subtarget->is64Bit())
3958     return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
3959
3960   SDOperand Lo = DAG.getLoad(MVT::i32, FIST, StackSlot, NULL, 0);
3961   StackSlot = DAG.getNode(ISD::ADD, StackSlot.getValueType(), StackSlot,
3962                           DAG.getConstant(4, StackSlot.getValueType()));
3963   SDOperand Hi = DAG.getLoad(MVT::i32, FIST, StackSlot, NULL, 0);
3964
3965   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
3967 }
3968
3969 SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
3970   MVT::ValueType VT = Op.getValueType();
3971   MVT::ValueType EltVT = VT;
3972   if (MVT::isVector(VT))
3973     EltVT = MVT::getVectorElementType(VT);
3974   const Type *OpNTy = MVT::getTypeForValueType(EltVT);
3975   std::vector<Constant*> CV;
3976   if (EltVT == MVT::f64) {
3977     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
3978     CV.push_back(C);
3979     CV.push_back(C);
3980   } else {
3981     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
3982     CV.push_back(C);
3983     CV.push_back(C);
3984     CV.push_back(C);
3985     CV.push_back(C);
3986   }
3987   Constant *C = ConstantVector::get(CV);
3988   SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
3989   SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
3990                                false, 16);
3991   return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3992 }
3993
3994 SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
3995   MVT::ValueType VT = Op.getValueType();
3996   MVT::ValueType EltVT = VT;
3997   unsigned EltNum = 1;
3998   if (MVT::isVector(VT)) {
3999     EltVT = MVT::getVectorElementType(VT);
4000     EltNum = MVT::getVectorNumElements(VT);
4001   }
4002   const Type *OpNTy = MVT::getTypeForValueType(EltVT);
4003   std::vector<Constant*> CV;
4004   if (EltVT == MVT::f64) {
4005     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
4006     CV.push_back(C);
4007     CV.push_back(C);
4008   } else {
4009     Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
4010     CV.push_back(C);
4011     CV.push_back(C);
4012     CV.push_back(C);
4013     CV.push_back(C);
4014   }
4015   Constant *C = ConstantVector::get(CV);
4016   SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
4017   SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
4018                                false, 16);
4019   if (MVT::isVector(VT)) {
4020     return DAG.getNode(ISD::BIT_CONVERT, VT,
4021                        DAG.getNode(ISD::XOR, MVT::v2i64,
4022                                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
4023                                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
4024   } else {
4025     return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
4026   }
4027 }
4028
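// In effect, LowerFABS and LowerFNEG above turn the FP operations into
// bitwise operations against constant-pool masks, e.g. for f32 (the scalar
// masks are padded out to 16 bytes):
//   fabs(x) => x & <0x7FFFFFFF, ...>   ; andps, clears the sign bit
//   fneg(x) => x ^ <0x80000000, ...>   ; xorps, flips the sign bit
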
4029 SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
4030   SDOperand Op0 = Op.getOperand(0);
4031   SDOperand Op1 = Op.getOperand(1);
4032   MVT::ValueType VT = Op.getValueType();
4033   MVT::ValueType SrcVT = Op1.getValueType();
4034   const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
4035
4036   // If the second operand is smaller, extend it first.
4037   if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
4038     Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
4039     SrcVT = VT;
4040     SrcTy = MVT::getTypeForValueType(SrcVT);
4041   }
4042
4043   // First get the sign bit of the second operand.
4044   std::vector<Constant*> CV;
4045   if (SrcVT == MVT::f64) {
4046     CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63))));
4047     CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
4048   } else {
4049     CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31))));
4050     CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
4051     CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
4052     CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
4053   }
4054   Constant *C = ConstantVector::get(CV);
4055   SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
4056   SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
4057                                 false, 16);
4058   SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
4059
4060   // Shift the sign bit right or left if the two operands have different types.
4061   if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
4062     // Op0 is MVT::f32, Op1 is MVT::f64.
4063     SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
4064     SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
4065                           DAG.getConstant(32, MVT::i32));
4066     SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
4067     SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
4068                           DAG.getConstant(0, getPointerTy()));
4069   }
4070
4071   // Clear the sign bit of the first operand. The constants here must have
4072   // the result type VT, which may be narrower than SrcVT at this point.
4073   const Type *DstTy = MVT::getTypeForValueType(VT); CV.clear();
4074   if (VT == MVT::f64) {
4074     CV.push_back(ConstantFP::get(DstTy, APFloat(APInt(64, ~(1ULL << 63)))));
4075     CV.push_back(ConstantFP::get(DstTy, APFloat(APInt(64, 0))));
4076   } else {
4077     CV.push_back(ConstantFP::get(DstTy, APFloat(APInt(32, ~(1U << 31)))));
4078     CV.push_back(ConstantFP::get(DstTy, APFloat(APInt(32, 0))));
4079     CV.push_back(ConstantFP::get(DstTy, APFloat(APInt(32, 0))));
4080     CV.push_back(ConstantFP::get(DstTy, APFloat(APInt(32, 0))));
4081   }
4082   C = ConstantVector::get(CV);
4083   CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
4084   SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
4085                                 false, 16);
4086   SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
4087
4088   // Or the value with the sign bit.
4089   return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
4090 }
4091
4092 SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
4093   assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
4094   SDOperand Cond;
4095   SDOperand Op0 = Op.getOperand(0);
4096   SDOperand Op1 = Op.getOperand(1);
4097   SDOperand CC = Op.getOperand(2);
4098   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4099   bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
4100   unsigned X86CC;
4101
4102   if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
4103                      Op0, Op1, DAG)) {
4104     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
4105     return DAG.getNode(X86ISD::SETCC, MVT::i8,
4106                        DAG.getConstant(X86CC, MVT::i8), Cond);
4107   }
4108
4109   assert(isFP && "Illegal integer SetCC!");
4110
4111   Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
4112   switch (SetCCOpcode) {
4113   default: assert(false && "Illegal floating point SetCC!");
4114   case ISD::SETOEQ: {  // !PF & ZF
4115     SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4116                                  DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
4117     SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4118                                  DAG.getConstant(X86::COND_E, MVT::i8), Cond);
4119     return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
4120   }
4121   case ISD::SETUNE: {  // PF | !ZF
4122     SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4123                                  DAG.getConstant(X86::COND_P, MVT::i8), Cond);
4124     SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
4125                                  DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
4126     return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
4127   }
4128   }
4129 }
4130
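// To illustrate the SETOEQ case above: an ordered-equal compare of two
// floats lowers to a compare plus two flag reads that are ANDed together,
// roughly
//   ucomiss %xmm1, %xmm0
//   setnp   %al              ; !PF: the operands were not unordered
//   sete    %cl              ; ZF: the operands compared equal
//   andb    %cl, %al
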
4131
4132 SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
4133   bool addTest = true;
4134   SDOperand Cond = Op.getOperand(0);
4135   SDOperand CC;
4136
4137   if (Cond.getOpcode() == ISD::SETCC)
4138     Cond = LowerSETCC(Cond, DAG);
4139
4140   // If the condition flag is set by an X86ISD::CMP, then use it as the
4141   // condition-setting operand in place of the X86ISD::SETCC.
4142   if (Cond.getOpcode() == X86ISD::SETCC) {
4143     CC = Cond.getOperand(0);
4144
4145     SDOperand Cmp = Cond.getOperand(1);
4146     unsigned Opc = Cmp.getOpcode();
4147     MVT::ValueType VT = Op.getValueType();
4148     bool IllegalFPCMov = false;
4149     if ((VT == MVT::f32 && !X86ScalarSSEf32) ||
4150         (VT == MVT::f64 && !X86ScalarSSEf64) ||
4151         VT == MVT::f80)
4152       IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
4155     if ((Opc == X86ISD::CMP ||
4156          Opc == X86ISD::COMI ||
4157          Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
4158       Cond = Cmp;
4159       addTest = false;
4160     }
4161   }
4162
4163   if (addTest) {
4164     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4165     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond,
4166                        DAG.getConstant(0, MVT::i8));
4167   }
4168
4169   const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
4170                                                     MVT::Flag);
4171   SmallVector<SDOperand, 4> Ops;
4172   // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
4173   // the condition is true.
4174   Ops.push_back(Op.getOperand(2));
4175   Ops.push_back(Op.getOperand(1));
4176   Ops.push_back(CC);
4177   Ops.push_back(Cond);
4178   return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
4179 }
4180
4181 SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
4182   bool addTest = true;
4183   SDOperand Chain = Op.getOperand(0);
4184   SDOperand Cond  = Op.getOperand(1);
4185   SDOperand Dest  = Op.getOperand(2);
4186   SDOperand CC;
4187
4188   if (Cond.getOpcode() == ISD::SETCC)
4189     Cond = LowerSETCC(Cond, DAG);
4190
4191   // If the condition flag is set by an X86ISD::CMP, then use it as the
4192   // condition-setting operand in place of the X86ISD::SETCC.
4193   if (Cond.getOpcode() == X86ISD::SETCC) {
4194     CC = Cond.getOperand(0);
4195
4196     SDOperand Cmp = Cond.getOperand(1);
4197     unsigned Opc = Cmp.getOpcode();
4198     if (Opc == X86ISD::CMP ||
4199         Opc == X86ISD::COMI ||
4200         Opc == X86ISD::UCOMI) {
4201       Cond = Cmp;
4202       addTest = false;
4203     }
4204   }
4205
4206   if (addTest) {
4207     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
4208     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond,
                          DAG.getConstant(0, MVT::i8));
4209   }
4210   return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
4210                      Chain, Dest, CC, Cond);
4211 }
4212
4213 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
4214   unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
4215   bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
4216
4217   if (Subtarget->is64Bit())
4218     if (CallingConv == CallingConv::Fast && isTailCall && PerformTailCallOpt)
4219       return LowerX86_TailCallTo(Op, DAG, CallingConv);
4220     else
4221       return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
4222   else
4223     switch (CallingConv) {
4224     default:
4225       assert(0 && "Unsupported calling convention");
4226     case CallingConv::Fast:
4227       if (isTailCall && PerformTailCallOpt)
4228         return LowerX86_TailCallTo(Op, DAG, CallingConv);
4229       else
4230         return LowerCCCCallTo(Op, DAG, CallingConv);
4231     case CallingConv::C:
4232     case CallingConv::X86_StdCall:
4233       return LowerCCCCallTo(Op, DAG, CallingConv);
4234     case CallingConv::X86_FastCall:
4235       return LowerFastCCCallTo(Op, DAG, CallingConv);
4236     }
4237 }
4238
4239
4240 // Lower dynamic stack allocation to an _alloca call for Cygwin/Mingw targets.
4241 // Calls to _alloca are needed to probe the stack when allocating more than 4k
4242 // bytes in one go. Touching the stack at 4K increments is necessary to ensure
4243 // that the guard pages used by the OS virtual memory manager are allocated in
4244 // the correct sequence.
4245 SDOperand
4246 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
4247                                            SelectionDAG &DAG) {
4248   assert(Subtarget->isTargetCygMing() &&
4249          "This should be used only on Cygwin/Mingw targets");
4250
4251   // Get the inputs.
4252   SDOperand Chain = Op.getOperand(0);
4253   SDOperand Size  = Op.getOperand(1);
4254   // FIXME: Ensure alignment here
4255
4256   SDOperand Flag;
4257
4258   MVT::ValueType IntPtr = getPointerTy();
4259   MVT::ValueType SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
4260
4261   Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
4262   Flag = Chain.getValue(1);
4263
4264   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
4265   SDOperand Ops[] = { Chain,
4266                       DAG.getTargetExternalSymbol("_alloca", IntPtr),
4267                       DAG.getRegister(X86::EAX, IntPtr),
4268                       Flag };
4269   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
4270   Flag = Chain.getValue(1);
4271
4272   Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
4273
4274   std::vector<MVT::ValueType> Tys;
4275   Tys.push_back(SPTy);
4276   Tys.push_back(MVT::Other);
4277   SDOperand Ops1[2] = { Chain.getValue(0), Chain };
4278   return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
4279 }
4280
4281 SDOperand
4282 X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
4283   MachineFunction &MF = DAG.getMachineFunction();
4284   const Function* Fn = MF.getFunction();
4285   if (Fn->hasExternalLinkage() &&
4286       Subtarget->isTargetCygMing() &&
4287       Fn->getName() == "main")
4288     MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
4289
4290   unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
4291   if (Subtarget->is64Bit())
4292     return LowerX86_64CCCArguments(Op, DAG);
4293   else
4294     switch (CC) {
4295     default:
4296       assert(0 && "Unsupported calling convention");
4297     case CallingConv::Fast:
4298       return LowerCCCArguments(Op, DAG, true);
4300     case CallingConv::C:
4301       return LowerCCCArguments(Op, DAG);
4302     case CallingConv::X86_StdCall:
4303       MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
4304       return LowerCCCArguments(Op, DAG, true);
4305     case CallingConv::X86_FastCall:
4306       MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
4307       return LowerFastCCArguments(Op, DAG);
4308     }
4309 }
4310
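// A sketch of the inline strategy implemented below for a DWORD-aligned
// memset(p, c, n) with constant c and n: the byte value is replicated into
// EAX (c * 0x01010101), ECX is set to n / 4, EDI is set to p, a single
// "rep stosl" fills the bulk of the buffer, and the remaining n % 4 bytes
// are finished with explicit stores.
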
4311 SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
4312   SDOperand InFlag(0, 0);
4313   SDOperand Chain = Op.getOperand(0);
4314   unsigned Align =
4315     (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4316   if (Align == 0) Align = 1;
4317
4318   ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4319   // If not DWORD aligned, or if the size is more than the threshold, call
4320   // memset. The libc version is likely to be faster for these cases. It can
4321   // use the address value and run-time information about the CPU.
4322   if ((Align & 3) != 0 ||
4323       (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
4324     MVT::ValueType IntPtr = getPointerTy();
4325     const Type *IntPtrTy = getTargetData()->getIntPtrType();
4326     TargetLowering::ArgListTy Args;
4327     TargetLowering::ArgListEntry Entry;
4328     Entry.Node = Op.getOperand(1);
4329     Entry.Ty = IntPtrTy;
4330     Args.push_back(Entry);
4331     // Extend the unsigned i8 argument to be an int value for the call.
4332     Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
4333     Entry.Ty = IntPtrTy;
4334     Args.push_back(Entry);
4335     Entry.Node = Op.getOperand(3);
4336     Args.push_back(Entry);
4337     std::pair<SDOperand,SDOperand> CallResult =
4338       LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
4339                   DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
4340     return CallResult.second;
4341   }
4342
4343   MVT::ValueType AVT;
4344   SDOperand Count;
4345   ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
4346   unsigned BytesLeft = 0;
4347   bool TwoRepStos = false;
4348   if (ValC) {
4349     unsigned ValReg;
4350     uint64_t Val = ValC->getValue() & 255;
4351
4352     // If the value is a constant, then we can potentially use wider stores.
4353     switch (Align & 3) {
4354     case 2:   // WORD aligned
4355       AVT = MVT::i16;
4356       ValReg = X86::AX;
4357       Val = (Val << 8) | Val;
4358       break;
4359     case 0:   // DWORD aligned
4360       AVT = MVT::i32;
4361       ValReg = X86::EAX;
4362       Val = (Val << 8)  | Val;
4363       Val = (Val << 16) | Val;
4364       if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
4365         AVT = MVT::i64;
4366         ValReg = X86::RAX;
4367         Val = (Val << 32) | Val;
4368       }
4369       break;
4370     default:  // Byte aligned
4371       AVT = MVT::i8;
4372       ValReg = X86::AL;
4373       Count = Op.getOperand(3);
4374       break;
4375     }
4376
4377     if (AVT > MVT::i8) {
4378       if (I) {
4379         unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4380         Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
4381         BytesLeft = I->getValue() % UBytes;
4382       } else {
4383         assert(AVT >= MVT::i32 &&
4384                "Do not use rep;stos if not at least DWORD aligned");
4385         Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4386                             Op.getOperand(3), DAG.getConstant(2, MVT::i8));
4387         TwoRepStos = true;
4388       }
4389     }
4390
4391     Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
4392                              InFlag);
4393     InFlag = Chain.getValue(1);
4394   } else {
4395     AVT = MVT::i8;
4396     Count = Op.getOperand(3);
4397     Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
4398     InFlag = Chain.getValue(1);
4399   }
4400
4401   Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4402                            Count, InFlag);
4403   InFlag = Chain.getValue(1);
4404   Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4405                            Op.getOperand(1), InFlag);
4406   InFlag = Chain.getValue(1);
4407
4408   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4409   SmallVector<SDOperand, 8> Ops;
4410   Ops.push_back(Chain);
4411   Ops.push_back(DAG.getValueType(AVT));
4412   Ops.push_back(InFlag);
4413   Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4414
4415   if (TwoRepStos) {
4416     InFlag = Chain.getValue(1);
4417     Count = Op.getOperand(3);
4418     MVT::ValueType CVT = Count.getValueType();
4419     SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4420                                  DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4421     Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4422                              Left, InFlag);
4423     InFlag = Chain.getValue(1);
4424     Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4425     Ops.clear();
4426     Ops.push_back(Chain);
4427     Ops.push_back(DAG.getValueType(MVT::i8));
4428     Ops.push_back(InFlag);
4429     Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4430   } else if (BytesLeft) {
4431     // Issue stores for the last 1 - 7 bytes.
4432     SDOperand Value;
4433     unsigned Val = ValC->getValue() & 255;
4434     unsigned Offset = I->getValue() - BytesLeft;
4435     SDOperand DstAddr = Op.getOperand(1);
4436     MVT::ValueType AddrVT = DstAddr.getValueType();
4437     if (BytesLeft >= 4) {
4438       Val = (Val << 8)  | Val;
4439       Val = (Val << 16) | Val;
4440       Value = DAG.getConstant(Val, MVT::i32);
4441       Chain = DAG.getStore(Chain, Value,
4442                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4443                                        DAG.getConstant(Offset, AddrVT)),
4444                            NULL, 0);
4445       BytesLeft -= 4;
4446       Offset += 4;
4447     }
4448     if (BytesLeft >= 2) {
4449       Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
4450       Chain = DAG.getStore(Chain, Value,
4451                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4452                                        DAG.getConstant(Offset, AddrVT)),
4453                            NULL, 0);
4454       BytesLeft -= 2;
4455       Offset += 2;
4456     }
4457     if (BytesLeft == 1) {
4458       Value = DAG.getConstant(Val, MVT::i8);
4459       Chain = DAG.getStore(Chain, Value,
4460                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4461                                        DAG.getConstant(Offset, AddrVT)),
4462                            NULL, 0);
4463     }
4464   }
4465
4466   return Chain;
4467 }
4468
4469 SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
4470   SDOperand ChainOp  = Op.getOperand(0);
4471   SDOperand DestOp   = Op.getOperand(1);
4472   SDOperand SourceOp = Op.getOperand(2);
4473   SDOperand CountOp  = Op.getOperand(3);
4474   SDOperand AlignOp  = Op.getOperand(4);
4475   unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
4476   if (Align == 0) Align = 1;
4477
4478   // The libc version is likely to be faster for the following cases. It can
4479   // use the address value and run-time information about the CPU.
4480   // (With glibc 2.6.1 on a Core 2, copying an array of 100M longs was 30%
4481   // faster.)
4482   // If not DWORD aligned, call memcpy.
4483   if ((Align & 3) != 0)
4484     return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
4485
4486   // If the size is unknown, call memcpy.
4487   ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
4488   if (!I)
4489     return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
4490
4491   // If the size is more than the threshold, call memcpy.
4492 unsigned Size = I->getValue(); 4493 if (Size > Subtarget->getMinRepStrSizeThreshold()) 4494 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); 4495 4496 return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); 4497} 4498 4499SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain, 4500 SDOperand Dest, 4501 SDOperand Source, 4502 SDOperand Count, 4503 SelectionDAG &DAG) { 4504 MVT::ValueType IntPtr = getPointerTy(); 4505 TargetLowering::ArgListTy Args; 4506 TargetLowering::ArgListEntry Entry; 4507 Entry.Ty = getTargetData()->getIntPtrType(); 4508 Entry.Node = Dest; Args.push_back(Entry); 4509 Entry.Node = Source; Args.push_back(Entry); 4510 Entry.Node = Count; Args.push_back(Entry); 4511 std::pair<SDOperand,SDOperand> CallResult = 4512 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 4513 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 4514 return CallResult.second; 4515} 4516 4517SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4518 SDOperand Dest, 4519 SDOperand Source, 4520 unsigned Size, 4521 unsigned Align, 4522 SelectionDAG &DAG) { 4523 MVT::ValueType AVT; 4524 unsigned BytesLeft = 0; 4525 switch (Align & 3) { 4526 case 2: // WORD aligned 4527 AVT = MVT::i16; 4528 break; 4529 case 0: // DWORD aligned 4530 AVT = MVT::i32; 4531 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4532 AVT = MVT::i64; 4533 break; 4534 default: // Byte aligned 4535 AVT = MVT::i8; 4536 break; 4537 } 4538 4539 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4540 SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy()); 4541 BytesLeft = Size % UBytes; 4542 4543 SDOperand InFlag(0, 0); 4544 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4545 Count, InFlag); 4546 InFlag = Chain.getValue(1); 4547 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4548 Dest, InFlag); 4549 InFlag = Chain.getValue(1); 4550 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4551 Source, InFlag); 4552 InFlag = Chain.getValue(1); 4553 4554 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4555 SmallVector<SDOperand, 8> Ops; 4556 Ops.push_back(Chain); 4557 Ops.push_back(DAG.getValueType(AVT)); 4558 Ops.push_back(InFlag); 4559 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4560 4561 if (BytesLeft) { 4562 // Issue loads and stores for the last 1 - 7 bytes. 
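    // This mirrors the memset tail handling: e.g. a DWORD-aligned copy of
    // 13 bytes does rep;movs with Count == 3 (12 bytes) and then a single
    // i8 load/store pair at offset 12.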
4563 unsigned Offset = Size - BytesLeft; 4564 SDOperand DstAddr = Dest; 4565 MVT::ValueType DstVT = DstAddr.getValueType(); 4566 SDOperand SrcAddr = Source; 4567 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4568 SDOperand Value; 4569 if (BytesLeft >= 4) { 4570 Value = DAG.getLoad(MVT::i32, Chain, 4571 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4572 DAG.getConstant(Offset, SrcVT)), 4573 NULL, 0); 4574 Chain = Value.getValue(1); 4575 Chain = DAG.getStore(Chain, Value, 4576 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4577 DAG.getConstant(Offset, DstVT)), 4578 NULL, 0); 4579 BytesLeft -= 4; 4580 Offset += 4; 4581 } 4582 if (BytesLeft >= 2) { 4583 Value = DAG.getLoad(MVT::i16, Chain, 4584 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4585 DAG.getConstant(Offset, SrcVT)), 4586 NULL, 0); 4587 Chain = Value.getValue(1); 4588 Chain = DAG.getStore(Chain, Value, 4589 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4590 DAG.getConstant(Offset, DstVT)), 4591 NULL, 0); 4592 BytesLeft -= 2; 4593 Offset += 2; 4594 } 4595 4596 if (BytesLeft == 1) { 4597 Value = DAG.getLoad(MVT::i8, Chain, 4598 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4599 DAG.getConstant(Offset, SrcVT)), 4600 NULL, 0); 4601 Chain = Value.getValue(1); 4602 Chain = DAG.getStore(Chain, Value, 4603 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4604 DAG.getConstant(Offset, DstVT)), 4605 NULL, 0); 4606 } 4607 } 4608 4609 return Chain; 4610} 4611 4612SDOperand 4613X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4614 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4615 SDOperand TheOp = Op.getOperand(0); 4616 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 4617 if (Subtarget->is64Bit()) { 4618 SDOperand Copy1 = 4619 DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4620 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 4621 MVT::i64, Copy1.getValue(2)); 4622 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 4623 DAG.getConstant(32, MVT::i8)); 4624 SDOperand Ops[] = { 4625 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 4626 }; 4627 4628 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4629 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 4630 } 4631 4632 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4633 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 4634 MVT::i32, Copy1.getValue(2)); 4635 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 4636 Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 4637 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 4638} 4639 4640SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4641 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4642 4643 if (!Subtarget->is64Bit()) { 4644 // vastart just stores the address of the VarArgsFrameIndex slot into the 4645 // memory location argument. 4646 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4647 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4648 SV->getOffset()); 4649 } 4650 4651 // __va_list_tag: 4652 // gp_offset (0 - 6 * 8) 4653 // fp_offset (48 - 48 + 8 * 16) 4654 // overflow_arg_area (point to parameters coming in memory). 
4655 // reg_save_area 4656 SmallVector<SDOperand, 8> MemOps; 4657 SDOperand FIN = Op.getOperand(1); 4658 // Store gp_offset 4659 SDOperand Store = DAG.getStore(Op.getOperand(0), 4660 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4661 FIN, SV->getValue(), SV->getOffset()); 4662 MemOps.push_back(Store); 4663 4664 // Store fp_offset 4665 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4666 DAG.getConstant(4, getPointerTy())); 4667 Store = DAG.getStore(Op.getOperand(0), 4668 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4669 FIN, SV->getValue(), SV->getOffset()); 4670 MemOps.push_back(Store); 4671 4672 // Store ptr to overflow_arg_area 4673 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4674 DAG.getConstant(4, getPointerTy())); 4675 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4676 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4677 SV->getOffset()); 4678 MemOps.push_back(Store); 4679 4680 // Store ptr to reg_save_area. 4681 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4682 DAG.getConstant(8, getPointerTy())); 4683 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4684 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4685 SV->getOffset()); 4686 MemOps.push_back(Store); 4687 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4688} 4689 4690SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4691 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4692 SDOperand Chain = Op.getOperand(0); 4693 SDOperand DstPtr = Op.getOperand(1); 4694 SDOperand SrcPtr = Op.getOperand(2); 4695 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4696 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4697 4698 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4699 SrcSV->getValue(), SrcSV->getOffset()); 4700 Chain = SrcPtr.getValue(1); 4701 for (unsigned i = 0; i < 3; ++i) { 4702 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4703 SrcSV->getValue(), SrcSV->getOffset()); 4704 Chain = Val.getValue(1); 4705 Chain = DAG.getStore(Chain, Val, DstPtr, 4706 DstSV->getValue(), DstSV->getOffset()); 4707 if (i == 2) 4708 break; 4709 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4710 DAG.getConstant(8, getPointerTy())); 4711 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4712 DAG.getConstant(8, getPointerTy())); 4713 } 4714 return Chain; 4715} 4716 4717SDOperand 4718X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4719 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4720 switch (IntNo) { 4721 default: return SDOperand(); // Don't custom lower most intrinsics. 4722 // Comparison intrinsics. 
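  // Each of these lowers to an X86ISD::COMI/UCOMI node that compares the
  // two scalar operands and sets EFLAGS, followed by an X86ISD::SETCC that
  // materializes the predicate byte, which is then any_extended to the i32
  // intrinsic result.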
4723 case Intrinsic::x86_sse_comieq_ss: 4724 case Intrinsic::x86_sse_comilt_ss: 4725 case Intrinsic::x86_sse_comile_ss: 4726 case Intrinsic::x86_sse_comigt_ss: 4727 case Intrinsic::x86_sse_comige_ss: 4728 case Intrinsic::x86_sse_comineq_ss: 4729 case Intrinsic::x86_sse_ucomieq_ss: 4730 case Intrinsic::x86_sse_ucomilt_ss: 4731 case Intrinsic::x86_sse_ucomile_ss: 4732 case Intrinsic::x86_sse_ucomigt_ss: 4733 case Intrinsic::x86_sse_ucomige_ss: 4734 case Intrinsic::x86_sse_ucomineq_ss: 4735 case Intrinsic::x86_sse2_comieq_sd: 4736 case Intrinsic::x86_sse2_comilt_sd: 4737 case Intrinsic::x86_sse2_comile_sd: 4738 case Intrinsic::x86_sse2_comigt_sd: 4739 case Intrinsic::x86_sse2_comige_sd: 4740 case Intrinsic::x86_sse2_comineq_sd: 4741 case Intrinsic::x86_sse2_ucomieq_sd: 4742 case Intrinsic::x86_sse2_ucomilt_sd: 4743 case Intrinsic::x86_sse2_ucomile_sd: 4744 case Intrinsic::x86_sse2_ucomigt_sd: 4745 case Intrinsic::x86_sse2_ucomige_sd: 4746 case Intrinsic::x86_sse2_ucomineq_sd: { 4747 unsigned Opc = 0; 4748 ISD::CondCode CC = ISD::SETCC_INVALID; 4749 switch (IntNo) { 4750 default: break; 4751 case Intrinsic::x86_sse_comieq_ss: 4752 case Intrinsic::x86_sse2_comieq_sd: 4753 Opc = X86ISD::COMI; 4754 CC = ISD::SETEQ; 4755 break; 4756 case Intrinsic::x86_sse_comilt_ss: 4757 case Intrinsic::x86_sse2_comilt_sd: 4758 Opc = X86ISD::COMI; 4759 CC = ISD::SETLT; 4760 break; 4761 case Intrinsic::x86_sse_comile_ss: 4762 case Intrinsic::x86_sse2_comile_sd: 4763 Opc = X86ISD::COMI; 4764 CC = ISD::SETLE; 4765 break; 4766 case Intrinsic::x86_sse_comigt_ss: 4767 case Intrinsic::x86_sse2_comigt_sd: 4768 Opc = X86ISD::COMI; 4769 CC = ISD::SETGT; 4770 break; 4771 case Intrinsic::x86_sse_comige_ss: 4772 case Intrinsic::x86_sse2_comige_sd: 4773 Opc = X86ISD::COMI; 4774 CC = ISD::SETGE; 4775 break; 4776 case Intrinsic::x86_sse_comineq_ss: 4777 case Intrinsic::x86_sse2_comineq_sd: 4778 Opc = X86ISD::COMI; 4779 CC = ISD::SETNE; 4780 break; 4781 case Intrinsic::x86_sse_ucomieq_ss: 4782 case Intrinsic::x86_sse2_ucomieq_sd: 4783 Opc = X86ISD::UCOMI; 4784 CC = ISD::SETEQ; 4785 break; 4786 case Intrinsic::x86_sse_ucomilt_ss: 4787 case Intrinsic::x86_sse2_ucomilt_sd: 4788 Opc = X86ISD::UCOMI; 4789 CC = ISD::SETLT; 4790 break; 4791 case Intrinsic::x86_sse_ucomile_ss: 4792 case Intrinsic::x86_sse2_ucomile_sd: 4793 Opc = X86ISD::UCOMI; 4794 CC = ISD::SETLE; 4795 break; 4796 case Intrinsic::x86_sse_ucomigt_ss: 4797 case Intrinsic::x86_sse2_ucomigt_sd: 4798 Opc = X86ISD::UCOMI; 4799 CC = ISD::SETGT; 4800 break; 4801 case Intrinsic::x86_sse_ucomige_ss: 4802 case Intrinsic::x86_sse2_ucomige_sd: 4803 Opc = X86ISD::UCOMI; 4804 CC = ISD::SETGE; 4805 break; 4806 case Intrinsic::x86_sse_ucomineq_ss: 4807 case Intrinsic::x86_sse2_ucomineq_sd: 4808 Opc = X86ISD::UCOMI; 4809 CC = ISD::SETNE; 4810 break; 4811 } 4812 4813 unsigned X86CC; 4814 SDOperand LHS = Op.getOperand(1); 4815 SDOperand RHS = Op.getOperand(2); 4816 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4817 4818 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 4819 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 4820 DAG.getConstant(X86CC, MVT::i8), Cond); 4821 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4822 } 4823 } 4824} 4825 4826SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4827 // Depths > 0 not supported yet! 
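  // That is, only __builtin_return_address(0) is handled; deeper frames
  // would require walking the saved frame-pointer chain.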
4828 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4829 return SDOperand(); 4830 4831 // Just load the return address 4832 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4833 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4834} 4835 4836SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4837 // Depths > 0 not supported yet! 4838 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4839 return SDOperand(); 4840 4841 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4842 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4843 DAG.getConstant(4, getPointerTy())); 4844} 4845 4846SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4847 SelectionDAG &DAG) { 4848 // Is not yet supported on x86-64 4849 if (Subtarget->is64Bit()) 4850 return SDOperand(); 4851 4852 return DAG.getConstant(8, getPointerTy()); 4853} 4854 4855SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4856{ 4857 assert(!Subtarget->is64Bit() && 4858 "Lowering of eh_return builtin is not supported yet on x86-64"); 4859 4860 MachineFunction &MF = DAG.getMachineFunction(); 4861 SDOperand Chain = Op.getOperand(0); 4862 SDOperand Offset = Op.getOperand(1); 4863 SDOperand Handler = Op.getOperand(2); 4864 4865 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4866 getPointerTy()); 4867 4868 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4869 DAG.getConstant(-4UL, getPointerTy())); 4870 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4871 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4872 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4873 MF.addLiveOut(X86::ECX); 4874 4875 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4876 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4877} 4878 4879SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4880 SelectionDAG &DAG) { 4881 SDOperand Root = Op.getOperand(0); 4882 SDOperand Trmp = Op.getOperand(1); // trampoline 4883 SDOperand FPtr = Op.getOperand(2); // nested function 4884 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4885 4886 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4887 4888 if (Subtarget->is64Bit()) { 4889 return SDOperand(); // not yet supported 4890 } else { 4891 Function *Func = (Function *) 4892 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4893 unsigned CC = Func->getCallingConv(); 4894 unsigned NestReg; 4895 4896 switch (CC) { 4897 default: 4898 assert(0 && "Unsupported calling convention"); 4899 case CallingConv::C: 4900 case CallingConv::X86_StdCall: { 4901 // Pass 'nest' parameter in ECX. 4902 // Must be kept in sync with X86CallingConv.td 4903 NestReg = X86::ECX; 4904 4905 // Check that ECX wasn't needed by an 'inreg' parameter. 4906 const FunctionType *FTy = Func->getFunctionType(); 4907 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4908 4909 if (Attrs && !Func->isVarArg()) { 4910 unsigned InRegCount = 0; 4911 unsigned Idx = 1; 4912 4913 for (FunctionType::param_iterator I = FTy->param_begin(), 4914 E = FTy->param_end(); I != E; ++I, ++Idx) 4915 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4916 // FIXME: should only count parameters that are lowered to integers. 
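            // Each inreg parameter takes one 32-bit piece; in the 32-bit C
            // convention only EAX, EDX and ECX are available for inreg
            // arguments, so a third piece would clobber ECX, the 'nest'
            // register (hence the InRegCount > 2 check below).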
4917             InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
4918
4919         if (InRegCount > 2) {
4920           cerr << "Nest register in use - reduce number of inreg parameters!\n";
4921           abort();
4922         }
4923       }
4924       break;
4925     }
4926     case CallingConv::X86_FastCall:
4927       // Pass 'nest' parameter in EAX.
4928       // Must be kept in sync with X86CallingConv.td
4929       NestReg = X86::EAX;
4930       break;
4931     }
4932
4933     const X86InstrInfo *TII =
4934       ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
4935
4936     SDOperand OutChains[4];
4937     SDOperand Addr, Disp;
4938
4939     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
4940     Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
4941
4942     unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
4943     unsigned char N86Reg = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
4944     OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
4945                                 Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
4946
4947     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
4948     OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
4949                                 TrmpSV->getOffset() + 1, false, 1);
4950
4951     unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
4952     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
4953     OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
4954                                 TrmpSV->getValue(), TrmpSV->getOffset() + 5);
4955
4956     Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
4957     OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
4958                                 TrmpSV->getOffset() + 6, false, 1);
4959
4960     SDOperand Ops[] =
4961       { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
4962     return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
4963   }
4964 }
4965
4966 /// LowerOperation - Provide custom lowering hooks for some operations.
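/// Every opcode marked Custom via setOperationAction in the constructor is
/// dispatched through the switch below to the matching LowerXXX helper;
/// e.g. an ISD::MEMSET node ends up in LowerMEMSET above.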
4967/// 4968SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4969 switch (Op.getOpcode()) { 4970 default: assert(0 && "Should not custom lower this!"); 4971 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4972 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4973 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4974 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4975 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4976 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4977 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4978 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4979 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4980 case ISD::SHL_PARTS: 4981 case ISD::SRA_PARTS: 4982 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4983 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4984 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4985 case ISD::FABS: return LowerFABS(Op, DAG); 4986 case ISD::FNEG: return LowerFNEG(Op, DAG); 4987 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4988 case ISD::SETCC: return LowerSETCC(Op, DAG); 4989 case ISD::SELECT: return LowerSELECT(Op, DAG); 4990 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4991 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4992 case ISD::CALL: return LowerCALL(Op, DAG); 4993 case ISD::RET: return LowerRET(Op, DAG); 4994 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4995 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4996 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4997 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4998 case ISD::VASTART: return LowerVASTART(Op, DAG); 4999 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 5000 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5001 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5002 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5003 case ISD::FRAME_TO_ARGS_OFFSET: 5004 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 5005 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 5006 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 5007 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 5008 } 5009 return SDOperand(); 5010} 5011 5012const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 5013 switch (Opcode) { 5014 default: return NULL; 5015 case X86ISD::SHLD: return "X86ISD::SHLD"; 5016 case X86ISD::SHRD: return "X86ISD::SHRD"; 5017 case X86ISD::FAND: return "X86ISD::FAND"; 5018 case X86ISD::FOR: return "X86ISD::FOR"; 5019 case X86ISD::FXOR: return "X86ISD::FXOR"; 5020 case X86ISD::FSRL: return "X86ISD::FSRL"; 5021 case X86ISD::FILD: return "X86ISD::FILD"; 5022 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 5023 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 5024 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 5025 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 5026 case X86ISD::FLD: return "X86ISD::FLD"; 5027 case X86ISD::FST: return "X86ISD::FST"; 5028 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 5029 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 5030 case X86ISD::CALL: return "X86ISD::CALL"; 5031 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 5032 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 5033 case X86ISD::CMP: return "X86ISD::CMP"; 5034 case X86ISD::COMI: return 
"X86ISD::COMI"; 5035 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 5036 case X86ISD::SETCC: return "X86ISD::SETCC"; 5037 case X86ISD::CMOV: return "X86ISD::CMOV"; 5038 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 5039 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 5040 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 5041 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 5042 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 5043 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 5044 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 5045 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 5046 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 5047 case X86ISD::FMAX: return "X86ISD::FMAX"; 5048 case X86ISD::FMIN: return "X86ISD::FMIN"; 5049 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 5050 case X86ISD::FRCP: return "X86ISD::FRCP"; 5051 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 5052 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 5053 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 5054 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 5055 } 5056} 5057 5058// isLegalAddressingMode - Return true if the addressing mode represented 5059// by AM is legal for this target, for a load/store of the specified type. 5060bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 5061 const Type *Ty) const { 5062 // X86 supports extremely general addressing modes. 5063 5064 // X86 allows a sign-extended 32-bit immediate field as a displacement. 5065 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 5066 return false; 5067 5068 if (AM.BaseGV) { 5069 // We can only fold this if we don't need an extra load. 5070 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 5071 return false; 5072 5073 // X86-64 only supports addr of globals in small code model. 5074 if (Subtarget->is64Bit()) { 5075 if (getTargetMachine().getCodeModel() != CodeModel::Small) 5076 return false; 5077 // If lower 4G is not available, then we must use rip-relative addressing. 5078 if (AM.BaseOffs || AM.Scale > 1) 5079 return false; 5080 } 5081 } 5082 5083 switch (AM.Scale) { 5084 case 0: 5085 case 1: 5086 case 2: 5087 case 4: 5088 case 8: 5089 // These scales always work. 5090 break; 5091 case 3: 5092 case 5: 5093 case 9: 5094 // These scales are formed with basereg+scalereg. Only accept if there is 5095 // no basereg yet. 5096 if (AM.HasBaseReg) 5097 return false; 5098 break; 5099 default: // Other stuff never works. 5100 return false; 5101 } 5102 5103 return true; 5104} 5105 5106 5107/// isShuffleMaskLegal - Targets can use this to indicate that they only 5108/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5109/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5110/// are assumed to be legal. 5111bool 5112X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 5113 // Only do shuffles on 128-bit vector types for now. 
5114   if (MVT::getSizeInBits(VT) == 64) return false;
5115   return (Mask.Val->getNumOperands() <= 4 ||
5116           isIdentityMask(Mask.Val) ||
5117           isIdentityMask(Mask.Val, true) ||
5118           isSplatMask(Mask.Val) ||
5119           isPSHUFHW_PSHUFLWMask(Mask.Val) ||
5120           X86::isUNPCKLMask(Mask.Val) ||
5121           X86::isUNPCKHMask(Mask.Val) ||
5122           X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
5123           X86::isUNPCKH_v_undef_Mask(Mask.Val));
5124 }
5125
5126 bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
5127                                                MVT::ValueType EVT,
5128                                                SelectionDAG &DAG) const {
5129   unsigned NumElts = BVOps.size();
5130   // Only do shuffles on 128-bit vector types for now.
5131   if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
5132   if (NumElts == 2) return true;
5133   if (NumElts == 4) {
5134     return (isMOVLMask(&BVOps[0], 4) ||
5135             isCommutedMOVL(&BVOps[0], 4, true) ||
5136             isSHUFPMask(&BVOps[0], 4) ||
5137             isCommutedSHUFP(&BVOps[0], 4));
5138   }
5139   return false;
5140 }
5141
5142 //===----------------------------------------------------------------------===//
5143 //                           X86 Scheduler Hooks
5144 //===----------------------------------------------------------------------===//
5145
5146 MachineBasicBlock *
5147 X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
5148                                            MachineBasicBlock *BB) {
5149   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5150   switch (MI->getOpcode()) {
5151   default: assert(false && "Unexpected instr type to insert");
5152   case X86::CMOV_FR32:
5153   case X86::CMOV_FR64:
5154   case X86::CMOV_V4F32:
5155   case X86::CMOV_V2F64:
5156   case X86::CMOV_V2I64: {
5157     // To "insert" a SELECT_CC instruction, we actually have to insert the
5158     // diamond control-flow pattern.  The incoming instruction knows the
5159     // destination vreg to set, the condition code register to branch on, the
5160     // true/false values to select between, and a branch opcode to use.
5161     const BasicBlock *LLVM_BB = BB->getBasicBlock();
5162     ilist<MachineBasicBlock>::iterator It = BB;
5163     ++It;
5164
5165     //  thisMBB:
5166     //  ...
5167     //   TrueVal = ...
5168     //   cmpTY ccX, r1, r2
5169     //   bCC sinkMBB
5170     //   fallthrough --> copy0MBB
5171     MachineBasicBlock *thisMBB = BB;
5172     MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
5173     MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
5174     unsigned Opc =
5175       X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
5176     BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
5177     MachineFunction *F = BB->getParent();
5178     F->getBasicBlockList().insert(It, copy0MBB);
5179     F->getBasicBlockList().insert(It, sinkMBB);
5180     // Update machine-CFG edges by first adding all successors of the current
5181     // block to the new block which will contain the Phi node for the select.
5182     for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
5183          e = BB->succ_end(); i != e; ++i)
5184       sinkMBB->addSuccessor(*i);
5185     // Next, remove all successors of the current block, and add the true
5186     // and fallthrough blocks as its successors.
5187     while (!BB->succ_empty())
5188       BB->removeSuccessor(BB->succ_begin());
5189     BB->addSuccessor(copy0MBB);
5190     BB->addSuccessor(sinkMBB);
5191
5192     //  copy0MBB:
5193     //   %FalseValue = ...
5194     //   # fallthrough to sinkMBB
5195     BB = copy0MBB;
5196
5197     // Update machine-CFG edges
5198     BB->addSuccessor(sinkMBB);
5199
5200     //  sinkMBB:
5201     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
5202     //  ...
5203 BB = sinkMBB; 5204 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 5205 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5206 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5207 5208 delete MI; // The pseudo instruction is gone now. 5209 return BB; 5210 } 5211 5212 case X86::FP32_TO_INT16_IN_MEM: 5213 case X86::FP32_TO_INT32_IN_MEM: 5214 case X86::FP32_TO_INT64_IN_MEM: 5215 case X86::FP64_TO_INT16_IN_MEM: 5216 case X86::FP64_TO_INT32_IN_MEM: 5217 case X86::FP64_TO_INT64_IN_MEM: 5218 case X86::FP80_TO_INT16_IN_MEM: 5219 case X86::FP80_TO_INT32_IN_MEM: 5220 case X86::FP80_TO_INT64_IN_MEM: { 5221 // Change the floating point control register to use "round towards zero" 5222 // mode when truncating to an integer value. 5223 MachineFunction *F = BB->getParent(); 5224 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 5225 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 5226 5227 // Load the old value of the high byte of the control word... 5228 unsigned OldCW = 5229 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 5230 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 5231 5232 // Set the high part to be round to zero... 5233 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 5234 .addImm(0xC7F); 5235 5236 // Reload the modified control word now... 5237 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 5238 5239 // Restore the memory image of control word to original value 5240 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 5241 .addReg(OldCW); 5242 5243 // Get the X86 opcode to use. 5244 unsigned Opc; 5245 switch (MI->getOpcode()) { 5246 default: assert(0 && "illegal opcode!"); 5247 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; 5248 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; 5249 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; 5250 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; 5251 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; 5252 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; 5253 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; 5254 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; 5255 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; 5256 } 5257 5258 X86AddressMode AM; 5259 MachineOperand &Op = MI->getOperand(0); 5260 if (Op.isRegister()) { 5261 AM.BaseType = X86AddressMode::RegBase; 5262 AM.Base.Reg = Op.getReg(); 5263 } else { 5264 AM.BaseType = X86AddressMode::FrameIndexBase; 5265 AM.Base.FrameIndex = Op.getFrameIndex(); 5266 } 5267 Op = MI->getOperand(1); 5268 if (Op.isImmediate()) 5269 AM.Scale = Op.getImm(); 5270 Op = MI->getOperand(2); 5271 if (Op.isImmediate()) 5272 AM.IndexReg = Op.getImm(); 5273 Op = MI->getOperand(3); 5274 if (Op.isGlobalAddress()) { 5275 AM.GV = Op.getGlobal(); 5276 } else { 5277 AM.Disp = Op.getImm(); 5278 } 5279 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 5280 .addReg(MI->getOperand(4).getReg()); 5281 5282 // Reload the original control word now. 5283 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 5284 5285 delete MI; // The pseudo instruction is gone now. 
5286 return BB; 5287 } 5288 } 5289} 5290 5291//===----------------------------------------------------------------------===// 5292// X86 Optimization Hooks 5293//===----------------------------------------------------------------------===// 5294 5295void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 5296 uint64_t Mask, 5297 uint64_t &KnownZero, 5298 uint64_t &KnownOne, 5299 const SelectionDAG &DAG, 5300 unsigned Depth) const { 5301 unsigned Opc = Op.getOpcode(); 5302 assert((Opc >= ISD::BUILTIN_OP_END || 5303 Opc == ISD::INTRINSIC_WO_CHAIN || 5304 Opc == ISD::INTRINSIC_W_CHAIN || 5305 Opc == ISD::INTRINSIC_VOID) && 5306 "Should use MaskedValueIsZero if you don't know whether Op" 5307 " is a target node!"); 5308 5309 KnownZero = KnownOne = 0; // Don't know anything. 5310 switch (Opc) { 5311 default: break; 5312 case X86ISD::SETCC: 5313 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 5314 break; 5315 } 5316} 5317 5318/// getShuffleScalarElt - Returns the scalar element that will make up the ith 5319/// element of the result of the vector shuffle. 5320static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 5321 MVT::ValueType VT = N->getValueType(0); 5322 SDOperand PermMask = N->getOperand(2); 5323 unsigned NumElems = PermMask.getNumOperands(); 5324 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 5325 i %= NumElems; 5326 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5327 return (i == 0) 5328 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5329 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 5330 SDOperand Idx = PermMask.getOperand(i); 5331 if (Idx.getOpcode() == ISD::UNDEF) 5332 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5333 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 5334 } 5335 return SDOperand(); 5336} 5337 5338/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 5339/// node is a GlobalAddress + an offset. 5340static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 5341 unsigned Opc = N->getOpcode(); 5342 if (Opc == X86ISD::Wrapper) { 5343 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 5344 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 5345 return true; 5346 } 5347 } else if (Opc == ISD::ADD) { 5348 SDOperand N1 = N->getOperand(0); 5349 SDOperand N2 = N->getOperand(1); 5350 if (isGAPlusOffset(N1.Val, GA, Offset)) { 5351 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 5352 if (V) { 5353 Offset += V->getSignExtended(); 5354 return true; 5355 } 5356 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 5357 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 5358 if (V) { 5359 Offset += V->getSignExtended(); 5360 return true; 5361 } 5362 } 5363 } 5364 return false; 5365} 5366 5367/// isConsecutiveLoad - Returns true if N is loading from an address of Base 5368/// + Dist * Size. 
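/// For example, with Size == 4 a load of GV+8 is at Dist == 1 from a base
/// load of GV+4.  Frame objects are compared by object offset, other
/// addresses via isGAPlusOffset.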
5369 static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
5370                               MachineFrameInfo *MFI) {
5371   if (N->getOperand(0).Val != Base->getOperand(0).Val)
5372     return false;
5373
5374   SDOperand Loc = N->getOperand(1);
5375   SDOperand BaseLoc = Base->getOperand(1);
5376   if (Loc.getOpcode() == ISD::FrameIndex) {
5377     if (BaseLoc.getOpcode() != ISD::FrameIndex)
5378       return false;
5379     int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
5380     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
5381     int FS = MFI->getObjectSize(FI);
5382     int BFS = MFI->getObjectSize(BFI);
5383     if (FS != BFS || FS != Size) return false;
5384     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
5385   } else {
5386     GlobalValue *GV1 = NULL;
5387     GlobalValue *GV2 = NULL;
5388     int64_t Offset1 = 0;
5389     int64_t Offset2 = 0;
5390     bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
5391     bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
5392     if (isGA1 && isGA2 && GV1 == GV2)
5393       return Offset1 == (Offset2 + Dist*Size);
5394   }
5395
5396   return false;
5397 }
5398
5399 static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
5400                               const X86Subtarget *Subtarget) {
5401   GlobalValue *GV = NULL;
5402   int64_t Offset = 0;
5403   if (isGAPlusOffset(Base, GV, Offset))
5404     return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
5405   else {
5406     assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
5407     int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
5408     if (BFI < 0)
5409       // Fixed objects do not specify alignment, however the offsets are known.
5410       return ((Subtarget->getStackAlignment() % 16) == 0 &&
5411               (MFI->getObjectOffset(BFI) % 16) == 0);
5412     else
5413       return MFI->getObjectAlignment(BFI) >= 16;
5414   }
5415   return false;
5416 }
5417
5418
5419 /// PerformShuffleCombine - Combine a vector_shuffle that is equal to
5420 /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
5421 /// if the load addresses are consecutive, non-overlapping, and in the right
5422 /// order.
5423 static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
5424                                        const X86Subtarget *Subtarget) {
5425   MachineFunction &MF = DAG.getMachineFunction();
5426   MachineFrameInfo *MFI = MF.getFrameInfo();
5427   MVT::ValueType VT = N->getValueType(0);
5428   MVT::ValueType EVT = MVT::getVectorElementType(VT);
5429   SDOperand PermMask = N->getOperand(2);
5430   int NumElems = (int)PermMask.getNumOperands();
5431   SDNode *Base = NULL;
5432   for (int i = 0; i < NumElems; ++i) {
5433     SDOperand Idx = PermMask.getOperand(i);
5434     if (Idx.getOpcode() == ISD::UNDEF) {
5435       if (!Base) return SDOperand();
5436     } else {
5437       SDOperand Arg =
5438         getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
5439       if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
5440         return SDOperand();
5441       if (!Base)
5442         Base = Arg.Val;
5443       else if (!isConsecutiveLoad(Arg.Val, Base,
5444                                   i, MVT::getSizeInBits(EVT) / 8, MFI))
5445         return SDOperand();
5446     }
5447   }
5448
5449   bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
5450   LoadSDNode *LD = cast<LoadSDNode>(Base);
5451   if (isAlign16) {
5452     return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
5453                        LD->getSrcValueOffset(), LD->isVolatile());
5454   } else {
5455     return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
5456                        LD->getSrcValueOffset(), LD->isVolatile(),
5457                        LD->getAlignment());
5458   }
5459 }
5460
5461 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
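/// When SSE2 is available, scalar f32/f64 selects of a setcc over the same
/// operands become FMIN/FMAX, e.g. (select (setolt X, Y), X, Y) ->
/// (FMIN X, Y); the non-strict orderings (le/ge and their unsigned forms)
/// are only turned into min/max when UnsafeFPMath is set.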
5462static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 5463 const X86Subtarget *Subtarget) { 5464 SDOperand Cond = N->getOperand(0); 5465 5466 // If we have SSE[12] support, try to form min/max nodes. 5467 if (Subtarget->hasSSE2() && 5468 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 5469 if (Cond.getOpcode() == ISD::SETCC) { 5470 // Get the LHS/RHS of the select. 5471 SDOperand LHS = N->getOperand(1); 5472 SDOperand RHS = N->getOperand(2); 5473 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 5474 5475 unsigned Opcode = 0; 5476 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 5477 switch (CC) { 5478 default: break; 5479 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 5480 case ISD::SETULE: 5481 case ISD::SETLE: 5482 if (!UnsafeFPMath) break; 5483 // FALL THROUGH. 5484 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 5485 case ISD::SETLT: 5486 Opcode = X86ISD::FMIN; 5487 break; 5488 5489 case ISD::SETOGT: // (X > Y) ? X : Y -> max 5490 case ISD::SETUGT: 5491 case ISD::SETGT: 5492 if (!UnsafeFPMath) break; 5493 // FALL THROUGH. 5494 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 5495 case ISD::SETGE: 5496 Opcode = X86ISD::FMAX; 5497 break; 5498 } 5499 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 5500 switch (CC) { 5501 default: break; 5502 case ISD::SETOGT: // (X > Y) ? Y : X -> min 5503 case ISD::SETUGT: 5504 case ISD::SETGT: 5505 if (!UnsafeFPMath) break; 5506 // FALL THROUGH. 5507 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 5508 case ISD::SETGE: 5509 Opcode = X86ISD::FMIN; 5510 break; 5511 5512 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 5513 case ISD::SETULE: 5514 case ISD::SETLE: 5515 if (!UnsafeFPMath) break; 5516 // FALL THROUGH. 5517 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 5518 case ISD::SETLT: 5519 Opcode = X86ISD::FMAX; 5520 break; 5521 } 5522 } 5523 5524 if (Opcode) 5525 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 5526 } 5527 5528 } 5529 5530 return SDOperand(); 5531} 5532 5533 5534SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 5535 DAGCombinerInfo &DCI) const { 5536 SelectionDAG &DAG = DCI.DAG; 5537 switch (N->getOpcode()) { 5538 default: break; 5539 case ISD::VECTOR_SHUFFLE: 5540 return PerformShuffleCombine(N, DAG, Subtarget); 5541 case ISD::SELECT: 5542 return PerformSELECTCombine(N, DAG, Subtarget); 5543 } 5544 5545 return SDOperand(); 5546} 5547 5548//===----------------------------------------------------------------------===// 5549// X86 Inline Assembly Support 5550//===----------------------------------------------------------------------===// 5551 5552/// getConstraintType - Given a constraint letter, return the type of 5553/// constraint it is for this target. 5554X86TargetLowering::ConstraintType 5555X86TargetLowering::getConstraintType(const std::string &Constraint) const { 5556 if (Constraint.size() == 1) { 5557 switch (Constraint[0]) { 5558 case 'A': 5559 case 'r': 5560 case 'R': 5561 case 'l': 5562 case 'q': 5563 case 'Q': 5564 case 'x': 5565 case 'Y': 5566 return C_RegisterClass; 5567 default: 5568 break; 5569 } 5570 } 5571 return TargetLowering::getConstraintType(Constraint); 5572} 5573 5574/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5575/// vector. If it is invalid, don't add anything to Ops. 
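/// For example, constraint 'I' only accepts immediates in [0, 31] (shift
/// counts) and 'N' only immediates in [0, 255] (I/O port numbers);
/// out-of-range constants simply return without adding anything to Ops.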
5576void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op, 5577 char Constraint, 5578 std::vector<SDOperand>&Ops, 5579 SelectionDAG &DAG) { 5580 SDOperand Result(0, 0); 5581 5582 switch (Constraint) { 5583 default: break; 5584 case 'I': 5585 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5586 if (C->getValue() <= 31) { 5587 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5588 break; 5589 } 5590 } 5591 return; 5592 case 'N': 5593 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5594 if (C->getValue() <= 255) { 5595 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5596 break; 5597 } 5598 } 5599 return; 5600 case 'i': { 5601 // Literal immediates are always ok. 5602 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { 5603 Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType()); 5604 break; 5605 } 5606 5607 // If we are in non-pic codegen mode, we allow the address of a global (with 5608 // an optional displacement) to be used with 'i'. 5609 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); 5610 int64_t Offset = 0; 5611 5612 // Match either (GA) or (GA+C) 5613 if (GA) { 5614 Offset = GA->getOffset(); 5615 } else if (Op.getOpcode() == ISD::ADD) { 5616 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5617 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5618 if (C && GA) { 5619 Offset = GA->getOffset()+C->getValue(); 5620 } else { 5621 C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5622 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5623 if (C && GA) 5624 Offset = GA->getOffset()+C->getValue(); 5625 else 5626 C = 0, GA = 0; 5627 } 5628 } 5629 5630 if (GA) { 5631 // If addressing this global requires a load (e.g. in PIC mode), we can't 5632 // match. 5633 if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(), 5634 false)) 5635 return; 5636 5637 Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), 5638 Offset); 5639 Result = Op; 5640 break; 5641 } 5642 5643 // Otherwise, not valid for this mode. 5644 return; 5645 } 5646 } 5647 5648 if (Result.Val) { 5649 Ops.push_back(Result); 5650 return; 5651 } 5652 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5653} 5654 5655std::vector<unsigned> X86TargetLowering:: 5656getRegClassForInlineAsmConstraint(const std::string &Constraint, 5657 MVT::ValueType VT) const { 5658 if (Constraint.size() == 1) { 5659 // FIXME: not handling fp-stack yet! 5660 switch (Constraint[0]) { // GCC X86 Constraint Letters 5661 default: break; // Unknown constraint letter 5662 case 'A': // EAX/EDX 5663 if (VT == MVT::i32 || VT == MVT::i64) 5664 return make_vector<unsigned>(X86::EAX, X86::EDX, 0); 5665 break; 5666 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 5667 case 'Q': // Q_REGS 5668 if (VT == MVT::i32) 5669 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); 5670 else if (VT == MVT::i16) 5671 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); 5672 else if (VT == MVT::i8) 5673 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); 5674 break; 5675 } 5676 } 5677 5678 return std::vector<unsigned>(); 5679} 5680 5681std::pair<unsigned, const TargetRegisterClass*> 5682X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5683 MVT::ValueType VT) const { 5684 // First, see if this is a constraint that directly corresponds to an LLVM 5685 // register class. 
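  // e.g. "r" with an i32 operand maps directly to GR32 below, and "x" with
  // v4f32 maps to VR128 when SSE1 is available.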
5686 if (Constraint.size() == 1) { 5687 // GCC Constraint Letters 5688 switch (Constraint[0]) { 5689 default: break; 5690 case 'r': // GENERAL_REGS 5691 case 'R': // LEGACY_REGS 5692 case 'l': // INDEX_REGS 5693 if (VT == MVT::i64 && Subtarget->is64Bit()) 5694 return std::make_pair(0U, X86::GR64RegisterClass); 5695 if (VT == MVT::i32) 5696 return std::make_pair(0U, X86::GR32RegisterClass); 5697 else if (VT == MVT::i16) 5698 return std::make_pair(0U, X86::GR16RegisterClass); 5699 else if (VT == MVT::i8) 5700 return std::make_pair(0U, X86::GR8RegisterClass); 5701 break; 5702 case 'y': // MMX_REGS if MMX allowed. 5703 if (!Subtarget->hasMMX()) break; 5704 return std::make_pair(0U, X86::VR64RegisterClass); 5705 break; 5706 case 'Y': // SSE_REGS if SSE2 allowed 5707 if (!Subtarget->hasSSE2()) break; 5708 // FALL THROUGH. 5709 case 'x': // SSE_REGS if SSE1 allowed 5710 if (!Subtarget->hasSSE1()) break; 5711 5712 switch (VT) { 5713 default: break; 5714 // Scalar SSE types. 5715 case MVT::f32: 5716 case MVT::i32: 5717 return std::make_pair(0U, X86::FR32RegisterClass); 5718 case MVT::f64: 5719 case MVT::i64: 5720 return std::make_pair(0U, X86::FR64RegisterClass); 5721 // Vector types. 5722 case MVT::v16i8: 5723 case MVT::v8i16: 5724 case MVT::v4i32: 5725 case MVT::v2i64: 5726 case MVT::v4f32: 5727 case MVT::v2f64: 5728 return std::make_pair(0U, X86::VR128RegisterClass); 5729 } 5730 break; 5731 } 5732 } 5733 5734 // Use the default implementation in TargetLowering to convert the register 5735 // constraint into a member of a register class. 5736 std::pair<unsigned, const TargetRegisterClass*> Res; 5737 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5738 5739 // Not found as a standard register? 5740 if (Res.second == 0) { 5741 // GCC calls "st(0)" just plain "st". 5742 if (StringsEqualNoCase("{st}", Constraint)) { 5743 Res.first = X86::ST0; 5744 Res.second = X86::RFP80RegisterClass; 5745 } 5746 5747 return Res; 5748 } 5749 5750 // Otherwise, check to see if this is a register class of the wrong value 5751 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to 5752 // turn into {ax},{dx}. 5753 if (Res.second->hasType(VT)) 5754 return Res; // Correct type already, nothing to do. 5755 5756 // All of the single-register GCC register classes map their values onto 5757 // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we 5758 // really want an 8-bit or 32-bit register, map to the appropriate register 5759 // class and return the appropriate register. 
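  // e.g. an "{ax}" operand used with an i32 value is rewritten below to
  // EAX in GR32, and with an i64 value to RAX in GR64.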
5760   if (Res.second != X86::GR16RegisterClass)
5761     return Res;
5762
5763   if (VT == MVT::i8) {
5764     unsigned DestReg = 0;
5765     switch (Res.first) {
5766     default: break;
5767     case X86::AX: DestReg = X86::AL; break;
5768     case X86::DX: DestReg = X86::DL; break;
5769     case X86::CX: DestReg = X86::CL; break;
5770     case X86::BX: DestReg = X86::BL; break;
5771     }
5772     if (DestReg) {
5773       Res.first = DestReg;
5774       Res.second = X86::GR8RegisterClass;
5775     }
5776   } else if (VT == MVT::i32) {
5777     unsigned DestReg = 0;
5778     switch (Res.first) {
5779     default: break;
5780     case X86::AX: DestReg = X86::EAX; break;
5781     case X86::DX: DestReg = X86::EDX; break;
5782     case X86::CX: DestReg = X86::ECX; break;
5783     case X86::BX: DestReg = X86::EBX; break;
5784     case X86::SI: DestReg = X86::ESI; break;
5785     case X86::DI: DestReg = X86::EDI; break;
5786     case X86::BP: DestReg = X86::EBP; break;
5787     case X86::SP: DestReg = X86::ESP; break;
5788     }
5789     if (DestReg) {
5790       Res.first = DestReg;
5791       Res.second = X86::GR32RegisterClass;
5792     }
5793   } else if (VT == MVT::i64) {
5794     unsigned DestReg = 0;
5795     switch (Res.first) {
5796     default: break;
5797     case X86::AX: DestReg = X86::RAX; break;
5798     case X86::DX: DestReg = X86::RDX; break;
5799     case X86::CX: DestReg = X86::RCX; break;
5800     case X86::BX: DestReg = X86::RBX; break;
5801     case X86::SI: DestReg = X86::RSI; break;
5802     case X86::DI: DestReg = X86::RDI; break;
5803     case X86::BP: DestReg = X86::RBP; break;
5804     case X86::SP: DestReg = X86::RSP; break;
5805     }
5806     if (DestReg) {
5807       Res.first = DestReg;
5808       Res.second = X86::GR64RegisterClass;
5809     }
5810   }
5811
5812   return Res;
5813 }
5814