X86ISelLowering.cpp revision e5f6204cd5d2306379bf8954e280ad35619a38b5
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird; it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);  // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  }
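
  // The promotion works because the operand is first zero-extended to a
  // wider integer: the extended value can never be negative, so the signed
  // convert computes the same result.  E.g. an unsigned i16 -> f64 convert
  // becomes sint_to_fp(zext i16 x to i32) and is exact.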

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode the f32 and f64
  // cases are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
    setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  }

  // Divide and remainder are lowered to use div or idiv in legalize in
  // order to expose the intermediate computations to trivial CSE.  This is
  // most noticeable when both x/y and x%y are being computed; they can be
  // done with a single div or idiv.
  setOperationAction(ISD::SDIV, MVT::i8, Custom);
  setOperationAction(ISD::UDIV, MVT::i8, Custom);
  setOperationAction(ISD::SREM, MVT::i8, Custom);
  setOperationAction(ISD::UREM, MVT::i8, Custom);
  setOperationAction(ISD::SDIV, MVT::i16, Custom);
  setOperationAction(ISD::UDIV, MVT::i16, Custom);
  setOperationAction(ISD::SREM, MVT::i16, Custom);
  setOperationAction(ISD::UREM, MVT::i16, Custom);
  setOperationAction(ISD::SDIV, MVT::i32, Custom);
  setOperationAction(ISD::UDIV, MVT::i32, Custom);
  setOperationAction(ISD::SREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Custom);
  setOperationAction(ISD::SDIV, MVT::i64, Custom);
  setOperationAction(ISD::UDIV, MVT::i64, Custom);
  setOperationAction(ISD::SREM, MVT::i64, Custom);
  setOperationAction(ISD::UREM, MVT::i64, Custom);
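
  // For example, in
  //   int q = x / y, r = x % y;
  // a single 32-bit idiv leaves the quotient in EAX and the remainder in
  // EDX, so CSE can fold both operations onto one instruction.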

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8, Expand);
  setOperationAction(ISD::CTLZ, MVT::i8, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTLZ, MVT::i16, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
    setOperationAction(ISD::CTLZ, MVT::i64, Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT, MVT::f80, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::f80, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
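
  // For example, a variable i64 shl on 32-bit x86 operates on a register
  // pair; the *_PARTS node is selected (roughly) to a shld/shl sequence on
  // the two halves plus a fix-up for shift amounts of 32 or more.
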
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET, MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY, MVT::Other, Custom);

  // Use the default ISD::LOCATION expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
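
    // These are all bit tricks on the IEEE representation; e.g. for f64:
    //   fabs(x) = andpd x, [0x7FFFFFFFFFFFFFFF]  (clear the sign bit)
    //   fneg(x) = xorpd x, [0x8000000000000000]  (flip the sign bit)
    // and fcopysign combines the two masks with andpd + orpd.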

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    // SSE -> x87 conversions go through memory.
    setConvertAction(MVT::f32, MVT::f64, Expand);
    setConvertAction(MVT::f32, MVT::f80, Expand);

    // x87 -> SSE truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    // And x87 -> x87 truncations also.
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
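
    // fld0/fld1 push +0.0/+1.0 directly onto the FP stack and fchs flips
    // the sign, so the eight immediates above need no constant pool load;
    // every other FP immediate is expanded per the ConstantFP actions.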
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN, MVT::f80, Expand);
    setOperationAction(ISD::FCOS, MVT::f80, Expand);
  }

  // First set operation action for all vector types to expand.  Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType(ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType(ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType(ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);
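
    // All 64-bit bitwise ops map to the same pand/por/pxor instructions,
    // so the narrower MMX types are simply bitcast to v1i64 and share one
    // set of patterns.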

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
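
    // Only the v8i16 multiply is legal here (pmullw); there is no byte
    // multiply instruction, and a 32-bit element multiply (pmulld) does
    // not appear until SSE4.1, so the other MUL types keep their Expand
    // default.
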
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }
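
    // As with MMX above, the 128-bit integer types all share v2i64's
    // pand/por/pxor and movdqa patterns; the promotion merely wraps the
    // operands and result in bitcasts.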

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info.  Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;   // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;   // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16;  // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true;  // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to
  // the liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      SDOperand MemLoc;

      // If this is a load into a scalar-sse value, don't store the loaded
      // value back to the stack only to reload it: just replace the
      // scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes
/// that Chain/InFlag are the input chain/flag to use, and that TheCall is
/// the call being lowered.  This returns an SDNode with the same number of
/// values as the ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
        (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT.  This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks.  When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for most Windows API
//  routines.  It differs from the C calling convention only slightly: the
//  callee cleans up the stack instead of the caller, and symbols are
//  decorated with an argument-byte suffix.  It does not support vector
//  arguments.
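//  For example, a stdcall function taking 12 bytes of arguments is
//  decorated as _foo@12 and returns with "ret 12", popping its own
//  arguments.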

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live in value.  It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip
    // later places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  ArgValues.push_back(Root);

  // If the function takes variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn  = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = 0;  // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }
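
  // Example: for "struct S f(void)" the caller passes a hidden pointer to
  // the return slot as the first (sret) argument; the callee stores through
  // it and returns with "ret 4", popping just that pointer -- the 4 bytes
  // accounted for above.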

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
                (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
                 ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF PIC calls made via the PLT need the GOT pointer in the EBX register
  // before the call.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }
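
  // An i386 PIC PLT entry is roughly "jmp *foo@GOT(%ebx)", so the GOT
  // address must be live in EBX at the call site; the copy above is glued
  // ahead of the call via InFlag.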

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments
// in registers (an appropriate portion of ECX/EDX), passes arguments in C
// order, requires that the callee pop its arguments off the stack (allowing
// proper tail calls), and has the same return value conventions as the C
// calling convention.
//
// This calling convention always arranges for the callee-pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip
    // later places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the incoming argument area takes 8n+4 bytes, so that the
    // arguments stay 8-byte aligned once the return address has been pushed.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }
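
  // Worked example: StackSize = 8 would give 8 + 4 (retaddr) = 12 bytes on
  // the stack, which is not 8-byte aligned, so it is padded to 12, making
  // 12 + 4 = 16.  The invariant is StackSize + 4 == 0 (mod 8).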

  VarArgsFrameIndex = 0xAAAAAAA;  // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);

    return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
                       AlignNode);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the outgoing argument area takes 8n+4 bytes, so that the
    // arguments stay 8-byte aligned once the return address has been pushed.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                             Arg));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF PIC calls made via the PLT need the GOT pointer in the EBX register
  // before the call.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
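
  // CALLSEQ_END carries both the bytes pushed and the bytes the callee
  // pops; a fastcall callee pops its entire stack argument area, so both
  // operands are NumBytes here (the C path above passes 0 or 4 instead).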
1360       if (VA.getLocInfo() == CCValAssign::SExt)
1361         ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
1362                                DAG.getValueType(VA.getValVT()));
1363       else if (VA.getLocInfo() == CCValAssign::ZExt)
1364         ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
1365                                DAG.getValueType(VA.getValVT()));
1366 
1367       if (VA.getLocInfo() != CCValAssign::Full)
1368         ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
1369 
1370       // Handle MMX values passed in GPRs.
1371       if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
1372           MVT::getSizeInBits(RegVT) == 64)
1373         ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
1374 
1375       ArgValues.push_back(ArgValue);
1376     } else {
1377       assert(VA.isMemLoc());
1378       ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
1379     }
1380   }
1381 
1382   unsigned StackSize = CCInfo.getNextStackOffset();
1383 
1384   // If the function takes a variable number of arguments, make a frame index
1385   // for the start of the first vararg value... for expansion of llvm.va_start.
1386   if (isVarArg) {
1387     unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
1388     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
1389 
1390     // For X86-64, if there are vararg parameters that are passed via
1391     // registers, then we must store them to their spots on the stack so they
1392     // may be loaded by dereferencing the result of va_next.
1393     VarArgsGPOffset = NumIntRegs * 8;
1394     VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
1395     VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
1396     RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
1397 
1398     // Store the integer parameter registers.
1399     SmallVector<SDOperand, 8> MemOps;
1400     SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
1401     SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
1402                                 DAG.getConstant(VarArgsGPOffset, getPointerTy()));
1403     for (; NumIntRegs != 6; ++NumIntRegs) {
1404       unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
1405                                 X86::GR64RegisterClass);
1406       SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1407       SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1408       MemOps.push_back(Store);
1409       FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
1410                         DAG.getConstant(8, getPointerTy()));
1411     }
1412 
1413     // Now store the XMM (fp + vector) parameter registers.
1414     FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
1415                       DAG.getConstant(VarArgsFPOffset, getPointerTy()));
1416     for (; NumXMMRegs != 8; ++NumXMMRegs) {
1417       unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
1418                                 X86::VR128RegisterClass);
1419       SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
1420       SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1421       MemOps.push_back(Store);
1422       FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
1423                         DAG.getConstant(16, getPointerTy()));
1424     }
1425     if (!MemOps.empty())
1426       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
1427                          &MemOps[0], MemOps.size());
1428   }
1429 
1430   ArgValues.push_back(Root);
1431 
1432   BytesToPopOnReturn = 0;  // Callee pops nothing.
1433   BytesCallerReserves = StackSize;
1434 
1435   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1436   FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
1437 
1438   // Return the new list of results.
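  // The list holds the lowered argument values (in argument order) followed
  // by the updated token chain; the caller selects the piece it needs via
  // Op.ResNo.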
1439   return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
1440                      &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
1441 }
1442 
1443 SDOperand
1444 X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
1445                                         unsigned CC) {
1446   SDOperand Chain = Op.getOperand(0);
1447   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1448   bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1449   SDOperand Callee = Op.getOperand(4);
1450 
1451   // Analyze operands of the call, assigning locations to each operand.
1452   SmallVector<CCValAssign, 16> ArgLocs;
1453   CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
1454   CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
1455 
1456   // Get a count of how many bytes are to be pushed on the stack.
1457   unsigned NumBytes = CCInfo.getNextStackOffset();
1458   Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1459 
1460   SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
1461   SmallVector<SDOperand, 8> MemOpChains;
1462 
1463   SDOperand StackPtr;
1464 
1465   // Walk the register/memloc assignments, inserting copies/loads.
1466   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1467     CCValAssign &VA = ArgLocs[i];
1468     SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
1469 
1470     // Promote the value if needed.
1471     switch (VA.getLocInfo()) {
1472     default: assert(0 && "Unknown loc info!");
1473     case CCValAssign::Full: break;
1474     case CCValAssign::SExt:
1475       Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
1476       break;
1477     case CCValAssign::ZExt:
1478       Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
1479       break;
1480     case CCValAssign::AExt:
1481       Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
1482       break;
1483     }
1484 
1485     if (VA.isRegLoc()) {
1486       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1487     } else {
1488       assert(VA.isMemLoc());
1489       if (StackPtr.Val == 0)
1490         StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
1491 
1492       MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
1493                                              Arg));
1494     }
1495   }
1496 
1497   if (!MemOpChains.empty())
1498     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1499                         &MemOpChains[0], MemOpChains.size());
1500 
1501   // Build a sequence of copy-to-reg nodes chained together with token chain
1502   // and flag operands which copy the outgoing args into registers.
1503   SDOperand InFlag;
1504   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1505     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1506                              InFlag);
1507     InFlag = Chain.getValue(1);
1508   }
1509 
1510   if (isVarArg) {
1511     // From the AMD64 ABI document:
1512     // For calls that may call functions that use varargs or stdargs
1513     // (prototype-less calls or calls to functions containing ellipsis (...) in
1514     // the declaration) %al is used as a hidden argument to specify the number
1515     // of SSE registers used. The contents of %al do not need to match exactly
1516     // the number of registers, but must be an upper bound on the number of SSE
1517     // registers used and be in the range 0 - 8 inclusive.
1518 
1519     // Count the number of XMM registers allocated.
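    // For example, a call such as printf("%f\n", x) passes x in XMM0, so AL
    // is set to 1; a varargs call with no SSE arguments sets AL to 0.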
1520 static const unsigned XMMArgRegs[] = { 1521 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1522 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1523 }; 1524 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1525 1526 Chain = DAG.getCopyToReg(Chain, X86::AL, 1527 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1528 InFlag = Chain.getValue(1); 1529 } 1530 1531 // If the callee is a GlobalAddress node (quite common, every direct call is) 1532 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1533 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1534 // We should use extra load for direct calls to dllimported functions in 1535 // non-JIT mode. 1536 if (getTargetMachine().getCodeModel() != CodeModel::Large 1537 && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1538 getTargetMachine(), true)) 1539 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1540 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1541 if (getTargetMachine().getCodeModel() != CodeModel::Large) 1542 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1543 1544 // Returns a chain & a flag for retval copy to use. 1545 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1546 SmallVector<SDOperand, 8> Ops; 1547 Ops.push_back(Chain); 1548 Ops.push_back(Callee); 1549 1550 // Add argument registers to the end of the list so that they are known live 1551 // into the call. 1552 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1553 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1554 RegsToPass[i].second.getValueType())); 1555 1556 if (InFlag.Val) 1557 Ops.push_back(InFlag); 1558 1559 // FIXME: Do not generate X86ISD::TAILCALL for now. 1560 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1561 NodeTys, &Ops[0], Ops.size()); 1562 InFlag = Chain.getValue(1); 1563 1564 // Returns a flag for retval copy to use. 1565 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1566 Ops.clear(); 1567 Ops.push_back(Chain); 1568 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1569 Ops.push_back(DAG.getConstant(0, getPointerTy())); 1570 Ops.push_back(InFlag); 1571 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1572 InFlag = Chain.getValue(1); 1573 1574 // Handle result values, copying them out of physregs into vregs that we 1575 // return. 1576 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1577} 1578 1579 1580//===----------------------------------------------------------------------===// 1581// Other Lowering Hooks 1582//===----------------------------------------------------------------------===// 1583 1584 1585SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1586 MachineFunction &MF = DAG.getMachineFunction(); 1587 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1588 int ReturnAddrIndex = FuncInfo->getRAIndex(); 1589 1590 if (ReturnAddrIndex == 0) { 1591 // Set up a frame object for the return address. 1592 if (Subtarget->is64Bit()) 1593 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 1594 else 1595 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1596 1597 FuncInfo->setRAIndex(ReturnAddrIndex); 1598 } 1599 1600 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1601} 1602 1603 1604 1605/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1606/// specific condition code. 
It returns false if it cannot do a direct
1607 /// translation. X86CC is the translated CondCode. LHS/RHS are modified as
1608 /// needed.
1609 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1610                            unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
1611                            SelectionDAG &DAG) {
1612   X86CC = X86::COND_INVALID;
1613   if (!isFP) {
1614     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1615       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
1616         // X > -1   -> X == 0, jump !sign.
1617         RHS = DAG.getConstant(0, RHS.getValueType());
1618         X86CC = X86::COND_NS;
1619         return true;
1620       } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
1621         // X < 0   -> X == 0, jump on sign.
1622         X86CC = X86::COND_S;
1623         return true;
1624       } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
1625         // X < 1   -> X <= 0
1626         RHS = DAG.getConstant(0, RHS.getValueType());
1627         X86CC = X86::COND_LE;
1628         return true;
1629       }
1630     }
1631 
1632     switch (SetCCOpcode) {
1633     default: break;
1634     case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1635     case ISD::SETGT:  X86CC = X86::COND_G;  break;
1636     case ISD::SETGE:  X86CC = X86::COND_GE; break;
1637     case ISD::SETLT:  X86CC = X86::COND_L;  break;
1638     case ISD::SETLE:  X86CC = X86::COND_LE; break;
1639     case ISD::SETNE:  X86CC = X86::COND_NE; break;
1640     case ISD::SETULT: X86CC = X86::COND_B;  break;
1641     case ISD::SETUGT: X86CC = X86::COND_A;  break;
1642     case ISD::SETULE: X86CC = X86::COND_BE; break;
1643     case ISD::SETUGE: X86CC = X86::COND_AE; break;
1644     }
1645   } else {
1646     // On a floating point condition, the flags are set as follows:
1647     //   ZF | PF | CF | op
1648     //    0 |  0 |  0 | X > Y
1649     //    0 |  0 |  1 | X < Y
1650     //    1 |  0 |  0 | X == Y
1651     //    1 |  1 |  1 | unordered
1652     bool Flip = false;
1653     switch (SetCCOpcode) {
1654     default: break;
1655     case ISD::SETUEQ:
1656     case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1657     case ISD::SETOLT: Flip = true; // Fallthrough
1658     case ISD::SETOGT:
1659     case ISD::SETGT:  X86CC = X86::COND_A;  break;
1660     case ISD::SETOLE: Flip = true; // Fallthrough
1661     case ISD::SETOGE:
1662     case ISD::SETGE:  X86CC = X86::COND_AE; break;
1663     case ISD::SETUGT: Flip = true; // Fallthrough
1664     case ISD::SETULT:
1665     case ISD::SETLT:  X86CC = X86::COND_B;  break;
1666     case ISD::SETUGE: Flip = true; // Fallthrough
1667     case ISD::SETULE:
1668     case ISD::SETLE:  X86CC = X86::COND_BE; break;
1669     case ISD::SETONE:
1670     case ISD::SETNE:  X86CC = X86::COND_NE; break;
1671     case ISD::SETUO:  X86CC = X86::COND_P;  break;
1672     case ISD::SETO:   X86CC = X86::COND_NP; break;
1673     }
1674     if (Flip)
1675       std::swap(LHS, RHS);
1676   }
1677 
1678   return X86CC != X86::COND_INVALID;
1679 }
1680 
1681 /// hasFPCMov - is there a floating point cmov for the specific X86 condition
1682 /// code?  The current x86 ISA includes the following FP cmov instructions:
1683 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1684 static bool hasFPCMov(unsigned X86CC) {
1685   switch (X86CC) {
1686   default:
1687     return false;
1688   case X86::COND_B:
1689   case X86::COND_BE:
1690   case X86::COND_E:
1691   case X86::COND_P:
1692   case X86::COND_A:
1693   case X86::COND_AE:
1694   case X86::COND_NE:
1695   case X86::COND_NP:
1696     return true;
1697   }
1698 }
1699 
1700 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1701 /// true if Op is undef or if its value falls within the half-open range [Low, Hi).
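/// For example, with Low = 0 and Hi = 4, an undef operand or a constant
/// 0..3 passes, while a constant 4 does not.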
1702 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1703   if (Op.getOpcode() == ISD::UNDEF)
1704     return true;
1705 
1706   unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1707   return (Val >= Low && Val < Hi);
1708 }
1709 
1710 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1711 /// true if Op is undef or if its value is equal to the specified value.
1712 static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1713   if (Op.getOpcode() == ISD::UNDEF)
1714     return true;
1715   return cast<ConstantSDNode>(Op)->getValue() == Val;
1716 }
1717 
1718 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1719 /// specifies a shuffle of elements that is suitable for input to PSHUFD.
1720 bool X86::isPSHUFDMask(SDNode *N) {
1721   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1722 
1723   if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
1724     return false;
1725 
1726   // Check that the mask doesn't reference the second vector.
1727   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1728     SDOperand Arg = N->getOperand(i);
1729     if (Arg.getOpcode() == ISD::UNDEF) continue;
1730     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1731     if (cast<ConstantSDNode>(Arg)->getValue() >= e)
1732       return false;
1733   }
1734 
1735   return true;
1736 }
1737 
1738 /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1739 /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1740 bool X86::isPSHUFHWMask(SDNode *N) {
1741   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1742 
1743   if (N->getNumOperands() != 8)
1744     return false;
1745 
1746   // Lower quadword copied in order.
1747   for (unsigned i = 0; i != 4; ++i) {
1748     SDOperand Arg = N->getOperand(i);
1749     if (Arg.getOpcode() == ISD::UNDEF) continue;
1750     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1751     if (cast<ConstantSDNode>(Arg)->getValue() != i)
1752       return false;
1753   }
1754 
1755   // Upper quadword shuffled.
1756   for (unsigned i = 4; i != 8; ++i) {
1757     SDOperand Arg = N->getOperand(i);
1758     if (Arg.getOpcode() == ISD::UNDEF) continue;
1759     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1760     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1761     if (Val < 4 || Val > 7)
1762       return false;
1763   }
1764 
1765   return true;
1766 }
1767 
1768 /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1769 /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1770 bool X86::isPSHUFLWMask(SDNode *N) {
1771   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1772 
1773   if (N->getNumOperands() != 8)
1774     return false;
1775 
1776   // Upper quadword copied in order.
1777   for (unsigned i = 4; i != 8; ++i)
1778     if (!isUndefOrEqual(N->getOperand(i), i))
1779       return false;
1780 
1781   // Lower quadword shuffled.
1782   for (unsigned i = 0; i != 4; ++i)
1783     if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1784       return false;
1785 
1786   return true;
1787 }
1788 
1789 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1790 /// specifies a shuffle of elements that is suitable for input to SHUFP*.
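/// For v4f32 this means the first two mask elements select from V1 (values
/// 0..3) and the last two select from V2 (values 4..7), e.g. <0, 1, 4, 5>.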
1791static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 1792 if (NumElems != 2 && NumElems != 4) return false; 1793 1794 unsigned Half = NumElems / 2; 1795 for (unsigned i = 0; i < Half; ++i) 1796 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 1797 return false; 1798 for (unsigned i = Half; i < NumElems; ++i) 1799 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 1800 return false; 1801 1802 return true; 1803} 1804 1805bool X86::isSHUFPMask(SDNode *N) { 1806 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1807 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 1808} 1809 1810/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 1811/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1812/// half elements to come from vector 1 (which would equal the dest.) and 1813/// the upper half to come from vector 2. 1814static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 1815 if (NumOps != 2 && NumOps != 4) return false; 1816 1817 unsigned Half = NumOps / 2; 1818 for (unsigned i = 0; i < Half; ++i) 1819 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 1820 return false; 1821 for (unsigned i = Half; i < NumOps; ++i) 1822 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 1823 return false; 1824 return true; 1825} 1826 1827static bool isCommutedSHUFP(SDNode *N) { 1828 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1829 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 1830} 1831 1832/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1833/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1834bool X86::isMOVHLPSMask(SDNode *N) { 1835 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1836 1837 if (N->getNumOperands() != 4) 1838 return false; 1839 1840 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1841 return isUndefOrEqual(N->getOperand(0), 6) && 1842 isUndefOrEqual(N->getOperand(1), 7) && 1843 isUndefOrEqual(N->getOperand(2), 2) && 1844 isUndefOrEqual(N->getOperand(3), 3); 1845} 1846 1847/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 1848/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 1849/// <2, 3, 2, 3> 1850bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 1851 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1852 1853 if (N->getNumOperands() != 4) 1854 return false; 1855 1856 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 1857 return isUndefOrEqual(N->getOperand(0), 2) && 1858 isUndefOrEqual(N->getOperand(1), 3) && 1859 isUndefOrEqual(N->getOperand(2), 2) && 1860 isUndefOrEqual(N->getOperand(3), 3); 1861} 1862 1863/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1864/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1865bool X86::isMOVLPMask(SDNode *N) { 1866 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1867 1868 unsigned NumElems = N->getNumOperands(); 1869 if (NumElems != 2 && NumElems != 4) 1870 return false; 1871 1872 for (unsigned i = 0; i < NumElems/2; ++i) 1873 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1874 return false; 1875 1876 for (unsigned i = NumElems/2; i < NumElems; ++i) 1877 if (!isUndefOrEqual(N->getOperand(i), i)) 1878 return false; 1879 1880 return true; 1881} 1882 1883/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1884/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1885/// and MOVLHPS. 
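/// For v4f32 the expected mask (up to undefs) is <0, 1, 4, 5>: the lower half
/// of V1 stays in place and the lower half of V2 becomes the upper half.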
1886 bool X86::isMOVHPMask(SDNode *N) {
1887   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1888 
1889   unsigned NumElems = N->getNumOperands();
1890   if (NumElems != 2 && NumElems != 4)
1891     return false;
1892 
1893   for (unsigned i = 0; i < NumElems/2; ++i)
1894     if (!isUndefOrEqual(N->getOperand(i), i))
1895       return false;
1896 
1897   for (unsigned i = 0; i < NumElems/2; ++i) {
1898     SDOperand Arg = N->getOperand(i + NumElems/2);
1899     if (!isUndefOrEqual(Arg, i + NumElems))
1900       return false;
1901   }
1902 
1903   return true;
1904 }
1905 
1906 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
1907 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
1908 static bool isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
1909                          bool V2IsSplat = false) {
1910   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
1911     return false;
1912 
1913   for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
1914     SDOperand BitI  = Elts[i];
1915     SDOperand BitI1 = Elts[i+1];
1916     if (!isUndefOrEqual(BitI, j))
1917       return false;
1918     if (V2IsSplat) {
1919       if (!isUndefOrEqual(BitI1, NumElts))
1920         return false;
1921     } else {
1922       if (!isUndefOrEqual(BitI1, j + NumElts))
1923         return false;
1924     }
1925   }
1926 
1927   return true;
1928 }
1929 
1930 bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
1931   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1932   return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
1933 }
1934 
1935 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
1936 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
1937 static bool isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
1938                          bool V2IsSplat = false) {
1939   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
1940     return false;
1941 
1942   for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
1943     SDOperand BitI  = Elts[i];
1944     SDOperand BitI1 = Elts[i+1];
1945     if (!isUndefOrEqual(BitI, j + NumElts/2))
1946       return false;
1947     if (V2IsSplat) {
1948       if (!isUndefOrEqual(BitI1, NumElts))
1949         return false;
1950     } else {
1951       if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
1952         return false;
1953     }
1954   }
1955 
1956   return true;
1957 }
1958 
1959 bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
1960   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1961   return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
1962 }
1963 
1964 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
1965 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
1966 /// <0, 0, 1, 1>
1967 bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
1968   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1969 
1970   unsigned NumElems = N->getNumOperands();
1971   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1972     return false;
1973 
1974   for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1975     SDOperand BitI  = N->getOperand(i);
1976     SDOperand BitI1 = N->getOperand(i+1);
1977 
1978     if (!isUndefOrEqual(BitI, j))
1979       return false;
1980     if (!isUndefOrEqual(BitI1, j))
1981       return false;
1982   }
1983 
1984   return true;
1985 }
1986 
1987 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
1988 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
1989 /// <2, 2, 3, 3>
1990 bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
1991   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1992 
1993   unsigned NumElems = N->getNumOperands();
1994   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1995     return false;
1996 
1997   for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
1998     SDOperand BitI  = N->getOperand(i);
1999     SDOperand BitI1 = N->getOperand(i + 1);
2000 
2001     if (!isUndefOrEqual(BitI, j))
2002       return false;
2003     if (!isUndefOrEqual(BitI1, j))
2004       return false;
2005   }
2006 
2007   return true;
2008 }
2009 
2010 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
2011 /// specifies a shuffle of elements that is suitable for input to MOVSS,
2012 /// MOVSD, and MOVD, i.e. setting the lowest element.
2013 static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
2014   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
2015     return false;
2016 
2017   if (!isUndefOrEqual(Elts[0], NumElts))
2018     return false;
2019 
2020   for (unsigned i = 1; i < NumElts; ++i) {
2021     if (!isUndefOrEqual(Elts[i], i))
2022       return false;
2023   }
2024 
2025   return true;
2026 }
2027 
2028 bool X86::isMOVLMask(SDNode *N) {
2029   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2030   return ::isMOVLMask(N->op_begin(), N->getNumOperands());
2031 }
2032 
2033 /// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
2034 /// of what x86 movss wants: the lowest element must be the lowest element of
2035 /// vector 2, and the other elements must come from vector 1 in order.
2036 static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
2037                            bool V2IsSplat = false,
2038                            bool V2IsUndef = false) {
2039   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
2040     return false;
2041 
2042   if (!isUndefOrEqual(Ops[0], 0))
2043     return false;
2044 
2045   for (unsigned i = 1; i < NumOps; ++i) {
2046     SDOperand Arg = Ops[i];
2047     if (!(isUndefOrEqual(Arg, i+NumOps) ||
2048           (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
2049           (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
2050       return false;
2051   }
2052 
2053   return true;
2054 }
2055 
2056 static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
2057                            bool V2IsUndef = false) {
2058   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2059   return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
2060                         V2IsSplat, V2IsUndef);
2061 }
2062 
2063 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2064 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
2065 bool X86::isMOVSHDUPMask(SDNode *N) {
2066   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2067 
2068   if (N->getNumOperands() != 4)
2069     return false;
2070 
2071   // Expect 1, 1, 3, 3
2072   for (unsigned i = 0; i < 2; ++i) {
2073     SDOperand Arg = N->getOperand(i);
2074     if (Arg.getOpcode() == ISD::UNDEF) continue;
2075     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2076     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2077     if (Val != 1) return false;
2078   }
2079 
2080   bool HasHi = false;
2081   for (unsigned i = 2; i < 4; ++i) {
2082     SDOperand Arg = N->getOperand(i);
2083     if (Arg.getOpcode() == ISD::UNDEF) continue;
2084     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2085     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2086     if (Val != 3) return false;
2087     HasHi = true;
2088   }
2089 
2090   // Don't use movshdup if it can be done with a shufps.
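  // (If both elements of the upper half are undef, the mask is effectively
  // <1, 1, u, u>, which a single shufps can already produce.)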
2091   return HasHi;
2092 }
2093 
2094 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2095 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2096 bool X86::isMOVSLDUPMask(SDNode *N) {
2097   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2098 
2099   if (N->getNumOperands() != 4)
2100     return false;
2101 
2102   // Expect 0, 0, 2, 2
2103   for (unsigned i = 0; i < 2; ++i) {
2104     SDOperand Arg = N->getOperand(i);
2105     if (Arg.getOpcode() == ISD::UNDEF) continue;
2106     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2107     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2108     if (Val != 0) return false;
2109   }
2110 
2111   bool HasHi = false;
2112   for (unsigned i = 2; i < 4; ++i) {
2113     SDOperand Arg = N->getOperand(i);
2114     if (Arg.getOpcode() == ISD::UNDEF) continue;
2115     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2116     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2117     if (Val != 2) return false;
2118     HasHi = true;
2119   }
2120 
2121   // Don't use movsldup if it can be done with a shufps.
2122   return HasHi;
2123 }
2124 
2125 /// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
2126 /// specifies an identity operation on the LHS or RHS.
2127 static bool isIdentityMask(SDNode *N, bool RHS = false) {
2128   unsigned NumElems = N->getNumOperands();
2129   for (unsigned i = 0; i < NumElems; ++i)
2130     if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
2131       return false;
2132   return true;
2133 }
2134 
2135 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2136 /// a splat of a single element.
2137 static bool isSplatMask(SDNode *N) {
2138   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2139 
2140   // This is a splat operation if each element of the permute is the same, and
2141   // if the value doesn't reference the second vector.
2142   unsigned NumElems = N->getNumOperands();
2143   SDOperand ElementBase;
2144   unsigned i = 0;
2145   for (; i != NumElems; ++i) {
2146     SDOperand Elt = N->getOperand(i);
2147     if (isa<ConstantSDNode>(Elt)) {
2148       ElementBase = Elt;
2149       break;
2150     }
2151   }
2152 
2153   if (!ElementBase.Val)
2154     return false;
2155 
2156   for (; i != NumElems; ++i) {
2157     SDOperand Arg = N->getOperand(i);
2158     if (Arg.getOpcode() == ISD::UNDEF) continue;
2159     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2160     if (Arg != ElementBase) return false;
2161   }
2162 
2163   // Make sure it is a splat of the first vector operand.
2164   return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2165 }
2166 
2167 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2168 /// a splat of a single element and it's a 2 or 4 element mask.
2169 bool X86::isSplatMask(SDNode *N) {
2170   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2171 
2172   // We can only splat 64-bit and 32-bit quantities with a single instruction.
2173   if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2174     return false;
2175   return ::isSplatMask(N);
2176 }
2177 
2178 /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
2179 /// specifies a splat of element zero.
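/// e.g. <0, 0, 0, 0>, or <0, undef, 0, undef> with undefs allowed.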
2180 bool X86::isSplatLoMask(SDNode *N) {
2181   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2182 
2183   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
2184     if (!isUndefOrEqual(N->getOperand(i), 0))
2185       return false;
2186   return true;
2187 }
2188 
2189 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2190 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2191 /// instructions.
/// e.g. the v4f32 mask <3, 2, 1, 0> yields 0b00011011 = 0x1B, the immediate
/// that reverses all four elements.
2192 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2193   unsigned NumOperands = N->getNumOperands();
2194   unsigned Shift = (NumOperands == 4) ? 2 : 1;
2195   unsigned Mask = 0;
2196   for (unsigned i = 0; i < NumOperands; ++i) {
2197     unsigned Val = 0;
2198     SDOperand Arg = N->getOperand(NumOperands-i-1);
2199     if (Arg.getOpcode() != ISD::UNDEF)
2200       Val = cast<ConstantSDNode>(Arg)->getValue();
2201     if (Val >= NumOperands) Val -= NumOperands;
2202     Mask |= Val;
2203     if (i != NumOperands - 1)
2204       Mask <<= Shift;
2205   }
2206 
2207   return Mask;
2208 }
2209 
2210 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2211 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2212 /// instructions.
2213 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2214   unsigned Mask = 0;
2215   // 8 nodes, but we only care about the last 4.
2216   for (unsigned i = 7; i >= 4; --i) {
2217     unsigned Val = 0;
2218     SDOperand Arg = N->getOperand(i);
2219     if (Arg.getOpcode() != ISD::UNDEF)
2220       Val = cast<ConstantSDNode>(Arg)->getValue();
2221     Mask |= (Val - 4);
2222     if (i != 4)
2223       Mask <<= 2;
2224   }
2225 
2226   return Mask;
2227 }
2228 
2229 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2230 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2231 /// instructions.
2232 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2233   unsigned Mask = 0;
2234   // 8 nodes, but we only care about the first 4.
2235   for (int i = 3; i >= 0; --i) {
2236     unsigned Val = 0;
2237     SDOperand Arg = N->getOperand(i);
2238     if (Arg.getOpcode() != ISD::UNDEF)
2239       Val = cast<ConstantSDNode>(Arg)->getValue();
2240     Mask |= Val;
2241     if (i != 0)
2242       Mask <<= 2;
2243   }
2244 
2245   return Mask;
2246 }
2247 
2248 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2249 /// specifies an 8 element shuffle that can be broken into a pair of
2250 /// PSHUFHW and PSHUFLW.
2251 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2252   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2253 
2254   if (N->getNumOperands() != 8)
2255     return false;
2256 
2257   // Lower quadword shuffled.
2258   for (unsigned i = 0; i != 4; ++i) {
2259     SDOperand Arg = N->getOperand(i);
2260     if (Arg.getOpcode() == ISD::UNDEF) continue;
2261     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2262     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2263     if (Val >= 4)
2264       return false;
2265   }
2266 
2267   // Upper quadword shuffled.
2268   for (unsigned i = 4; i != 8; ++i) {
2269     SDOperand Arg = N->getOperand(i);
2270     if (Arg.getOpcode() == ISD::UNDEF) continue;
2271     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2272     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2273     if (Val < 4 || Val > 7)
2274       return false;
2275   }
2276 
2277   return true;
2278 }
2279 
2280 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as
2281 /// values in their permute mask.
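/// e.g. vector_shuffle V1, V2, <0, 5, 2, 7> becomes
/// vector_shuffle V2, V1, <4, 1, 6, 3>.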
2282 static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
2283                                       SDOperand &V2, SDOperand &Mask,
2284                                       SelectionDAG &DAG) {
2285   MVT::ValueType VT = Op.getValueType();
2286   MVT::ValueType MaskVT = Mask.getValueType();
2287   MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
2288   unsigned NumElems = Mask.getNumOperands();
2289   SmallVector<SDOperand, 8> MaskVec;
2290 
2291   for (unsigned i = 0; i != NumElems; ++i) {
2292     SDOperand Arg = Mask.getOperand(i);
2293     if (Arg.getOpcode() == ISD::UNDEF) {
2294       MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2295       continue;
2296     }
2297     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2298     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2299     if (Val < NumElems)
2300       MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2301     else
2302       MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2303   }
2304 
2305   std::swap(V1, V2);
2306   Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2307   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2308 }
2309 
2310 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2311 /// match movhlps. The lower half elements should come from the upper half of
2312 /// V1 (and in order), and the upper half elements should come from the upper
2313 /// half of V2 (and in order).
2314 static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2315   unsigned NumElems = Mask->getNumOperands();
2316   if (NumElems != 4)
2317     return false;
2318   for (unsigned i = 0, e = 2; i != e; ++i)
2319     if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2320       return false;
2321   for (unsigned i = 2; i != 4; ++i)
2322     if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2323       return false;
2324   return true;
2325 }
2326 
2327 /// isScalarLoadToVector - Returns true if the node is a scalar load that
2328 /// is promoted to a vector.
2329 static inline bool isScalarLoadToVector(SDNode *N) {
2330   if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2331     N = N->getOperand(0).Val;
2332     return ISD::isNON_EXTLoad(N);
2333   }
2334   return false;
2335 }
2336 
2337 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2338 /// match movlp{s|d}. The lower half elements should come from the lower half
2339 /// of V1 (and in order), and the upper half elements should come from the
2340 /// upper half of V2 (and in order). And since V1 will become the source of
2341 /// the MOVLP, it must be either a vector load or a scalar load to vector.
2342 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
2343   if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
2344     return false;
2345   // If V2 is a vector load, don't do this transformation. We will try to use
2346   // a load-folding shufps op instead.
2347   if (ISD::isNON_EXTLoad(V2))
2348     return false;
2349 
2350   unsigned NumElems = Mask->getNumOperands();
2351   if (NumElems != 2 && NumElems != 4)
2352     return false;
2353   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2354     if (!isUndefOrEqual(Mask->getOperand(i), i))
2355       return false;
2356   for (unsigned i = NumElems/2; i != NumElems; ++i)
2357     if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2358       return false;
2359   return true;
2360 }
2361 
2362 /// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2363 /// all the same.
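/// e.g. (build_vector X, X, X, X) for any operand X.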
2364 static bool isSplatVector(SDNode *N) {
2365   if (N->getOpcode() != ISD::BUILD_VECTOR)
2366     return false;
2367 
2368   SDOperand SplatValue = N->getOperand(0);
2369   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2370     if (N->getOperand(i) != SplatValue)
2371       return false;
2372   return true;
2373 }
2374 
2375 /// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2376 /// to an undef.
2377 static bool isUndefShuffle(SDNode *N) {
2378   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2379     return false;
2380 
2381   SDOperand V1 = N->getOperand(0);
2382   SDOperand V2 = N->getOperand(1);
2383   SDOperand Mask = N->getOperand(2);
2384   unsigned NumElems = Mask.getNumOperands();
2385   for (unsigned i = 0; i != NumElems; ++i) {
2386     SDOperand Arg = Mask.getOperand(i);
2387     if (Arg.getOpcode() != ISD::UNDEF) {
2388       unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2389       if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2390         return false;
2391       else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2392         return false;
2393     }
2394   }
2395   return true;
2396 }
2397 
2398 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
2399 /// constant +0.0.
2400 static inline bool isZeroNode(SDOperand Elt) {
2401   return ((isa<ConstantSDNode>(Elt) &&
2402            cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2403           (isa<ConstantFPSDNode>(Elt) &&
2404            cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
2405 }
2406 
2407 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2408 /// to a zero vector.
2409 static bool isZeroShuffle(SDNode *N) {
2410   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2411     return false;
2412 
2413   SDOperand V1 = N->getOperand(0);
2414   SDOperand V2 = N->getOperand(1);
2415   SDOperand Mask = N->getOperand(2);
2416   unsigned NumElems = Mask.getNumOperands();
2417   for (unsigned i = 0; i != NumElems; ++i) {
2418     SDOperand Arg = Mask.getOperand(i);
2419     if (Arg.getOpcode() != ISD::UNDEF) {
2420       unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
2421       if (Idx < NumElems) {
2422         unsigned Opc = V1.Val->getOpcode();
2423         if (Opc == ISD::UNDEF)
2424           continue;
2425         if (Opc != ISD::BUILD_VECTOR ||
2426             !isZeroNode(V1.Val->getOperand(Idx)))
2427           return false;
2428       } else if (Idx >= NumElems) {
2429         unsigned Opc = V2.Val->getOpcode();
2430         if (Opc == ISD::UNDEF)
2431           continue;
2432         if (Opc != ISD::BUILD_VECTOR ||
2433             !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
2434           return false;
2435       }
2436     }
2437   }
2438   return true;
2439 }
2440 
2441 /// getZeroVector - Returns a vector of specified type with all zero elements.
2442 ///
2443 static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2444   assert(MVT::isVector(VT) && "Expected a vector type");
2445   unsigned NumElems = MVT::getVectorNumElements(VT);
2446   MVT::ValueType EVT = MVT::getVectorElementType(VT);
2447   bool isFP = MVT::isFloatingPoint(EVT);
2448   SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2449   SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
2450   return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
2451 }
2452 
2453 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2454 /// that point to V2 point to its first element.
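/// e.g. with four elements, <0, 7, 1, 6> is rewritten as <0, 4, 1, 4>.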
2455static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2456 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2457 2458 bool Changed = false; 2459 SmallVector<SDOperand, 8> MaskVec; 2460 unsigned NumElems = Mask.getNumOperands(); 2461 for (unsigned i = 0; i != NumElems; ++i) { 2462 SDOperand Arg = Mask.getOperand(i); 2463 if (Arg.getOpcode() != ISD::UNDEF) { 2464 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2465 if (Val > NumElems) { 2466 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2467 Changed = true; 2468 } 2469 } 2470 MaskVec.push_back(Arg); 2471 } 2472 2473 if (Changed) 2474 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2475 &MaskVec[0], MaskVec.size()); 2476 return Mask; 2477} 2478 2479/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2480/// operation of specified width. 2481static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2482 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2483 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2484 2485 SmallVector<SDOperand, 8> MaskVec; 2486 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2487 for (unsigned i = 1; i != NumElems; ++i) 2488 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2489 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2490} 2491 2492/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2493/// of specified width. 2494static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2495 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2496 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2497 SmallVector<SDOperand, 8> MaskVec; 2498 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2499 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2500 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2501 } 2502 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2503} 2504 2505/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2506/// of specified width. 2507static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2508 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2509 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2510 unsigned Half = NumElems/2; 2511 SmallVector<SDOperand, 8> MaskVec; 2512 for (unsigned i = 0; i != Half; ++i) { 2513 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2514 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2515 } 2516 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2517} 2518 2519/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
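/// There is no single-instruction splat for 8- and 16-bit elements, so the
/// splat is first widened by unpacking the value with itself, after which the
/// resulting v4i32 can be splatted with a single 32-bit shuffle.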
2520 ///
2521 static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2522   SDOperand V1 = Op.getOperand(0);
2523   SDOperand Mask = Op.getOperand(2);
2524   MVT::ValueType VT = Op.getValueType();
2525   unsigned NumElems = Mask.getNumOperands();
2526   Mask = getUnpacklMask(NumElems, DAG);
2527   while (NumElems != 4) {
2528     V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2529     NumElems >>= 1;
2530   }
2531   V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2532 
2533   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2534   Mask = getZeroVector(MaskVT, DAG);
2535   SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2536                                   DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2537   return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2538 }
2539 
2540 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2541 /// vector and a zero or undef vector.
2542 static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2543                                              unsigned NumElems, unsigned Idx,
2544                                              bool isZero, SelectionDAG &DAG) {
2545   SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2546   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2547   MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
2548   SDOperand Zero = DAG.getConstant(0, EVT);
2549   SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
2550   MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2551   SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2552                                &MaskVec[0], MaskVec.size());
2553   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2554 }
2555 
2556 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2557 ///
2558 static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2559                                        unsigned NumNonZero, unsigned NumZero,
2560                                        SelectionDAG &DAG, TargetLowering &TLI) {
2561   if (NumNonZero > 8)
2562     return SDOperand();
2563 
2564   SDOperand V(0, 0);
2565   bool First = true;
2566   for (unsigned i = 0; i < 16; ++i) {
2567     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2568     if (ThisIsNonZero && First) {
2569       if (NumZero)
2570         V = getZeroVector(MVT::v8i16, DAG);
2571       else
2572         V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2573       First = false;
2574     }
2575 
2576     if ((i & 1) != 0) {
2577       SDOperand ThisElt(0, 0), LastElt(0, 0);
2578       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2579       if (LastIsNonZero) {
2580         LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2581       }
2582       if (ThisIsNonZero) {
2583         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2584         ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2585                               ThisElt, DAG.getConstant(8, MVT::i8));
2586         if (LastIsNonZero)
2587           ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2588       } else
2589         ThisElt = LastElt;
2590 
2591       if (ThisElt.Val)
2592         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2593                         DAG.getConstant(i/2, TLI.getPointerTy()));
2594     }
2595   }
2596 
2597   return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2598 }
2599 
2600 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
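/// Each nonzero element is inserted into a zero (or undef) v8i16 with an
/// INSERT_VECTOR_ELT, which instruction selection can match to pinsrw.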
2601/// 2602static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2603 unsigned NumNonZero, unsigned NumZero, 2604 SelectionDAG &DAG, TargetLowering &TLI) { 2605 if (NumNonZero > 4) 2606 return SDOperand(); 2607 2608 SDOperand V(0, 0); 2609 bool First = true; 2610 for (unsigned i = 0; i < 8; ++i) { 2611 bool isNonZero = (NonZeros & (1 << i)) != 0; 2612 if (isNonZero) { 2613 if (First) { 2614 if (NumZero) 2615 V = getZeroVector(MVT::v8i16, DAG); 2616 else 2617 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2618 First = false; 2619 } 2620 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2621 DAG.getConstant(i, TLI.getPointerTy())); 2622 } 2623 } 2624 2625 return V; 2626} 2627 2628SDOperand 2629X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2630 // All zero's are handled with pxor. 2631 if (ISD::isBuildVectorAllZeros(Op.Val)) 2632 return Op; 2633 2634 // All one's are handled with pcmpeqd. 2635 if (ISD::isBuildVectorAllOnes(Op.Val)) 2636 return Op; 2637 2638 MVT::ValueType VT = Op.getValueType(); 2639 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2640 unsigned EVTBits = MVT::getSizeInBits(EVT); 2641 2642 unsigned NumElems = Op.getNumOperands(); 2643 unsigned NumZero = 0; 2644 unsigned NumNonZero = 0; 2645 unsigned NonZeros = 0; 2646 unsigned NumNonZeroImms = 0; 2647 std::set<SDOperand> Values; 2648 for (unsigned i = 0; i < NumElems; ++i) { 2649 SDOperand Elt = Op.getOperand(i); 2650 if (Elt.getOpcode() != ISD::UNDEF) { 2651 Values.insert(Elt); 2652 if (isZeroNode(Elt)) 2653 NumZero++; 2654 else { 2655 NonZeros |= (1 << i); 2656 NumNonZero++; 2657 if (Elt.getOpcode() == ISD::Constant || 2658 Elt.getOpcode() == ISD::ConstantFP) 2659 NumNonZeroImms++; 2660 } 2661 } 2662 } 2663 2664 if (NumNonZero == 0) { 2665 if (NumZero == 0) 2666 // All undef vector. Return an UNDEF. 2667 return DAG.getNode(ISD::UNDEF, VT); 2668 else 2669 // A mix of zero and undef. Return a zero vector. 2670 return getZeroVector(VT, DAG); 2671 } 2672 2673 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2674 if (Values.size() == 1) 2675 return SDOperand(); 2676 2677 // Special case for single non-zero element. 2678 if (NumNonZero == 1) { 2679 unsigned Idx = CountTrailingZeros_32(NonZeros); 2680 SDOperand Item = Op.getOperand(Idx); 2681 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2682 if (Idx == 0) 2683 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2684 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2685 NumZero > 0, DAG); 2686 2687 if (EVTBits == 32) { 2688 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2689 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2690 DAG); 2691 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2692 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2693 SmallVector<SDOperand, 8> MaskVec; 2694 for (unsigned i = 0; i < NumElems; i++) 2695 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2696 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2697 &MaskVec[0], MaskVec.size()); 2698 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2699 DAG.getNode(ISD::UNDEF, VT), Mask); 2700 } 2701 } 2702 2703 // A vector full of immediates; various special cases are already 2704 // handled, so this is best done with a single constant-pool load. 2705 if (NumNonZero == NumNonZeroImms) 2706 return SDOperand(); 2707 2708 // Let legalizer expand 2-wide build_vectors. 
2709 if (EVTBits == 64) 2710 return SDOperand(); 2711 2712 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2713 if (EVTBits == 8 && NumElems == 16) { 2714 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2715 *this); 2716 if (V.Val) return V; 2717 } 2718 2719 if (EVTBits == 16 && NumElems == 8) { 2720 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2721 *this); 2722 if (V.Val) return V; 2723 } 2724 2725 // If element VT is == 32 bits, turn it into a number of shuffles. 2726 SmallVector<SDOperand, 8> V; 2727 V.resize(NumElems); 2728 if (NumElems == 4 && NumZero > 0) { 2729 for (unsigned i = 0; i < 4; ++i) { 2730 bool isZero = !(NonZeros & (1 << i)); 2731 if (isZero) 2732 V[i] = getZeroVector(VT, DAG); 2733 else 2734 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2735 } 2736 2737 for (unsigned i = 0; i < 2; ++i) { 2738 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2739 default: break; 2740 case 0: 2741 V[i] = V[i*2]; // Must be a zero vector. 2742 break; 2743 case 1: 2744 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2745 getMOVLMask(NumElems, DAG)); 2746 break; 2747 case 2: 2748 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2749 getMOVLMask(NumElems, DAG)); 2750 break; 2751 case 3: 2752 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2753 getUnpacklMask(NumElems, DAG)); 2754 break; 2755 } 2756 } 2757 2758 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 2759 // clears the upper bits. 2760 // FIXME: we can do the same for v4f32 case when we know both parts of 2761 // the lower half come from scalar_to_vector (loadf32). We should do 2762 // that in post legalizer dag combiner with target specific hooks. 2763 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2764 return V[0]; 2765 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2766 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2767 SmallVector<SDOperand, 8> MaskVec; 2768 bool Reverse = (NonZeros & 0x3) == 2; 2769 for (unsigned i = 0; i < 2; ++i) 2770 if (Reverse) 2771 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2772 else 2773 MaskVec.push_back(DAG.getConstant(i, EVT)); 2774 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2775 for (unsigned i = 0; i < 2; ++i) 2776 if (Reverse) 2777 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2778 else 2779 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2780 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2781 &MaskVec[0], MaskVec.size()); 2782 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2783 } 2784 2785 if (Values.size() > 2) { 2786 // Expand into a number of unpckl*. 2787 // e.g. 
for v4f32
2788     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2789     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2790     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2791     SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2792     for (unsigned i = 0; i < NumElems; ++i)
2793       V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2794     NumElems >>= 1;
2795     while (NumElems != 0) {
2796       for (unsigned i = 0; i < NumElems; ++i)
2797         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2798                            UnpckMask);
2799       NumElems >>= 1;
2800     }
2801     return V[0];
2802   }
2803 
2804   return SDOperand();
2805 }
2806 
2807 SDOperand
2808 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2809   SDOperand V1 = Op.getOperand(0);
2810   SDOperand V2 = Op.getOperand(1);
2811   SDOperand PermMask = Op.getOperand(2);
2812   MVT::ValueType VT = Op.getValueType();
2813   unsigned NumElems = PermMask.getNumOperands();
2814   bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2815   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2816   bool V1IsSplat = false;
2817   bool V2IsSplat = false;
2818 
2819   if (isUndefShuffle(Op.Val))
2820     return DAG.getNode(ISD::UNDEF, VT);
2821 
2822   if (isZeroShuffle(Op.Val))
2823     return getZeroVector(VT, DAG);
2824 
2825   if (isIdentityMask(PermMask.Val))
2826     return V1;
2827   else if (isIdentityMask(PermMask.Val, true))
2828     return V2;
2829 
2830   if (isSplatMask(PermMask.Val)) {
2831     if (NumElems <= 4) return Op;
2832     // Promote it to a v4i32 splat.
2833     return PromoteSplat(Op, DAG);
2834   }
2835 
2836   if (X86::isMOVLMask(PermMask.Val))
2837     return (V1IsUndef) ? V2 : Op;
2838 
2839   if (X86::isMOVSHDUPMask(PermMask.Val) ||
2840       X86::isMOVSLDUPMask(PermMask.Val) ||
2841       X86::isMOVHLPSMask(PermMask.Val) ||
2842       X86::isMOVHPMask(PermMask.Val) ||
2843       X86::isMOVLPMask(PermMask.Val))
2844     return Op;
2845 
2846   if (ShouldXformToMOVHLPS(PermMask.Val) ||
2847       ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
2848     return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2849 
2850   bool Commuted = false;
2851   V1IsSplat = isSplatVector(V1.Val);
2852   V2IsSplat = isSplatVector(V2.Val);
2853   if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
2854     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2855     std::swap(V1IsSplat, V2IsSplat);
2856     std::swap(V1IsUndef, V2IsUndef);
2857     Commuted = true;
2858   }
2859 
2860   if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
2861     if (V2IsUndef) return V1;
2862     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2863     if (V2IsSplat) {
2864       // V2 is a splat, so the mask may be malformed. That is, it may point
2865       // to any V2 element. The instruction selector won't like this. Get
2866       // a corrected mask and commute to form a proper MOVS{S|D}.
2867       SDOperand NewMask = getMOVLMask(NumElems, DAG);
2868       if (NewMask.Val != PermMask.Val)
2869         Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2870     }
2871     return Op;
2872   }
2873 
2874   if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2875       X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
2876       X86::isUNPCKLMask(PermMask.Val) ||
2877       X86::isUNPCKHMask(PermMask.Val))
2878     return Op;
2879 
2880   if (V2IsSplat) {
2881     // Normalize the mask so all entries that point to V2 point to its first
2882     // element, then try to match unpck{h|l} again. If they match, return a
2883     // new vector_shuffle with the corrected mask.
2884     SDOperand NewMask = NormalizeMask(PermMask, DAG);
2885     if (NewMask.Val != PermMask.Val) {
2886       if (X86::isUNPCKLMask(PermMask.Val, true)) {
2887         SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2888         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2889       } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2890         SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2891         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2892       }
2893     }
2894   }
2895 
2896   // Normalize the node to match x86 shuffle ops if needed.
2897   if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
2898     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2899 
2900   if (Commuted) {
2901     // Commute it back and try unpck* again.
2902     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2903     if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2904         X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
2905         X86::isUNPCKLMask(PermMask.Val) ||
2906         X86::isUNPCKHMask(PermMask.Val))
2907       return Op;
2908   }
2909 
2910   // If VT is integer, try PSHUF* first, then SHUFP*.
2911   if (MVT::isInteger(VT)) {
2912     // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
2913     // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
2914     if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
2915          X86::isPSHUFDMask(PermMask.Val)) ||
2916         X86::isPSHUFHWMask(PermMask.Val) ||
2917         X86::isPSHUFLWMask(PermMask.Val)) {
2918       if (V2.getOpcode() != ISD::UNDEF)
2919         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2920                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2921       return Op;
2922     }
2923 
2924     if (X86::isSHUFPMask(PermMask.Val) &&
2925         MVT::getSizeInBits(VT) != 64)    // Don't do this for MMX.
2926       return Op;
2927 
2928     // Handle v8i16 as a pair of shuffle-high / shuffle-low nodes.
2929     if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
2930       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2931       MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
2932       SmallVector<SDOperand, 8> MaskVec;
2933       for (unsigned i = 0; i != 4; ++i)
2934         MaskVec.push_back(PermMask.getOperand(i));
2935       for (unsigned i = 4; i != 8; ++i)
2936         MaskVec.push_back(DAG.getConstant(i, BaseVT));
2937       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2938                                    &MaskVec[0], MaskVec.size());
2939       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2940       MaskVec.clear();
2941       for (unsigned i = 0; i != 4; ++i)
2942         MaskVec.push_back(DAG.getConstant(i, BaseVT));
2943       for (unsigned i = 4; i != 8; ++i)
2944         MaskVec.push_back(PermMask.getOperand(i));
2945       Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
2946       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2947     }
2948   } else {
2949     // Floating point cases in the other order.
2950     if (X86::isSHUFPMask(PermMask.Val))
2951       return Op;
2952     if (X86::isPSHUFDMask(PermMask.Val) ||
2953         X86::isPSHUFHWMask(PermMask.Val) ||
2954         X86::isPSHUFLWMask(PermMask.Val)) {
2955       if (V2.getOpcode() != ISD::UNDEF)
2956         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2957                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2958       return Op;
2959     }
2960   }
2961 
2962   if (NumElems == 4 &&
2963       // Don't do this for MMX.
2964       MVT::getSizeInBits(VT) != 64) {
2965     MVT::ValueType MaskVT = PermMask.getValueType();
2966     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
2967     SmallVector<std::pair<int, int>, 8> Locs;
2968     Locs.reserve(NumElems);
2969     SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2970     SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2971     unsigned NumHi = 0;
2972     unsigned NumLo = 0;
2973     // If no more than two elements come from either vector, this can be
2974     // implemented with two shuffles. The first shuffle gathers the elements;
2975     // the second shuffle, which takes the first shuffle as both of its
2976     // vector operands, puts the elements into the right order.
2977     for (unsigned i = 0; i != NumElems; ++i) {
2978       SDOperand Elt = PermMask.getOperand(i);
2979       if (Elt.getOpcode() == ISD::UNDEF) {
2980         Locs[i] = std::make_pair(-1, -1);
2981       } else {
2982         unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2983         if (Val < NumElems) {
2984           Locs[i] = std::make_pair(0, NumLo);
2985           Mask1[NumLo] = Elt;
2986           NumLo++;
2987         } else {
2988           Locs[i] = std::make_pair(1, NumHi);
2989           if (2+NumHi < NumElems)
2990             Mask1[2+NumHi] = Elt;
2991           NumHi++;
2992         }
2993       }
2994     }
2995     if (NumLo <= 2 && NumHi <= 2) {
2996       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2997                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2998                                    &Mask1[0], Mask1.size()));
2999       for (unsigned i = 0; i != NumElems; ++i) {
3000         if (Locs[i].first == -1)
3001           continue;
3002         else {
3003           unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3004           Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3005           Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3006         }
3007       }
3008
3009       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3010                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3011                                      &Mask2[0], Mask2.size()));
3012     }
3013
3014     // Break it into (shuffle shuffle_hi, shuffle_lo).
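    // Worked example (illustrative): for a v4 mask <4, 1, 6, 3>, the loop
    // below builds LoMask <1, u, 4, u> and HiMask <3, u, 6, u>; the final
    // shuffle of (LoShuffle, HiShuffle) then uses the mask <2, 0, 6, 4> to
    // route each element into its requested lane.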
3015     Locs.clear();
3016     SmallVector<SDOperand, 8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3017     SmallVector<SDOperand, 8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3018     SmallVector<SDOperand, 8> *MaskPtr = &LoMask;
3019     unsigned MaskIdx = 0;
3020     unsigned LoIdx = 0;
3021     unsigned HiIdx = NumElems/2;
3022     for (unsigned i = 0; i != NumElems; ++i) {
3023       if (i == NumElems/2) {
3024         MaskPtr = &HiMask;
3025         MaskIdx = 1;
3026         LoIdx = 0;
3027         HiIdx = NumElems/2;
3028       }
3029       SDOperand Elt = PermMask.getOperand(i);
3030       if (Elt.getOpcode() == ISD::UNDEF) {
3031         Locs[i] = std::make_pair(-1, -1);
3032       } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3033         Locs[i] = std::make_pair(MaskIdx, LoIdx);
3034         (*MaskPtr)[LoIdx] = Elt;
3035         LoIdx++;
3036       } else {
3037         Locs[i] = std::make_pair(MaskIdx, HiIdx);
3038         (*MaskPtr)[HiIdx] = Elt;
3039         HiIdx++;
3040       }
3041     }
3042
3043     SDOperand LoShuffle =
3044       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3045                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3046                               &LoMask[0], LoMask.size()));
3047     SDOperand HiShuffle =
3048       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3049                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3050                               &HiMask[0], HiMask.size()));
3051     SmallVector<SDOperand, 8> MaskOps;
3052     for (unsigned i = 0; i != NumElems; ++i) {
3053       if (Locs[i].first == -1) {
3054         MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3055       } else {
3056         unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3057         MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3058       }
3059     }
3060     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3061                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3062                                    &MaskOps[0], MaskOps.size()));
3063   }
3064
3065   return SDOperand();
3066 }
3067
3068 SDOperand
3069 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3070   if (!isa<ConstantSDNode>(Op.getOperand(1)))
3071     return SDOperand();
3072
3073   MVT::ValueType VT = Op.getValueType();
3074   // TODO: handle v16i8.
3075   if (MVT::getSizeInBits(VT) == 16) {
3076     // Transform it so it matches pextrw, which produces a 32-bit result.
3077     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3078     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3079                                     Op.getOperand(0), Op.getOperand(1));
3080     SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
3081                                    DAG.getValueType(VT));
3082     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3083   } else if (MVT::getSizeInBits(VT) == 32) {
3084     SDOperand Vec = Op.getOperand(0);
3085     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3086     if (Idx == 0)
3087       return Op;
3088     // SHUFPS the element to the lowest double word, then movss.
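    // Sketch for Idx == 2 on a v4f32 (illustrative): the mask built below is
    // <2, u, u, u>, so the wanted element is shuffled down to lane 0 and then
    // extracted from lane 0, which the selector can fold into a movss-style
    // use of the low lane.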
3089     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3090     SmallVector<SDOperand, 8> IdxVec;
3091     IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
3092     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3093     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3094     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3095     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3096                                  &IdxVec[0], IdxVec.size());
3097     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3098                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3099     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3100                        DAG.getConstant(0, getPointerTy()));
3101   } else if (MVT::getSizeInBits(VT) == 64) {
3102     SDOperand Vec = Op.getOperand(0);
3103     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3104     if (Idx == 0)
3105       return Op;
3106
3107     // UNPCKHPD the element to the lowest double word, then movsd.
3108     // Note: if the lower 64 bits of the result of the UNPCKHPD are then stored
3109     // to a f64mem, the whole operation is folded into a single MOVHPDmr.
3110     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3111     SmallVector<SDOperand, 8> IdxVec;
3112     IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
3113     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
3114     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3115                                  &IdxVec[0], IdxVec.size());
3116     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3117                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3118     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3119                        DAG.getConstant(0, getPointerTy()));
3120   }
3121
3122   return SDOperand();
3123 }
3124
3125 SDOperand
3126 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3127   // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3128   // as its second argument.
3129   MVT::ValueType VT = Op.getValueType();
3130   MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
3131   SDOperand N0 = Op.getOperand(0);
3132   SDOperand N1 = Op.getOperand(1);
3133   SDOperand N2 = Op.getOperand(2);
3134   if (MVT::getSizeInBits(BaseVT) == 16) {
3135     if (N1.getValueType() != MVT::i32)
3136       N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3137     if (N2.getValueType() != MVT::i32)
3138       N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), getPointerTy());
3139     return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3140   } else if (MVT::getSizeInBits(BaseVT) == 32) {
3141     unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3142     if (Idx == 0) {
3143       // Use a movss.
3144       N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3145       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3146       MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
3147       SmallVector<SDOperand, 8> MaskVec;
3148       MaskVec.push_back(DAG.getConstant(4, BaseVT));
3149       for (unsigned i = 1; i <= 3; ++i)
3150         MaskVec.push_back(DAG.getConstant(i, BaseVT));
3151       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3152                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3153                                      &MaskVec[0], MaskVec.size()));
3154     } else {
3155       // Use two pinsrw instructions to insert a 32-bit value.
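      // Sketch (illustrative): for a v4i32 insert at Idx == 1 this emits,
      // roughly,
      //   pinsrw $2, %eax, %xmm0   ; low 16 bits of the value
      //   shrl   $16, %eax
      //   pinsrw $3, %eax, %xmm0   ; high 16 bits
      // i.e. the 32-bit lane is written as the two word lanes 2*Idx and
      // 2*Idx+1.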
3156       Idx <<= 1;
3157       if (MVT::isFloatingPoint(N1.getValueType())) {
3158         N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3159         N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3160         N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3161                          DAG.getConstant(0, getPointerTy()));
3162       }
3163       N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3164       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3165                        DAG.getConstant(Idx, getPointerTy()));
3166       N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3167       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3168                        DAG.getConstant(Idx+1, getPointerTy()));
3169       return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3170     }
3171   }
3172
3173   return SDOperand();
3174 }
3175
3176 SDOperand
3177 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3178   SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3179   return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3180 }
3181
3182 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3183 // their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
3184 // one of the above-mentioned nodes. It has to be wrapped because otherwise
3185 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3186 // be used to form an addressing mode. These wrapped nodes will be selected
3187 // into MOV32ri.
3188 SDOperand
3189 X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3190   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3191   SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3192                                                getPointerTy(),
3193                                                CP->getAlignment());
3194   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3195   // With PIC, the address is actually $g + Offset.
3196   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3197       !Subtarget->isPICStyleRIPRel()) {
3198     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3199                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3200                          Result);
3201   }
3202
3203   return Result;
3204 }
3205
3206 SDOperand
3207 X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3208   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3209   SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3210   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3211   // With PIC, the address is actually $g + Offset.
3212   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3213       !Subtarget->isPICStyleRIPRel()) {
3214     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3215                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3216                          Result);
3217   }
3218
3219   // For Darwin & Mingw32, external and weak symbols are indirect, so we want
3220   // to load the value at address GV, not the value of GV itself. This means
3221   // that the GlobalAddress must be in the base or index register of the
3222   // address, not in the GV offset field. The platform check is inside the
3223   // GVRequiresExtraLoad() call. The same applies for external symbols during PIC codegen.
3224   if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
3225     Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3226
3227   return Result;
3228 }
3229
3230 // Lower ISD::GlobalTLSAddress using the "general dynamic" model.
3231 static SDOperand
3232 LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3233                               const MVT::ValueType PtrVT) {
3234   SDOperand InFlag;
3235   SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
3236                                      DAG.getNode(X86ISD::GlobalBaseReg,
3237                                                  PtrVT), InFlag);
3238   InFlag = Chain.getValue(1);
3239
3240   // emit leal symbol@TLSGD(,%ebx,1), %eax
3241   SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
3242   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3243                                              GA->getValueType(0),
3244                                              GA->getOffset());
3245   SDOperand Ops[] = { Chain, TGA, InFlag };
3246   SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
3247   InFlag = Result.getValue(2);
3248   Chain = Result.getValue(1);
3249
3250   // call ___tls_get_addr. This function receives its argument in
3251   // the register EAX.
3252   Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
3253   InFlag = Chain.getValue(1);
3254
3255   NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
3256   SDOperand Ops1[] = { Chain,
3257                        DAG.getTargetExternalSymbol("___tls_get_addr",
3258                                                    PtrVT),
3259                        DAG.getRegister(X86::EAX, PtrVT),
3260                        DAG.getRegister(X86::EBX, PtrVT),
3261                        InFlag };
3262   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
3263   InFlag = Chain.getValue(1);
3264
3265   return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
3266 }
3267
3268 // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
3269 // "local exec" model.
3270 static SDOperand
3271 LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
3272                     const MVT::ValueType PtrVT) {
3273   // Get the thread pointer.
3274   SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
3275   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
3276   // (initial exec).
3277   SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
3278                                              GA->getValueType(0),
3279                                              GA->getOffset());
3280   SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
3281
3282   if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
3283     Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
3284
3285   // The address of the thread-local variable is the sum of the thread
3286   // pointer and the offset of the variable.
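  // Roughly (illustrative), the exec models select to:
  //   movl %gs:0, %eax        ; thread pointer
  //   addl x@ntpoff, %eax     ; local exec: link-time constant offset
  // with the offset first loaded from x@indntpoff in the initial-exec case.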
3287   return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
3288 }
3289
3290 SDOperand
3291 X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
3292   // TODO: implement the "local dynamic" model.
3293   // TODO: implement the "initial exec" model for PIC executables.
3294   assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
3295          "TLS not implemented for non-ELF and 64-bit targets");
3296   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3297   // If the relocation model is PIC, use the "general dynamic" TLS model;
3298   // otherwise use the "local exec" TLS model.
3299   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
3300     return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
3301   else
3302     return LowerToTLSExecModel(GA, DAG, getPointerTy());
3303 }
3304
3305 SDOperand
3306 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3307   const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3308   SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3309   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3310   // With PIC, the address is actually $g + Offset.
3311   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3312       !Subtarget->isPICStyleRIPRel()) {
3313     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3314                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3315                          Result);
3316   }
3317
3318   return Result;
3319 }
3320
3321 SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3322   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3323   SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3324   Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3325   // With PIC, the address is actually $g + Offset.
3326   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3327       !Subtarget->isPICStyleRIPRel()) {
3328     Result = DAG.getNode(ISD::ADD, getPointerTy(),
3329                          DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3330                          Result);
3331   }
3332
3333   return Result;
3334 }
3335
3336 SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3337   assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3338          "Not an i64 shift!");
3339   bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3340   SDOperand ShOpLo = Op.getOperand(0);
3341   SDOperand ShOpHi = Op.getOperand(1);
3342   SDOperand ShAmt  = Op.getOperand(2);
3343   SDOperand Tmp1 = isSRA ?
3344     DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3345     DAG.getConstant(0, MVT::i32);
3346
3347   SDOperand Tmp2, Tmp3;
3348   if (Op.getOpcode() == ISD::SHL_PARTS) {
3349     Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3350     Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3351   } else {
3352     Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3353     Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3354 } 3355 3356 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3357 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3358 DAG.getConstant(32, MVT::i8)); 3359 SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, 3360 AndNode, DAG.getConstant(0, MVT::i8)); 3361 3362 SDOperand Hi, Lo; 3363 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3364 unsigned Opc = X86ISD::CMOV; 3365 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3366 SmallVector<SDOperand, 4> Ops; 3367 if (Op.getOpcode() == ISD::SHL_PARTS) { 3368 Ops.push_back(Tmp2); 3369 Ops.push_back(Tmp3); 3370 Ops.push_back(CC); 3371 Ops.push_back(Cond); 3372 Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3373 3374 Ops.clear(); 3375 Ops.push_back(Tmp3); 3376 Ops.push_back(Tmp1); 3377 Ops.push_back(CC); 3378 Ops.push_back(Cond); 3379 Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3380 } else { 3381 Ops.push_back(Tmp2); 3382 Ops.push_back(Tmp3); 3383 Ops.push_back(CC); 3384 Ops.push_back(Cond); 3385 Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3386 3387 Ops.clear(); 3388 Ops.push_back(Tmp3); 3389 Ops.push_back(Tmp1); 3390 Ops.push_back(CC); 3391 Ops.push_back(Cond); 3392 Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3393 } 3394 3395 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3396 Ops.clear(); 3397 Ops.push_back(Lo); 3398 Ops.push_back(Hi); 3399 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3400} 3401 3402SDOperand X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) { 3403 unsigned Opcode = Op.getOpcode(); 3404 MVT::ValueType NVT = Op.getValueType(); 3405 bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM; 3406 bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV; 3407 unsigned Opc = isSigned ? X86ISD::IDIV : X86ISD::DIV; 3408 3409 SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) }; 3410 SDOperand DR = DAG.getNode(Opc, DAG.getVTList(NVT, NVT), Ops, 2); 3411 3412 if (isDiv) 3413 return DR; 3414 3415 return SDOperand(DR.Val, 1); 3416} 3417 3418SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3419 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3420 Op.getOperand(0).getValueType() >= MVT::i16 && 3421 "Unknown SINT_TO_FP to lower!"); 3422 3423 SDOperand Result; 3424 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3425 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3426 MachineFunction &MF = DAG.getMachineFunction(); 3427 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3428 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3429 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3430 StackSlot, NULL, 0); 3431 3432 // These are really Legal; caller falls through into that case. 
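  // (Illustrative note: when none of the early outs below fire, the FILD node
  // built afterwards amounts to "fild dword/qword ptr [StackSlot]" on the
  // value stored above.)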
3433 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32) 3434 return Result; 3435 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64) 3436 return Result; 3437 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 3438 Subtarget->is64Bit()) 3439 return Result; 3440 3441 // Build the FILD 3442 SDVTList Tys; 3443 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) || 3444 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64); 3445 if (useSSE) 3446 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3447 else 3448 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3449 SmallVector<SDOperand, 8> Ops; 3450 Ops.push_back(Chain); 3451 Ops.push_back(StackSlot); 3452 Ops.push_back(DAG.getValueType(SrcVT)); 3453 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3454 Tys, &Ops[0], Ops.size()); 3455 3456 if (useSSE) { 3457 Chain = Result.getValue(1); 3458 SDOperand InFlag = Result.getValue(2); 3459 3460 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3461 // shouldn't be necessary except that RFP cannot be live across 3462 // multiple blocks. When stackifier is fixed, they can be uncoupled. 3463 MachineFunction &MF = DAG.getMachineFunction(); 3464 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3465 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3466 Tys = DAG.getVTList(MVT::Other); 3467 SmallVector<SDOperand, 8> Ops; 3468 Ops.push_back(Chain); 3469 Ops.push_back(Result); 3470 Ops.push_back(StackSlot); 3471 Ops.push_back(DAG.getValueType(Op.getValueType())); 3472 Ops.push_back(InFlag); 3473 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3474 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3475 } 3476 3477 return Result; 3478} 3479 3480SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3481 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3482 "Unknown FP_TO_SINT to lower!"); 3483 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3484 // stack slot. 3485 SDOperand Result; 3486 MachineFunction &MF = DAG.getMachineFunction(); 3487 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3488 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3489 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3490 3491 // These are really Legal. 
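  // (Illustrative shape of the slow path below: optionally spill an SSE value
  // and reload it onto the x87 stack with FLD, emit FP_TO_INT*_IN_MEM, which
  // is a fistp through a stack slot, then load the integer result back.)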
3492 if (Op.getValueType() == MVT::i32 && 3493 X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) 3494 return Result; 3495 if (Op.getValueType() == MVT::i32 && 3496 X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64) 3497 return Result; 3498 if (Subtarget->is64Bit() && 3499 Op.getValueType() == MVT::i64 && 3500 Op.getOperand(0).getValueType() != MVT::f80) 3501 return Result; 3502 3503 unsigned Opc; 3504 switch (Op.getValueType()) { 3505 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3506 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3507 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3508 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3509 } 3510 3511 SDOperand Chain = DAG.getEntryNode(); 3512 SDOperand Value = Op.getOperand(0); 3513 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) || 3514 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) { 3515 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3516 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3517 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3518 SDOperand Ops[] = { 3519 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3520 }; 3521 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3522 Chain = Value.getValue(1); 3523 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3524 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3525 } 3526 3527 // Build the FP_TO_INT*_IN_MEM 3528 SDOperand Ops[] = { Chain, Value, StackSlot }; 3529 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3530 3531 // Load the result. 3532 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3533} 3534 3535SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3536 MVT::ValueType VT = Op.getValueType(); 3537 MVT::ValueType EltVT = VT; 3538 if (MVT::isVector(VT)) 3539 EltVT = MVT::getVectorElementType(VT); 3540 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3541 std::vector<Constant*> CV; 3542 if (EltVT == MVT::f64) { 3543 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 3544 CV.push_back(C); 3545 CV.push_back(C); 3546 } else { 3547 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 3548 CV.push_back(C); 3549 CV.push_back(C); 3550 CV.push_back(C); 3551 CV.push_back(C); 3552 } 3553 Constant *C = ConstantVector::get(CV); 3554 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3555 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3556 false, 16); 3557 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3558} 3559 3560SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3561 MVT::ValueType VT = Op.getValueType(); 3562 MVT::ValueType EltVT = VT; 3563 unsigned EltNum = 1; 3564 if (MVT::isVector(VT)) { 3565 EltVT = MVT::getVectorElementType(VT); 3566 EltNum = MVT::getVectorNumElements(VT); 3567 } 3568 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3569 std::vector<Constant*> CV; 3570 if (EltVT == MVT::f64) { 3571 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 3572 CV.push_back(C); 3573 CV.push_back(C); 3574 } else { 3575 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 3576 CV.push_back(C); 3577 CV.push_back(C); 3578 CV.push_back(C); 3579 CV.push_back(C); 3580 } 3581 Constant *C = ConstantVector::get(CV); 3582 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3583 SDOperand Mask = 
DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3584 false, 16); 3585 if (MVT::isVector(VT)) { 3586 return DAG.getNode(ISD::BIT_CONVERT, VT, 3587 DAG.getNode(ISD::XOR, MVT::v2i64, 3588 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 3589 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 3590 } else { 3591 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3592 } 3593} 3594 3595SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3596 SDOperand Op0 = Op.getOperand(0); 3597 SDOperand Op1 = Op.getOperand(1); 3598 MVT::ValueType VT = Op.getValueType(); 3599 MVT::ValueType SrcVT = Op1.getValueType(); 3600 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3601 3602 // If second operand is smaller, extend it first. 3603 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3604 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3605 SrcVT = VT; 3606 SrcTy = MVT::getTypeForValueType(SrcVT); 3607 } 3608 3609 // First get the sign bit of second operand. 3610 std::vector<Constant*> CV; 3611 if (SrcVT == MVT::f64) { 3612 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 3613 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 3614 } else { 3615 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 3616 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3617 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3618 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3619 } 3620 Constant *C = ConstantVector::get(CV); 3621 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3622 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 3623 false, 16); 3624 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3625 3626 // Shift sign bit right or left if the two operands have different types. 3627 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3628 // Op0 is MVT::f32, Op1 is MVT::f64. 3629 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3630 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3631 DAG.getConstant(32, MVT::i32)); 3632 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3633 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3634 DAG.getConstant(0, getPointerTy())); 3635 } 3636 3637 // Clear first operand sign bit. 3638 CV.clear(); 3639 if (VT == MVT::f64) { 3640 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 3641 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 3642 } else { 3643 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 3644 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3645 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3646 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3647 } 3648 C = ConstantVector::get(CV); 3649 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3650 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3651 false, 16); 3652 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3653 3654 // Or the value with the sign bit. 
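  // Net effect (illustrative): copysign(x, y) = (x & ~SIGN_MASK) | (y & SIGN_MASK),
  // computed entirely with FAND/FOR on constant-pool masks, so no branches
  // are needed.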
3655   return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
3656 }
3657
3658 SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
3659   assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3660   SDOperand Cond;
3661   SDOperand Op0 = Op.getOperand(0);
3662   SDOperand Op1 = Op.getOperand(1);
3663   SDOperand CC  = Op.getOperand(2);
3664   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3665   bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3666   unsigned X86CC;
3667
3668   if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
3669                      Op0, Op1, DAG)) {
3670     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
3671     return DAG.getNode(X86ISD::SETCC, MVT::i8,
3672                        DAG.getConstant(X86CC, MVT::i8), Cond);
3673   }
3674
3675   assert(isFP && "Illegal integer SetCC!");
3676
3677   Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
3678   switch (SetCCOpcode) {
3679   default: assert(false && "Illegal floating point SetCC!");
3680   case ISD::SETOEQ: {  // !PF & ZF
3681     SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3682                                  DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
3683     SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3684                                  DAG.getConstant(X86::COND_E, MVT::i8), Cond);
3685     return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3686   }
3687   case ISD::SETUNE: {  // PF | !ZF
3688     SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3689                                  DAG.getConstant(X86::COND_P, MVT::i8), Cond);
3690     SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3691                                  DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
3692     return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3693   }
3694   }
3695 }
3696
3697
3698 SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3699   bool addTest = true;
3700   SDOperand Cond = Op.getOperand(0);
3701   SDOperand CC;
3702
3703   if (Cond.getOpcode() == ISD::SETCC)
3704     Cond = LowerSETCC(Cond, DAG);
3705
3706   if (Cond.getOpcode() == X86ISD::SETCC) {
3707     CC = Cond.getOperand(0);
3708
3709     // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3710     // (since the flag operand cannot be shared). Use it as the condition-setting
3711     // operand in place of the X86ISD::SETCC.
3712     // If the X86ISD::SETCC has more than one use, then perhaps it's better
3713     // to use a test instead of duplicating the X86ISD::CMP (for register
3714     // pressure reasons)?
3715     SDOperand Cmp = Cond.getOperand(1);
3716     unsigned Opc = Cmp.getOpcode();
3717     bool IllegalFPCMov =
3718       !((X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
3719         (X86ScalarSSEf64 && Op.getValueType() == MVT::f64)) &&
3720       !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3721     if ((Opc == X86ISD::CMP ||
3722          Opc == X86ISD::COMI ||
3723          Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
3724       Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1));
3725       addTest = false;
3726     }
3727   }
3728
3729   if (addTest) {
3730     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3731     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond,
3732                        DAG.getConstant(0, MVT::i8));
3733   }
3734
3735   const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
3736                                                     MVT::Flag);
3737   SmallVector<SDOperand, 4> Ops;
3738   // X86ISD::CMOV yields its second value operand when the condition is true
3739   // and its first value operand otherwise.
3740   Ops.push_back(Op.getOperand(2));
3741   Ops.push_back(Op.getOperand(1));
3742   Ops.push_back(CC);
3743   Ops.push_back(Cond);
3744   return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3745 }
3746
3747 SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3748   bool addTest = true;
3749   SDOperand Chain = Op.getOperand(0);
3750   SDOperand Cond  = Op.getOperand(1);
3751   SDOperand Dest  = Op.getOperand(2);
3752   SDOperand CC;
3753
3754   if (Cond.getOpcode() == ISD::SETCC)
3755     Cond = LowerSETCC(Cond, DAG);
3756
3757   if (Cond.getOpcode() == X86ISD::SETCC) {
3758     CC = Cond.getOperand(0);
3759
3760     // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3761     // (since the flag operand cannot be shared). Use it as the condition-setting
3762     // operand in place of the X86ISD::SETCC.
3763     // If the X86ISD::SETCC has more than one use, then perhaps it's better
3764     // to use a test instead of duplicating the X86ISD::CMP (for register
3765     // pressure reasons)?
3766     SDOperand Cmp = Cond.getOperand(1);
3767     unsigned Opc = Cmp.getOpcode();
3768     if (Opc == X86ISD::CMP ||
3769         Opc == X86ISD::COMI ||
3770         Opc == X86ISD::UCOMI) {
3771       Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1));
3772       addTest = false;
3773     }
3774   }
3775
3776   if (addTest) {
3777     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3778     Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
3779   }
3780   return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3781                      Chain, Dest, CC, Cond);
3782 }
3783
3784 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3785   unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3786
3787   if (Subtarget->is64Bit())
3788     return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
3789   else
3790     switch (CallingConv) {
3791     default:
3792       assert(0 && "Unsupported calling convention");
3793     case CallingConv::Fast:
3794       // TODO: Implement fastcc
3795       // Falls through
3796     case CallingConv::C:
3797     case CallingConv::X86_StdCall:
3798       return LowerCCCCallTo(Op, DAG, CallingConv);
3799     case CallingConv::X86_FastCall:
3800       return LowerFastCCCallTo(Op, DAG, CallingConv);
3801     }
3802 }
3803
3804
3805 // Lower dynamic stack allocation to an _alloca call for Cygwin/Mingw targets.
3806 // Calls to _alloca are needed to probe the stack when allocating more than 4k
3807 // bytes in one go. Touching the stack at 4K increments is necessary to ensure
3808 // that the guard pages used by the OS virtual memory manager are allocated in
3809 // the correct sequence.
3810 SDOperand
3811 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
3812                                            SelectionDAG &DAG) {
3813   assert(Subtarget->isTargetCygMing() &&
3814          "This should be used only on Cygwin/Mingw targets");
3815
3816   // Get the inputs.
3817   SDOperand Chain = Op.getOperand(0);
3818   SDOperand Size  = Op.getOperand(1);
3819   // FIXME: Ensure alignment here
3820
3821   SDOperand Flag;
3822
3823   MVT::ValueType IntPtr = getPointerTy();
3824   MVT::ValueType SPTy = (Subtarget->is64Bit() ?
MVT::i64 : MVT::i32); 3825 3826 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 3827 Flag = Chain.getValue(1); 3828 3829 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3830 SDOperand Ops[] = { Chain, 3831 DAG.getTargetExternalSymbol("_alloca", IntPtr), 3832 DAG.getRegister(X86::EAX, IntPtr), 3833 Flag }; 3834 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4); 3835 Flag = Chain.getValue(1); 3836 3837 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 3838 3839 std::vector<MVT::ValueType> Tys; 3840 Tys.push_back(SPTy); 3841 Tys.push_back(MVT::Other); 3842 SDOperand Ops1[2] = { Chain.getValue(0), Chain }; 3843 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); 3844} 3845 3846SDOperand 3847X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3848 MachineFunction &MF = DAG.getMachineFunction(); 3849 const Function* Fn = MF.getFunction(); 3850 if (Fn->hasExternalLinkage() && 3851 Subtarget->isTargetCygMing() && 3852 Fn->getName() == "main") 3853 MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true); 3854 3855 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3856 if (Subtarget->is64Bit()) 3857 return LowerX86_64CCCArguments(Op, DAG); 3858 else 3859 switch(CC) { 3860 default: 3861 assert(0 && "Unsupported calling convention"); 3862 case CallingConv::Fast: 3863 // TODO: implement fastcc. 3864 3865 // Falls through 3866 case CallingConv::C: 3867 return LowerCCCArguments(Op, DAG); 3868 case CallingConv::X86_StdCall: 3869 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall); 3870 return LowerCCCArguments(Op, DAG, true); 3871 case CallingConv::X86_FastCall: 3872 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall); 3873 return LowerFastCCArguments(Op, DAG); 3874 } 3875} 3876 3877SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3878 SDOperand InFlag(0, 0); 3879 SDOperand Chain = Op.getOperand(0); 3880 unsigned Align = 3881 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3882 if (Align == 0) Align = 1; 3883 3884 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3885 // If not DWORD aligned or size is more than the threshold, call memset. 3886 // The libc version is likely to be faster for these cases. It can use the 3887 // address value and run time information about the CPU. 3888 if ((Align & 3) != 0 || 3889 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 3890 MVT::ValueType IntPtr = getPointerTy(); 3891 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3892 TargetLowering::ArgListTy Args; 3893 TargetLowering::ArgListEntry Entry; 3894 Entry.Node = Op.getOperand(1); 3895 Entry.Ty = IntPtrTy; 3896 Args.push_back(Entry); 3897 // Extend the unsigned i8 argument to be an int value for the call. 
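    // (The overall effect is a plain libc call: memset(dst, (int)val, len).)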
3898 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3899 Entry.Ty = IntPtrTy; 3900 Args.push_back(Entry); 3901 Entry.Node = Op.getOperand(3); 3902 Args.push_back(Entry); 3903 std::pair<SDOperand,SDOperand> CallResult = 3904 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3905 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3906 return CallResult.second; 3907 } 3908 3909 MVT::ValueType AVT; 3910 SDOperand Count; 3911 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3912 unsigned BytesLeft = 0; 3913 bool TwoRepStos = false; 3914 if (ValC) { 3915 unsigned ValReg; 3916 uint64_t Val = ValC->getValue() & 255; 3917 3918 // If the value is a constant, then we can potentially use larger sets. 3919 switch (Align & 3) { 3920 case 2: // WORD aligned 3921 AVT = MVT::i16; 3922 ValReg = X86::AX; 3923 Val = (Val << 8) | Val; 3924 break; 3925 case 0: // DWORD aligned 3926 AVT = MVT::i32; 3927 ValReg = X86::EAX; 3928 Val = (Val << 8) | Val; 3929 Val = (Val << 16) | Val; 3930 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3931 AVT = MVT::i64; 3932 ValReg = X86::RAX; 3933 Val = (Val << 32) | Val; 3934 } 3935 break; 3936 default: // Byte aligned 3937 AVT = MVT::i8; 3938 ValReg = X86::AL; 3939 Count = Op.getOperand(3); 3940 break; 3941 } 3942 3943 if (AVT > MVT::i8) { 3944 if (I) { 3945 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3946 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3947 BytesLeft = I->getValue() % UBytes; 3948 } else { 3949 assert(AVT >= MVT::i32 && 3950 "Do not use rep;stos if not at least DWORD aligned"); 3951 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3952 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3953 TwoRepStos = true; 3954 } 3955 } 3956 3957 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3958 InFlag); 3959 InFlag = Chain.getValue(1); 3960 } else { 3961 AVT = MVT::i8; 3962 Count = Op.getOperand(3); 3963 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3964 InFlag = Chain.getValue(1); 3965 } 3966 3967 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3968 Count, InFlag); 3969 InFlag = Chain.getValue(1); 3970 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3971 Op.getOperand(1), InFlag); 3972 InFlag = Chain.getValue(1); 3973 3974 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3975 SmallVector<SDOperand, 8> Ops; 3976 Ops.push_back(Chain); 3977 Ops.push_back(DAG.getValueType(AVT)); 3978 Ops.push_back(InFlag); 3979 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3980 3981 if (TwoRepStos) { 3982 InFlag = Chain.getValue(1); 3983 Count = Op.getOperand(3); 3984 MVT::ValueType CVT = Count.getValueType(); 3985 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3986 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3987 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3988 Left, InFlag); 3989 InFlag = Chain.getValue(1); 3990 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3991 Ops.clear(); 3992 Ops.push_back(Chain); 3993 Ops.push_back(DAG.getValueType(MVT::i8)); 3994 Ops.push_back(InFlag); 3995 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3996 } else if (BytesLeft) { 3997 // Issue stores for the last 1 - 7 bytes. 
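    // Worked example (illustrative): a 10-byte, DWORD-aligned memset with a
    // constant byte value runs "rep stosd" with ECX = 2 (8 bytes); the code
    // below then issues one i16 store at offset 8 for the remaining 2 bytes.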
3998     SDOperand Value;
3999     unsigned Val = ValC->getValue() & 255;
4000     unsigned Offset = I->getValue() - BytesLeft;
4001     SDOperand DstAddr = Op.getOperand(1);
4002     MVT::ValueType AddrVT = DstAddr.getValueType();
4003     if (BytesLeft >= 4) {
4004       Val = (Val << 8)  | Val;
4005       Val = (Val << 16) | Val;
4006       Value = DAG.getConstant(Val, MVT::i32);
4007       Chain = DAG.getStore(Chain, Value,
4008                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4009                                        DAG.getConstant(Offset, AddrVT)),
4010                            NULL, 0);
4011       BytesLeft -= 4;
4012       Offset += 4;
4013     }
4014     if (BytesLeft >= 2) {
4015       Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
4016       Chain = DAG.getStore(Chain, Value,
4017                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4018                                        DAG.getConstant(Offset, AddrVT)),
4019                            NULL, 0);
4020       BytesLeft -= 2;
4021       Offset += 2;
4022     }
4023     if (BytesLeft == 1) {
4024       Value = DAG.getConstant(Val, MVT::i8);
4025       Chain = DAG.getStore(Chain, Value,
4026                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4027                                        DAG.getConstant(Offset, AddrVT)),
4028                            NULL, 0);
4029     }
4030   }
4031
4032   return Chain;
4033 }
4034
4035 SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
4036   SDOperand ChainOp  = Op.getOperand(0);
4037   SDOperand DestOp   = Op.getOperand(1);
4038   SDOperand SourceOp = Op.getOperand(2);
4039   SDOperand CountOp  = Op.getOperand(3);
4040   SDOperand AlignOp  = Op.getOperand(4);
4041   unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
4042   if (Align == 0) Align = 1;
4043
4044   // The libc version is likely to be faster for the following cases. It can
4045   // use the address value and run-time information about the CPU.
4046   // With glibc 2.6.1 on a Core 2, copying an array of 100M longs was 30% faster.
4047
4048   // If not DWORD aligned, call memcpy.
4049   if ((Align & 3) != 0)
4050     return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
4051
4052   // If size is unknown, call memcpy.
4053   ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
4054   if (!I)
4055     return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
4056
4057   // If size is more than the threshold, call memcpy.
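  // (Summary: "rep movs" is inlined only for DWORD-aligned copies of a known,
  // sufficiently small constant size; every other case goes to libc memcpy.)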
4058 unsigned Size = I->getValue(); 4059 if (Size > Subtarget->getMinRepStrSizeThreshold()) 4060 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); 4061 4062 return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); 4063} 4064 4065SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain, 4066 SDOperand Dest, 4067 SDOperand Source, 4068 SDOperand Count, 4069 SelectionDAG &DAG) { 4070 MVT::ValueType IntPtr = getPointerTy(); 4071 TargetLowering::ArgListTy Args; 4072 TargetLowering::ArgListEntry Entry; 4073 Entry.Ty = getTargetData()->getIntPtrType(); 4074 Entry.Node = Dest; Args.push_back(Entry); 4075 Entry.Node = Source; Args.push_back(Entry); 4076 Entry.Node = Count; Args.push_back(Entry); 4077 std::pair<SDOperand,SDOperand> CallResult = 4078 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 4079 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 4080 return CallResult.second; 4081} 4082 4083SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4084 SDOperand Dest, 4085 SDOperand Source, 4086 unsigned Size, 4087 unsigned Align, 4088 SelectionDAG &DAG) { 4089 MVT::ValueType AVT; 4090 unsigned BytesLeft = 0; 4091 switch (Align & 3) { 4092 case 2: // WORD aligned 4093 AVT = MVT::i16; 4094 break; 4095 case 0: // DWORD aligned 4096 AVT = MVT::i32; 4097 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4098 AVT = MVT::i64; 4099 break; 4100 default: // Byte aligned 4101 AVT = MVT::i8; 4102 break; 4103 } 4104 4105 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4106 SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy()); 4107 BytesLeft = Size % UBytes; 4108 4109 SDOperand InFlag(0, 0); 4110 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4111 Count, InFlag); 4112 InFlag = Chain.getValue(1); 4113 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4114 Dest, InFlag); 4115 InFlag = Chain.getValue(1); 4116 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4117 Source, InFlag); 4118 InFlag = Chain.getValue(1); 4119 4120 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4121 SmallVector<SDOperand, 8> Ops; 4122 Ops.push_back(Chain); 4123 Ops.push_back(DAG.getValueType(AVT)); 4124 Ops.push_back(InFlag); 4125 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4126 4127 if (BytesLeft) { 4128 // Issue loads and stores for the last 1 - 7 bytes. 
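    // E.g. (illustrative): an 11-byte DWORD-aligned copy runs "rep movsd"
    // with ECX = 2, then the code below copies bytes 8-9 with an i16
    // load/store pair and byte 10 with an i8 pair.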
4129 unsigned Offset = Size - BytesLeft; 4130 SDOperand DstAddr = Dest; 4131 MVT::ValueType DstVT = DstAddr.getValueType(); 4132 SDOperand SrcAddr = Source; 4133 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4134 SDOperand Value; 4135 if (BytesLeft >= 4) { 4136 Value = DAG.getLoad(MVT::i32, Chain, 4137 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4138 DAG.getConstant(Offset, SrcVT)), 4139 NULL, 0); 4140 Chain = Value.getValue(1); 4141 Chain = DAG.getStore(Chain, Value, 4142 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4143 DAG.getConstant(Offset, DstVT)), 4144 NULL, 0); 4145 BytesLeft -= 4; 4146 Offset += 4; 4147 } 4148 if (BytesLeft >= 2) { 4149 Value = DAG.getLoad(MVT::i16, Chain, 4150 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4151 DAG.getConstant(Offset, SrcVT)), 4152 NULL, 0); 4153 Chain = Value.getValue(1); 4154 Chain = DAG.getStore(Chain, Value, 4155 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4156 DAG.getConstant(Offset, DstVT)), 4157 NULL, 0); 4158 BytesLeft -= 2; 4159 Offset += 2; 4160 } 4161 4162 if (BytesLeft == 1) { 4163 Value = DAG.getLoad(MVT::i8, Chain, 4164 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4165 DAG.getConstant(Offset, SrcVT)), 4166 NULL, 0); 4167 Chain = Value.getValue(1); 4168 Chain = DAG.getStore(Chain, Value, 4169 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4170 DAG.getConstant(Offset, DstVT)), 4171 NULL, 0); 4172 } 4173 } 4174 4175 return Chain; 4176} 4177 4178SDOperand 4179X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4180 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4181 SDOperand TheOp = Op.getOperand(0); 4182 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 4183 if (Subtarget->is64Bit()) { 4184 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4185 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 4186 MVT::i64, Copy1.getValue(2)); 4187 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 4188 DAG.getConstant(32, MVT::i8)); 4189 SDOperand Ops[] = { 4190 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 4191 }; 4192 4193 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4194 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 4195 } 4196 4197 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4198 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 4199 MVT::i32, Copy1.getValue(2)); 4200 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 4201 Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 4202 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 4203} 4204 4205SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4206 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4207 4208 if (!Subtarget->is64Bit()) { 4209 // vastart just stores the address of the VarArgsFrameIndex slot into the 4210 // memory location argument. 4211 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4212 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4213 SV->getOffset()); 4214 } 4215 4216 // __va_list_tag: 4217 // gp_offset (0 - 6 * 8) 4218 // fp_offset (48 - 48 + 8 * 16) 4219 // overflow_arg_area (point to parameters coming in memory). 
4220 // reg_save_area 4221 SmallVector<SDOperand, 8> MemOps; 4222 SDOperand FIN = Op.getOperand(1); 4223 // Store gp_offset 4224 SDOperand Store = DAG.getStore(Op.getOperand(0), 4225 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4226 FIN, SV->getValue(), SV->getOffset()); 4227 MemOps.push_back(Store); 4228 4229 // Store fp_offset 4230 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4231 DAG.getConstant(4, getPointerTy())); 4232 Store = DAG.getStore(Op.getOperand(0), 4233 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4234 FIN, SV->getValue(), SV->getOffset()); 4235 MemOps.push_back(Store); 4236 4237 // Store ptr to overflow_arg_area 4238 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4239 DAG.getConstant(4, getPointerTy())); 4240 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4241 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4242 SV->getOffset()); 4243 MemOps.push_back(Store); 4244 4245 // Store ptr to reg_save_area. 4246 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4247 DAG.getConstant(8, getPointerTy())); 4248 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4249 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4250 SV->getOffset()); 4251 MemOps.push_back(Store); 4252 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4253} 4254 4255SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4256 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4257 SDOperand Chain = Op.getOperand(0); 4258 SDOperand DstPtr = Op.getOperand(1); 4259 SDOperand SrcPtr = Op.getOperand(2); 4260 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4261 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4262 4263 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4264 SrcSV->getValue(), SrcSV->getOffset()); 4265 Chain = SrcPtr.getValue(1); 4266 for (unsigned i = 0; i < 3; ++i) { 4267 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4268 SrcSV->getValue(), SrcSV->getOffset()); 4269 Chain = Val.getValue(1); 4270 Chain = DAG.getStore(Chain, Val, DstPtr, 4271 DstSV->getValue(), DstSV->getOffset()); 4272 if (i == 2) 4273 break; 4274 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4275 DAG.getConstant(8, getPointerTy())); 4276 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4277 DAG.getConstant(8, getPointerTy())); 4278 } 4279 return Chain; 4280} 4281 4282SDOperand 4283X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4284 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4285 switch (IntNo) { 4286 default: return SDOperand(); // Don't custom lower most intrinsics. 4287 // Comparison intrinsics. 
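  // These all follow one pattern (sketch): emit a flag-producing COMI/UCOMI
  // node on the two scalar operands, translate the IR predicate into an X86
  // condition code, and materialize the boolean with SETCC plus an i32
  // extension, roughly "ucomiss/comiss; setcc %al; extend".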
4288 case Intrinsic::x86_sse_comieq_ss: 4289 case Intrinsic::x86_sse_comilt_ss: 4290 case Intrinsic::x86_sse_comile_ss: 4291 case Intrinsic::x86_sse_comigt_ss: 4292 case Intrinsic::x86_sse_comige_ss: 4293 case Intrinsic::x86_sse_comineq_ss: 4294 case Intrinsic::x86_sse_ucomieq_ss: 4295 case Intrinsic::x86_sse_ucomilt_ss: 4296 case Intrinsic::x86_sse_ucomile_ss: 4297 case Intrinsic::x86_sse_ucomigt_ss: 4298 case Intrinsic::x86_sse_ucomige_ss: 4299 case Intrinsic::x86_sse_ucomineq_ss: 4300 case Intrinsic::x86_sse2_comieq_sd: 4301 case Intrinsic::x86_sse2_comilt_sd: 4302 case Intrinsic::x86_sse2_comile_sd: 4303 case Intrinsic::x86_sse2_comigt_sd: 4304 case Intrinsic::x86_sse2_comige_sd: 4305 case Intrinsic::x86_sse2_comineq_sd: 4306 case Intrinsic::x86_sse2_ucomieq_sd: 4307 case Intrinsic::x86_sse2_ucomilt_sd: 4308 case Intrinsic::x86_sse2_ucomile_sd: 4309 case Intrinsic::x86_sse2_ucomigt_sd: 4310 case Intrinsic::x86_sse2_ucomige_sd: 4311 case Intrinsic::x86_sse2_ucomineq_sd: { 4312 unsigned Opc = 0; 4313 ISD::CondCode CC = ISD::SETCC_INVALID; 4314 switch (IntNo) { 4315 default: break; 4316 case Intrinsic::x86_sse_comieq_ss: 4317 case Intrinsic::x86_sse2_comieq_sd: 4318 Opc = X86ISD::COMI; 4319 CC = ISD::SETEQ; 4320 break; 4321 case Intrinsic::x86_sse_comilt_ss: 4322 case Intrinsic::x86_sse2_comilt_sd: 4323 Opc = X86ISD::COMI; 4324 CC = ISD::SETLT; 4325 break; 4326 case Intrinsic::x86_sse_comile_ss: 4327 case Intrinsic::x86_sse2_comile_sd: 4328 Opc = X86ISD::COMI; 4329 CC = ISD::SETLE; 4330 break; 4331 case Intrinsic::x86_sse_comigt_ss: 4332 case Intrinsic::x86_sse2_comigt_sd: 4333 Opc = X86ISD::COMI; 4334 CC = ISD::SETGT; 4335 break; 4336 case Intrinsic::x86_sse_comige_ss: 4337 case Intrinsic::x86_sse2_comige_sd: 4338 Opc = X86ISD::COMI; 4339 CC = ISD::SETGE; 4340 break; 4341 case Intrinsic::x86_sse_comineq_ss: 4342 case Intrinsic::x86_sse2_comineq_sd: 4343 Opc = X86ISD::COMI; 4344 CC = ISD::SETNE; 4345 break; 4346 case Intrinsic::x86_sse_ucomieq_ss: 4347 case Intrinsic::x86_sse2_ucomieq_sd: 4348 Opc = X86ISD::UCOMI; 4349 CC = ISD::SETEQ; 4350 break; 4351 case Intrinsic::x86_sse_ucomilt_ss: 4352 case Intrinsic::x86_sse2_ucomilt_sd: 4353 Opc = X86ISD::UCOMI; 4354 CC = ISD::SETLT; 4355 break; 4356 case Intrinsic::x86_sse_ucomile_ss: 4357 case Intrinsic::x86_sse2_ucomile_sd: 4358 Opc = X86ISD::UCOMI; 4359 CC = ISD::SETLE; 4360 break; 4361 case Intrinsic::x86_sse_ucomigt_ss: 4362 case Intrinsic::x86_sse2_ucomigt_sd: 4363 Opc = X86ISD::UCOMI; 4364 CC = ISD::SETGT; 4365 break; 4366 case Intrinsic::x86_sse_ucomige_ss: 4367 case Intrinsic::x86_sse2_ucomige_sd: 4368 Opc = X86ISD::UCOMI; 4369 CC = ISD::SETGE; 4370 break; 4371 case Intrinsic::x86_sse_ucomineq_ss: 4372 case Intrinsic::x86_sse2_ucomineq_sd: 4373 Opc = X86ISD::UCOMI; 4374 CC = ISD::SETNE; 4375 break; 4376 } 4377 4378 unsigned X86CC; 4379 SDOperand LHS = Op.getOperand(1); 4380 SDOperand RHS = Op.getOperand(2); 4381 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4382 4383 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 4384 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 4385 DAG.getConstant(X86CC, MVT::i8), Cond); 4386 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4387 } 4388 } 4389} 4390 4391SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4392 // Depths > 0 not supported yet! 
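  // (For depth > 0 the code simply gives up and returns an empty SDOperand.)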
4393 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4394 return SDOperand(); 4395 4396 // Just load the return address 4397 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4398 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4399} 4400 4401SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4402 // Depths > 0 not supported yet! 4403 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4404 return SDOperand(); 4405 4406 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4407 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4408 DAG.getConstant(4, getPointerTy())); 4409} 4410 4411SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4412 SelectionDAG &DAG) { 4413 // Is not yet supported on x86-64 4414 if (Subtarget->is64Bit()) 4415 return SDOperand(); 4416 4417 return DAG.getConstant(8, getPointerTy()); 4418} 4419 4420SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4421{ 4422 assert(!Subtarget->is64Bit() && 4423 "Lowering of eh_return builtin is not supported yet on x86-64"); 4424 4425 MachineFunction &MF = DAG.getMachineFunction(); 4426 SDOperand Chain = Op.getOperand(0); 4427 SDOperand Offset = Op.getOperand(1); 4428 SDOperand Handler = Op.getOperand(2); 4429 4430 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4431 getPointerTy()); 4432 4433 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4434 DAG.getConstant(-4UL, getPointerTy())); 4435 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4436 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4437 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4438 MF.addLiveOut(X86::ECX); 4439 4440 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4441 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4442} 4443 4444SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4445 SelectionDAG &DAG) { 4446 SDOperand Root = Op.getOperand(0); 4447 SDOperand Trmp = Op.getOperand(1); // trampoline 4448 SDOperand FPtr = Op.getOperand(2); // nested function 4449 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4450 4451 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4452 4453 if (Subtarget->is64Bit()) { 4454 return SDOperand(); // not yet supported 4455 } else { 4456 Function *Func = (Function *) 4457 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4458 unsigned CC = Func->getCallingConv(); 4459 unsigned NestReg; 4460 4461 switch (CC) { 4462 default: 4463 assert(0 && "Unsupported calling convention"); 4464 case CallingConv::C: 4465 case CallingConv::Fast: 4466 case CallingConv::X86_StdCall: { 4467 // Pass 'nest' parameter in ECX. 4468 // Must be kept in sync with X86CallingConv.td 4469 NestReg = X86::ECX; 4470 4471 // Check that ECX wasn't needed by an 'inreg' parameter. 4472 const FunctionType *FTy = Func->getFunctionType(); 4473 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4474 4475 if (Attrs && !Func->isVarArg()) { 4476 unsigned InRegCount = 0; 4477 unsigned Idx = 1; 4478 4479 for (FunctionType::param_iterator I = FTy->param_begin(), 4480 E = FTy->param_end(); I != E; ++I, ++Idx) 4481 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4482 // FIXME: should only count parameters that are lowered to integers. 
            InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;

        if (InRegCount > 2) {
          cerr << "Nest register in use - reduce number of inreg parameters!\n";
          abort();
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
      // Pass 'nest' parameter in EAX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::EAX;
      break;
    }

    const X86InstrInfo *TII =
      ((X86TargetMachine&)getTargetMachine()).getInstrInfo();

    SDOperand OutChains[4];
    SDOperand Addr, Disp;

    // The 32-bit trampoline is ten bytes: "movl $Nest, %NestReg; jmp FPtr".
    // The jump displacement is relative to the end of the trampoline, Trmp+10.
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);

    // Byte 0: the MOV32ri opcode with the destination register encoded in it.
    unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
    unsigned char N86Reg = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
    OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                Trmp, TrmpSV->getValue(), TrmpSV->getOffset());

    // Bytes 1-4: the 32-bit 'nest' value.
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
    OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 1, false, 1);

    // Byte 5: the JMP rel32 opcode.
    unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
    OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
                                TrmpSV->getValue(), TrmpSV->getOffset() + 5);

    // Bytes 6-9: the jump displacement.
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
    OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 6, false, 1);

    SDOperand Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
    return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
4533/// 4534SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4535 switch (Op.getOpcode()) { 4536 default: assert(0 && "Should not custom lower this!"); 4537 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4538 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4539 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4540 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4541 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4542 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4543 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4544 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4545 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4546 case ISD::SHL_PARTS: 4547 case ISD::SRA_PARTS: 4548 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4549 case ISD::SDIV: 4550 case ISD::UDIV: 4551 case ISD::SREM: 4552 case ISD::UREM: return LowerIntegerDivOrRem(Op, DAG); 4553 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4554 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4555 case ISD::FABS: return LowerFABS(Op, DAG); 4556 case ISD::FNEG: return LowerFNEG(Op, DAG); 4557 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4558 case ISD::SETCC: return LowerSETCC(Op, DAG); 4559 case ISD::SELECT: return LowerSELECT(Op, DAG); 4560 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4561 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4562 case ISD::CALL: return LowerCALL(Op, DAG); 4563 case ISD::RET: return LowerRET(Op, DAG); 4564 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4565 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4566 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4567 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4568 case ISD::VASTART: return LowerVASTART(Op, DAG); 4569 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4570 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4571 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4572 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4573 case ISD::FRAME_TO_ARGS_OFFSET: 4574 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 4575 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4576 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 4577 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 4578 } 4579 return SDOperand(); 4580} 4581 4582const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4583 switch (Opcode) { 4584 default: return NULL; 4585 case X86ISD::SHLD: return "X86ISD::SHLD"; 4586 case X86ISD::SHRD: return "X86ISD::SHRD"; 4587 case X86ISD::FAND: return "X86ISD::FAND"; 4588 case X86ISD::FOR: return "X86ISD::FOR"; 4589 case X86ISD::FXOR: return "X86ISD::FXOR"; 4590 case X86ISD::FSRL: return "X86ISD::FSRL"; 4591 case X86ISD::FILD: return "X86ISD::FILD"; 4592 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4593 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4594 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4595 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4596 case X86ISD::FLD: return "X86ISD::FLD"; 4597 case X86ISD::FST: return "X86ISD::FST"; 4598 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4599 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4600 case X86ISD::CALL: return "X86ISD::CALL"; 4601 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4602 case 
  case X86ISD::CMP: return "X86ISD::CMP";
  case X86ISD::COMI: return "X86ISD::COMI";
  case X86ISD::UCOMI: return "X86ISD::UCOMI";
  case X86ISD::SETCC: return "X86ISD::SETCC";
  case X86ISD::CMOV: return "X86ISD::CMOV";
  case X86ISD::BRCOND: return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
  case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper: return "X86ISD::Wrapper";
  case X86ISD::S2VEC: return "X86ISD::S2VEC";
  case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
  case X86ISD::PINSRW: return "X86ISD::PINSRW";
  case X86ISD::FMAX: return "X86ISD::FMAX";
  case X86ISD::FMIN: return "X86ISD::FMIN";
  case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
  case X86ISD::FRCP: return "X86ISD::FRCP";
  case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
  case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
  case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
  case X86ISD::DIV: return "X86ISD::DIV";
  case X86ISD::IDIV: return "X86ISD::IDIV";
  }
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement.
  if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
    return false;

  if (AM.BaseGV) {
    // We can only fold this if we don't need an extra load.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;

    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit()) {
      if (getTargetMachine().getCodeModel() != CodeModel::Small)
        return false;
      // If lower 4G is not available, then we must use rip-relative addressing.
      if (AM.BaseOffs || AM.Scale > 1)
        return false;
    }
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}


/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
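  // 64-bit (MMX-sized) vectors are rejected outright; a 128-bit mask is
  // accepted if it is small (at most 4 elements) or matches one of the
  // identity/splat/PSHUF*/UNPCK* patterns tested below.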
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isIdentityMask(Mask.Val) ||
          isIdentityMask(Mask.Val, true) ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKH_v_undef_Mask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4) ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
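    // The PHI merges the true value (from thisMBB) with the false value
    // (from copy0MBB); phi elimination later lowers it to copies.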
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0)
      : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(),
                               DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::Wrapper) {
    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (Opc == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
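/// For example, with element Size == 4 the load at mask position 2 matches
/// only if its address is exactly Base + 8; both frame-index and
/// global+offset address forms are recognized.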
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}


/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isAlign16) {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  } else {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile(),
                       LD->getAlignment());
  }
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:  // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}


SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                     char Constraint,
                                                     std::vector<SDOperand>&Ops,
                                                     SelectionDAG &DAG) {
  SDOperand Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C), with the add in either operand order.
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we can't
      // match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
      break;
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
  // turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}