X86ISelLowering.cpp revision 068317bfa37ec52ae25f3b2fa3194bea3e7c6027
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that X86 uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86.h" 16#include "X86InstrBuilder.h" 17#include "X86ISelLowering.h" 18#include "X86MachineFunctionInfo.h" 19#include "X86TargetMachine.h" 20#include "llvm/CallingConv.h" 21#include "llvm/Constants.h" 22#include "llvm/DerivedTypes.h" 23#include "llvm/GlobalVariable.h" 24#include "llvm/Function.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/ADT/VectorExtras.h" 27#include "llvm/Analysis/ScalarEvolutionExpressions.h" 28#include "llvm/CodeGen/CallingConvLower.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineFunction.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/SelectionDAG.h" 33#include "llvm/CodeGen/SSARegMap.h" 34#include "llvm/Support/CommandLine.h" 35#include "llvm/Support/MathExtras.h" 36#include "llvm/Target/TargetOptions.h" 37#include "llvm/ADT/StringExtras.h" 38#include "llvm/ParameterAttributes.h" 39using namespace llvm; 40 41X86TargetLowering::X86TargetLowering(TargetMachine &TM) 42 : TargetLowering(TM) { 43 Subtarget = &TM.getSubtarget<X86Subtarget>(); 44 X86ScalarSSEf64 = Subtarget->hasSSE2(); 45 X86ScalarSSEf32 = Subtarget->hasSSE1(); 46 X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; 47 48 RegInfo = TM.getRegisterInfo(); 49 50 // Set up the TargetLowering object. 51 52 // X86 is weird, it always uses i8 for shift amounts and setcc results. 53 setShiftAmountType(MVT::i8); 54 setSetCCResultType(MVT::i8); 55 setSetCCResultContents(ZeroOrOneSetCCResult); 56 setSchedulingPreference(SchedulingForRegPressure); 57 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 58 setStackPointerRegisterToSaveRestore(X86StackPtr); 59 60 if (Subtarget->isTargetDarwin()) { 61 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 62 setUseUnderscoreSetJmp(false); 63 setUseUnderscoreLongJmp(false); 64 } else if (Subtarget->isTargetMingw()) { 65 // MS runtime is weird: it exports _setjmp, but longjmp! 66 setUseUnderscoreSetJmp(true); 67 setUseUnderscoreLongJmp(false); 68 } else { 69 setUseUnderscoreSetJmp(true); 70 setUseUnderscoreLongJmp(true); 71 } 72 73 // Set up the register classes. 74 addRegisterClass(MVT::i8, X86::GR8RegisterClass); 75 addRegisterClass(MVT::i16, X86::GR16RegisterClass); 76 addRegisterClass(MVT::i32, X86::GR32RegisterClass); 77 if (Subtarget->is64Bit()) 78 addRegisterClass(MVT::i64, X86::GR64RegisterClass); 79 80 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand); 81 82 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 83 // operation. 84 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 85 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 86 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 87 88 if (Subtarget->is64Bit()) { 89 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); 90 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 91 } else { 92 if (X86ScalarSSEf64) 93 // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP. 94 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 95 else 96 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 97 } 98 99 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 100 // this operation. 101 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 102 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 103 // SSE has no i16 to fp conversion, only i32 104 if (X86ScalarSSEf32) { 105 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 106 // f32 and f64 cases are Legal, f80 case is not 107 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 108 } else { 109 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 110 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 111 } 112 113 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 114 // are Legal, f80 is custom lowered. 115 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 116 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 117 118 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 119 // this operation. 120 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 121 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 122 123 if (X86ScalarSSEf32) { 124 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 125 // f32 and f64 cases are Legal, f80 case is not 126 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 127 } else { 128 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 129 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 130 } 131 132 // Handle FP_TO_UINT by promoting the destination to a larger signed 133 // conversion. 134 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 135 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 136 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 137 138 if (Subtarget->is64Bit()) { 139 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); 140 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 141 } else { 142 if (X86ScalarSSEf32 && !Subtarget->hasSSE3()) 143 // Expand FP_TO_UINT into a select. 144 // FIXME: We would like to use a Custom expander here eventually to do 145 // the optimal thing for SSE vs. the default expansion in the legalizer. 146 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 147 else 148 // With SSE3 we can use fisttpll to convert to a signed i64. 149 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 150 } 151 152 // TODO: when we have SSE, these could be more efficient, by using movd/movq. 153 if (!X86ScalarSSEf64) { 154 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 155 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 156 } 157 158 // Divide and remainder are lowered to use div or idiv in legalize in 159 // order to expose the intermediate computations to trivial CSE. This is 160 // most noticeable when both x/y and x%y are being computed; they can be 161 // done with a single div or idiv. 162 setOperationAction(ISD::SDIV , MVT::i8 , Custom); 163 setOperationAction(ISD::UDIV , MVT::i8 , Custom); 164 setOperationAction(ISD::SREM , MVT::i8 , Custom); 165 setOperationAction(ISD::UREM , MVT::i8 , Custom); 166 setOperationAction(ISD::SDIV , MVT::i16 , Custom); 167 setOperationAction(ISD::UDIV , MVT::i16 , Custom); 168 setOperationAction(ISD::SREM , MVT::i16 , Custom); 169 setOperationAction(ISD::UREM , MVT::i16 , Custom); 170 setOperationAction(ISD::SDIV , MVT::i32 , Custom); 171 setOperationAction(ISD::UDIV , MVT::i32 , Custom); 172 setOperationAction(ISD::SREM , MVT::i32 , Custom); 173 setOperationAction(ISD::UREM , MVT::i32 , Custom); 174 setOperationAction(ISD::SDIV , MVT::i64 , Custom); 175 setOperationAction(ISD::UDIV , MVT::i64 , Custom); 176 setOperationAction(ISD::SREM , MVT::i64 , Custom); 177 setOperationAction(ISD::UREM , MVT::i64 , Custom); 178 179 setOperationAction(ISD::BR_JT , MVT::Other, Expand); 180 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 181 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 182 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 183 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 184 if (Subtarget->is64Bit()) 185 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 186 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); 187 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 188 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 189 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 190 setOperationAction(ISD::FREM , MVT::f64 , Expand); 191 192 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 193 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 194 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 195 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 196 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 197 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 198 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 199 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 200 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 201 if (Subtarget->is64Bit()) { 202 setOperationAction(ISD::CTPOP , MVT::i64 , Expand); 203 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 204 setOperationAction(ISD::CTLZ , MVT::i64 , Expand); 205 } 206 207 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 208 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 209 210 // These should be promoted to a larger select which is supported. 211 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 212 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 213 // X86 wants to expand cmov itself. 214 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 215 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 216 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 217 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 218 setOperationAction(ISD::SELECT , MVT::f80 , Custom); 219 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 220 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 221 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 222 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 223 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 224 setOperationAction(ISD::SETCC , MVT::f80 , Custom); 225 if (Subtarget->is64Bit()) { 226 setOperationAction(ISD::SELECT , MVT::i64 , Custom); 227 setOperationAction(ISD::SETCC , MVT::i64 , Custom); 228 } 229 // X86 ret instruction may pop stack. 230 setOperationAction(ISD::RET , MVT::Other, Custom); 231 if (!Subtarget->is64Bit()) 232 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); 233 234 // Darwin ABI issue. 235 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 236 setOperationAction(ISD::JumpTable , MVT::i32 , Custom); 237 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 238 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); 239 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 240 if (Subtarget->is64Bit()) { 241 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); 242 setOperationAction(ISD::JumpTable , MVT::i64 , Custom); 243 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); 244 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom); 245 } 246 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 247 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 248 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 249 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 250 // X86 wants to expand memset / memcpy itself. 251 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 252 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 253 254 // Use the default ISD::LOCATION expansion. 255 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 256 // FIXME - use subtarget debug flags 257 if (!Subtarget->isTargetDarwin() && 258 !Subtarget->isTargetELF() && 259 !Subtarget->isTargetCygMing()) 260 setOperationAction(ISD::LABEL, MVT::Other, Expand); 261 262 setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); 263 setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); 264 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 265 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 266 if (Subtarget->is64Bit()) { 267 // FIXME: Verify 268 setExceptionPointerRegister(X86::RAX); 269 setExceptionSelectorRegister(X86::RDX); 270 } else { 271 setExceptionPointerRegister(X86::EAX); 272 setExceptionSelectorRegister(X86::EDX); 273 } 274 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); 275 276 setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); 277 278 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 279 setOperationAction(ISD::VASTART , MVT::Other, Custom); 280 setOperationAction(ISD::VAARG , MVT::Other, Expand); 281 setOperationAction(ISD::VAEND , MVT::Other, Expand); 282 if (Subtarget->is64Bit()) 283 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 284 else 285 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 286 287 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 288 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 289 if (Subtarget->is64Bit()) 290 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); 291 if (Subtarget->isTargetCygMing()) 292 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); 293 else 294 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 295 296 if (X86ScalarSSEf64) { 297 // f32 and f64 use SSE. 298 // Set up the FP register classes. 299 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 300 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 301 302 // Use ANDPD to simulate FABS. 303 setOperationAction(ISD::FABS , MVT::f64, Custom); 304 setOperationAction(ISD::FABS , MVT::f32, Custom); 305 306 // Use XORP to simulate FNEG. 307 setOperationAction(ISD::FNEG , MVT::f64, Custom); 308 setOperationAction(ISD::FNEG , MVT::f32, Custom); 309 310 // Use ANDPD and ORPD to simulate FCOPYSIGN. 311 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 312 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 313 314 // We don't support sin/cos/fmod 315 setOperationAction(ISD::FSIN , MVT::f64, Expand); 316 setOperationAction(ISD::FCOS , MVT::f64, Expand); 317 setOperationAction(ISD::FREM , MVT::f64, Expand); 318 setOperationAction(ISD::FSIN , MVT::f32, Expand); 319 setOperationAction(ISD::FCOS , MVT::f32, Expand); 320 setOperationAction(ISD::FREM , MVT::f32, Expand); 321 322 // Expand FP immediates into loads from the stack, except for the special 323 // cases we handle. 324 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 325 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 326 addLegalFPImmediate(APFloat(+0.0)); // xorpd 327 addLegalFPImmediate(APFloat(+0.0f)); // xorps 328 329 // Conversions to long double (in X87) go through memory. 330 setConvertAction(MVT::f32, MVT::f80, Expand); 331 setConvertAction(MVT::f64, MVT::f80, Expand); 332 333 // Conversions from long double (in X87) go through memory. 334 setConvertAction(MVT::f80, MVT::f32, Expand); 335 setConvertAction(MVT::f80, MVT::f64, Expand); 336 } else if (X86ScalarSSEf32) { 337 // Use SSE for f32, x87 for f64. 338 // Set up the FP register classes. 339 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 340 addRegisterClass(MVT::f64, X86::RFP64RegisterClass); 341 342 // Use ANDPS to simulate FABS. 343 setOperationAction(ISD::FABS , MVT::f32, Custom); 344 345 // Use XORP to simulate FNEG. 346 setOperationAction(ISD::FNEG , MVT::f32, Custom); 347 348 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 349 350 // Use ANDPS and ORPS to simulate FCOPYSIGN. 351 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 352 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 353 354 // We don't support sin/cos/fmod 355 setOperationAction(ISD::FSIN , MVT::f32, Expand); 356 setOperationAction(ISD::FCOS , MVT::f32, Expand); 357 setOperationAction(ISD::FREM , MVT::f32, Expand); 358 359 // Expand FP immediates into loads from the stack, except for the special 360 // cases we handle. 361 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 362 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 363 addLegalFPImmediate(APFloat(+0.0f)); // xorps 364 addLegalFPImmediate(APFloat(+0.0)); // FLD0 365 addLegalFPImmediate(APFloat(+1.0)); // FLD1 366 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS 367 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS 368 369 // SSE->x87 conversions go through memory. 370 setConvertAction(MVT::f32, MVT::f64, Expand); 371 setConvertAction(MVT::f32, MVT::f80, Expand); 372 373 // x87->SSE truncations need to go through memory. 374 setConvertAction(MVT::f80, MVT::f32, Expand); 375 setConvertAction(MVT::f64, MVT::f32, Expand); 376 // And x87->x87 truncations also. 377 setConvertAction(MVT::f80, MVT::f64, Expand); 378 379 if (!UnsafeFPMath) { 380 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 381 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 382 } 383 } else { 384 // f32 and f64 in x87. 385 // Set up the FP register classes. 386 addRegisterClass(MVT::f64, X86::RFP64RegisterClass); 387 addRegisterClass(MVT::f32, X86::RFP32RegisterClass); 388 389 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 390 setOperationAction(ISD::UNDEF, MVT::f32, Expand); 391 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 392 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 393 394 // Floating truncations need to go through memory. 395 setConvertAction(MVT::f80, MVT::f32, Expand); 396 setConvertAction(MVT::f64, MVT::f32, Expand); 397 setConvertAction(MVT::f80, MVT::f64, Expand); 398 399 if (!UnsafeFPMath) { 400 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 401 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 402 } 403 404 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 405 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 406 addLegalFPImmediate(APFloat(+0.0)); // FLD0 407 addLegalFPImmediate(APFloat(+1.0)); // FLD1 408 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS 409 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS 410 addLegalFPImmediate(APFloat(+0.0f)); // FLD0 411 addLegalFPImmediate(APFloat(+1.0f)); // FLD1 412 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS 413 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS 414 } 415 416 // Long double always uses X87. 417 addRegisterClass(MVT::f80, X86::RFP80RegisterClass); 418 setOperationAction(ISD::UNDEF, MVT::f80, Expand); 419 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); 420 setOperationAction(ISD::ConstantFP, MVT::f80, Expand); 421 if (!UnsafeFPMath) { 422 setOperationAction(ISD::FSIN , MVT::f80 , Expand); 423 setOperationAction(ISD::FCOS , MVT::f80 , Expand); 424 } 425 426 // First set operation action for all vector types to expand. Then we 427 // will selectively turn on ones that can be effectively codegen'd. 428 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 429 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 430 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 431 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 432 setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand); 433 setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand); 434 setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand); 435 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 436 setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand); 437 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand); 438 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand); 439 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand); 440 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand); 441 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand); 442 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 443 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 444 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 445 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 446 setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand); 447 setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand); 448 setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand); 449 setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand); 450 setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand); 451 setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand); 452 setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand); 453 } 454 455 if (Subtarget->hasMMX()) { 456 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 457 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 458 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 459 addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); 460 461 // FIXME: add MMX packed arithmetics 462 463 setOperationAction(ISD::ADD, MVT::v8i8, Legal); 464 setOperationAction(ISD::ADD, MVT::v4i16, Legal); 465 setOperationAction(ISD::ADD, MVT::v2i32, Legal); 466 setOperationAction(ISD::ADD, MVT::v1i64, Legal); 467 468 setOperationAction(ISD::SUB, MVT::v8i8, Legal); 469 setOperationAction(ISD::SUB, MVT::v4i16, Legal); 470 setOperationAction(ISD::SUB, MVT::v2i32, Legal); 471 472 setOperationAction(ISD::MULHS, MVT::v4i16, Legal); 473 setOperationAction(ISD::MUL, MVT::v4i16, Legal); 474 475 setOperationAction(ISD::AND, MVT::v8i8, Promote); 476 AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); 477 setOperationAction(ISD::AND, MVT::v4i16, Promote); 478 AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); 479 setOperationAction(ISD::AND, MVT::v2i32, Promote); 480 AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); 481 setOperationAction(ISD::AND, MVT::v1i64, Legal); 482 483 setOperationAction(ISD::OR, MVT::v8i8, Promote); 484 AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); 485 setOperationAction(ISD::OR, MVT::v4i16, Promote); 486 AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); 487 setOperationAction(ISD::OR, MVT::v2i32, Promote); 488 AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); 489 setOperationAction(ISD::OR, MVT::v1i64, Legal); 490 491 setOperationAction(ISD::XOR, MVT::v8i8, Promote); 492 AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); 493 setOperationAction(ISD::XOR, MVT::v4i16, Promote); 494 AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); 495 setOperationAction(ISD::XOR, MVT::v2i32, Promote); 496 AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); 497 setOperationAction(ISD::XOR, MVT::v1i64, Legal); 498 499 setOperationAction(ISD::LOAD, MVT::v8i8, Promote); 500 AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); 501 setOperationAction(ISD::LOAD, MVT::v4i16, Promote); 502 AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); 503 setOperationAction(ISD::LOAD, MVT::v2i32, Promote); 504 AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); 505 setOperationAction(ISD::LOAD, MVT::v1i64, Legal); 506 507 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); 508 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); 509 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); 510 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); 511 512 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); 513 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); 514 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); 515 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); 516 517 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); 518 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); 519 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom); 520 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); 521 } 522 523 if (Subtarget->hasSSE1()) { 524 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 525 526 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 527 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 528 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 529 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 530 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 531 setOperationAction(ISD::FNEG, MVT::v4f32, Custom); 532 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 533 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 534 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 535 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 536 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 537 } 538 539 if (Subtarget->hasSSE2()) { 540 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 541 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 542 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 543 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 544 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 545 546 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 547 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 548 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 549 setOperationAction(ISD::ADD, MVT::v2i64, Legal); 550 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 551 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 552 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 553 setOperationAction(ISD::SUB, MVT::v2i64, Legal); 554 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 555 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 556 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 557 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 558 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 559 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 560 setOperationAction(ISD::FNEG, MVT::v2f64, Custom); 561 562 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 563 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 564 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 565 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 566 // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. 567 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 568 569 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 570 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 571 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); 572 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); 573 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); 574 } 575 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 576 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 577 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 578 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 579 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 580 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 581 582 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 583 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 584 setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); 585 AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); 586 setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); 587 AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); 588 setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); 589 AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); 590 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 591 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 592 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 593 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 594 } 595 596 // Custom lower v2i64 and v2f64 selects. 597 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 598 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 599 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 600 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 601 } 602 603 // We want to custom lower some of our intrinsics. 604 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 605 606 // We have target-specific dag combine patterns for the following nodes: 607 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 608 setTargetDAGCombine(ISD::SELECT); 609 610 computeRegisterProperties(); 611 612 // FIXME: These should be based on subtarget info. Plus, the values should 613 // be smaller when we are in optimizing for size mode. 614 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 615 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 616 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 617 allowUnalignedMemoryAccesses = true; // x86 supports it! 618} 619 620 621//===----------------------------------------------------------------------===// 622// Return Value Calling Convention Implementation 623//===----------------------------------------------------------------------===// 624 625#include "X86GenCallingConv.inc" 626 627/// LowerRET - Lower an ISD::RET node. 628SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { 629 assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args"); 630 631 SmallVector<CCValAssign, 16> RVLocs; 632 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); 633 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 634 CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); 635 CCInfo.AnalyzeReturn(Op.Val, RetCC_X86); 636 637 638 // If this is the first return lowered for this function, add the regs to the 639 // liveout set for the function. 640 if (DAG.getMachineFunction().liveout_empty()) { 641 for (unsigned i = 0; i != RVLocs.size(); ++i) 642 if (RVLocs[i].isRegLoc()) 643 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg()); 644 } 645 646 SDOperand Chain = Op.getOperand(0); 647 SDOperand Flag; 648 649 // Copy the result values into the output registers. 650 if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() || 651 RVLocs[0].getLocReg() != X86::ST0) { 652 for (unsigned i = 0; i != RVLocs.size(); ++i) { 653 CCValAssign &VA = RVLocs[i]; 654 assert(VA.isRegLoc() && "Can only return in registers!"); 655 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), 656 Flag); 657 Flag = Chain.getValue(1); 658 } 659 } else { 660 // We need to handle a destination of ST0 specially, because it isn't really 661 // a register. 662 SDOperand Value = Op.getOperand(1); 663 664 // If this is an FP return with ScalarSSE, we need to move the value from 665 // an XMM register onto the fp-stack. 666 if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) || 667 (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) { 668 SDOperand MemLoc; 669 670 // If this is a load into a scalarsse value, don't store the loaded value 671 // back to the stack, only to reload it: just replace the scalar-sse load. 672 if (ISD::isNON_EXTLoad(Value.Val) && 673 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 674 Chain = Value.getOperand(0); 675 MemLoc = Value.getOperand(1); 676 } else { 677 // Spill the value to memory and reload it into top of stack. 678 unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8; 679 MachineFunction &MF = DAG.getMachineFunction(); 680 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 681 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 682 Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0); 683 } 684 SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other); 685 SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())}; 686 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 687 Chain = Value.getValue(1); 688 } 689 690 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 691 SDOperand Ops[] = { Chain, Value }; 692 Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2); 693 Flag = Chain.getValue(1); 694 } 695 696 SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16); 697 if (Flag.Val) 698 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag); 699 else 700 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop); 701} 702 703 704/// LowerCallResult - Lower the result values of an ISD::CALL into the 705/// appropriate copies out of appropriate physical registers. This assumes that 706/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 707/// being lowered. The returns a SDNode with the same number of values as the 708/// ISD::CALL. 709SDNode *X86TargetLowering:: 710LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, 711 unsigned CallingConv, SelectionDAG &DAG) { 712 713 // Assign locations to each value returned by this call. 714 SmallVector<CCValAssign, 16> RVLocs; 715 bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0; 716 CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); 717 CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); 718 719 720 SmallVector<SDOperand, 8> ResultVals; 721 722 // Copy all of the result registers out of their specified physreg. 723 if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) { 724 for (unsigned i = 0; i != RVLocs.size(); ++i) { 725 Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(), 726 RVLocs[i].getValVT(), InFlag).getValue(1); 727 InFlag = Chain.getValue(2); 728 ResultVals.push_back(Chain.getValue(0)); 729 } 730 } else { 731 // Copies from the FP stack are special, as ST0 isn't a valid register 732 // before the fp stackifier runs. 733 734 // Copy ST0 into an RFP register with FP_GET_RESULT. 735 SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag); 736 SDOperand GROps[] = { Chain, InFlag }; 737 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2); 738 Chain = RetVal.getValue(1); 739 InFlag = RetVal.getValue(2); 740 741 // If we are using ScalarSSE, store ST(0) to the stack and reload it into 742 // an XMM register. 743 if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) || 744 (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) { 745 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 746 // shouldn't be necessary except that RFP cannot be live across 747 // multiple blocks. When stackifier is fixed, they can be uncoupled. 748 MachineFunction &MF = DAG.getMachineFunction(); 749 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 750 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 751 SDOperand Ops[] = { 752 Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag 753 }; 754 Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5); 755 RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0); 756 Chain = RetVal.getValue(1); 757 } 758 ResultVals.push_back(RetVal); 759 } 760 761 // Merge everything together with a MERGE_VALUES node. 762 ResultVals.push_back(Chain); 763 return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(), 764 &ResultVals[0], ResultVals.size()).Val; 765} 766 767 768//===----------------------------------------------------------------------===// 769// C & StdCall Calling Convention implementation 770//===----------------------------------------------------------------------===// 771// StdCall calling convention seems to be standard for many Windows' API 772// routines and around. It differs from C calling convention just a little: 773// callee should clean up the stack, not caller. Symbols should be also 774// decorated in some fancy way :) It doesn't support any vector arguments. 775 776/// AddLiveIn - This helper function adds the specified physical register to the 777/// MachineFunction as a live in value. It also creates a corresponding virtual 778/// register for it. 779static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 780 const TargetRegisterClass *RC) { 781 assert(RC->contains(PReg) && "Not the correct regclass!"); 782 unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC); 783 MF.addLiveIn(PReg, VReg); 784 return VReg; 785} 786 787SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG, 788 const CCValAssign &VA, 789 MachineFrameInfo *MFI, 790 SDOperand Root, unsigned i) { 791 // Create the nodes corresponding to a load from this parameter slot. 792 int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, 793 VA.getLocMemOffset()); 794 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 795 796 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue(); 797 798 if (Flags & ISD::ParamFlags::ByVal) 799 return FIN; 800 else 801 return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0); 802} 803 804SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, 805 bool isStdCall) { 806 unsigned NumArgs = Op.Val->getNumValues() - 1; 807 MachineFunction &MF = DAG.getMachineFunction(); 808 MachineFrameInfo *MFI = MF.getFrameInfo(); 809 SDOperand Root = Op.getOperand(0); 810 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 811 812 // Assign locations to all of the incoming arguments. 813 SmallVector<CCValAssign, 16> ArgLocs; 814 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 815 getTargetMachine(), ArgLocs); 816 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); 817 818 SmallVector<SDOperand, 8> ArgValues; 819 unsigned LastVal = ~0U; 820 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 821 CCValAssign &VA = ArgLocs[i]; 822 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 823 // places. 824 assert(VA.getValNo() != LastVal && 825 "Don't support value assigned to multiple locs yet"); 826 LastVal = VA.getValNo(); 827 828 if (VA.isRegLoc()) { 829 MVT::ValueType RegVT = VA.getLocVT(); 830 TargetRegisterClass *RC; 831 if (RegVT == MVT::i32) 832 RC = X86::GR32RegisterClass; 833 else { 834 assert(MVT::isVector(RegVT)); 835 RC = X86::VR128RegisterClass; 836 } 837 838 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 839 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 840 841 // If this is an 8 or 16-bit value, it is really passed promoted to 32 842 // bits. Insert an assert[sz]ext to capture this, then truncate to the 843 // right size. 844 if (VA.getLocInfo() == CCValAssign::SExt) 845 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 846 DAG.getValueType(VA.getValVT())); 847 else if (VA.getLocInfo() == CCValAssign::ZExt) 848 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 849 DAG.getValueType(VA.getValVT())); 850 851 if (VA.getLocInfo() != CCValAssign::Full) 852 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 853 854 ArgValues.push_back(ArgValue); 855 } else { 856 assert(VA.isMemLoc()); 857 ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i)); 858 } 859 } 860 861 unsigned StackSize = CCInfo.getNextStackOffset(); 862 863 ArgValues.push_back(Root); 864 865 // If the function takes variable number of arguments, make a frame index for 866 // the start of the first vararg value... for expansion of llvm.va_start. 867 if (isVarArg) 868 VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); 869 870 if (isStdCall && !isVarArg) { 871 BytesToPopOnReturn = StackSize; // Callee pops everything.. 872 BytesCallerReserves = 0; 873 } else { 874 BytesToPopOnReturn = 0; // Callee pops nothing. 875 876 // If this is an sret function, the return should pop the hidden pointer. 877 if (NumArgs && 878 (cast<ConstantSDNode>(Op.getOperand(3))->getValue() & 879 ISD::ParamFlags::StructReturn)) 880 BytesToPopOnReturn = 4; 881 882 BytesCallerReserves = StackSize; 883 } 884 885 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 886 887 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 888 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 889 890 // Return the new list of results. 891 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), 892 &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); 893} 894 895SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, 896 unsigned CC) { 897 SDOperand Chain = Op.getOperand(0); 898 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 899 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 900 SDOperand Callee = Op.getOperand(4); 901 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 902 903 // Analyze operands of the call, assigning locations to each operand. 904 SmallVector<CCValAssign, 16> ArgLocs; 905 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 906 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); 907 908 // Get a count of how many bytes are to be pushed on the stack. 909 unsigned NumBytes = CCInfo.getNextStackOffset(); 910 911 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 912 913 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 914 SmallVector<SDOperand, 8> MemOpChains; 915 916 SDOperand StackPtr; 917 918 // Walk the register/memloc assignments, inserting copies/loads. 919 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 920 CCValAssign &VA = ArgLocs[i]; 921 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 922 923 // Promote the value if needed. 924 switch (VA.getLocInfo()) { 925 default: assert(0 && "Unknown loc info!"); 926 case CCValAssign::Full: break; 927 case CCValAssign::SExt: 928 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 929 break; 930 case CCValAssign::ZExt: 931 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 932 break; 933 case CCValAssign::AExt: 934 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 935 break; 936 } 937 938 if (VA.isRegLoc()) { 939 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 940 } else { 941 assert(VA.isMemLoc()); 942 if (StackPtr.Val == 0) 943 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 944 945 MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain, 946 Arg)); 947 } 948 } 949 950 // If the first argument is an sret pointer, remember it. 951 bool isSRet = NumOps && 952 (cast<ConstantSDNode>(Op.getOperand(6))->getValue() & 953 ISD::ParamFlags::StructReturn); 954 955 if (!MemOpChains.empty()) 956 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 957 &MemOpChains[0], MemOpChains.size()); 958 959 // Build a sequence of copy-to-reg nodes chained together with token chain 960 // and flag operands which copy the outgoing args into registers. 961 SDOperand InFlag; 962 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 963 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 964 InFlag); 965 InFlag = Chain.getValue(1); 966 } 967 968 // ELF / PIC requires GOT in the EBX register before function calls via PLT 969 // GOT pointer. 970 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 971 Subtarget->isPICStyleGOT()) { 972 Chain = DAG.getCopyToReg(Chain, X86::EBX, 973 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 974 InFlag); 975 InFlag = Chain.getValue(1); 976 } 977 978 // If the callee is a GlobalAddress node (quite common, every direct call is) 979 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 980 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 981 // We should use extra load for direct calls to dllimported functions in 982 // non-JIT mode. 983 if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), 984 getTargetMachine(), true)) 985 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 986 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 987 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 988 989 // Returns a chain & a flag for retval copy to use. 990 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 991 SmallVector<SDOperand, 8> Ops; 992 Ops.push_back(Chain); 993 Ops.push_back(Callee); 994 995 // Add argument registers to the end of the list so that they are known live 996 // into the call. 997 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 998 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 999 RegsToPass[i].second.getValueType())); 1000 1001 // Add an implicit use GOT pointer in EBX. 1002 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1003 Subtarget->isPICStyleGOT()) 1004 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 1005 1006 if (InFlag.Val) 1007 Ops.push_back(InFlag); 1008 1009 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1010 NodeTys, &Ops[0], Ops.size()); 1011 InFlag = Chain.getValue(1); 1012 1013 // Create the CALLSEQ_END node. 1014 unsigned NumBytesForCalleeToPush = 0; 1015 1016 if (CC == CallingConv::X86_StdCall) { 1017 if (isVarArg) 1018 NumBytesForCalleeToPush = isSRet ? 4 : 0; 1019 else 1020 NumBytesForCalleeToPush = NumBytes; 1021 } else { 1022 // If this is is a call to a struct-return function, the callee 1023 // pops the hidden struct pointer, so we have to push it back. 1024 // This is common for Darwin/X86, Linux & Mingw32 targets. 1025 NumBytesForCalleeToPush = isSRet ? 4 : 0; 1026 } 1027 1028 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1029 Ops.clear(); 1030 Ops.push_back(Chain); 1031 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1032 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); 1033 Ops.push_back(InFlag); 1034 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1035 InFlag = Chain.getValue(1); 1036 1037 // Handle result values, copying them out of physregs into vregs that we 1038 // return. 1039 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1040} 1041 1042 1043//===----------------------------------------------------------------------===// 1044// FastCall Calling Convention implementation 1045//===----------------------------------------------------------------------===// 1046// 1047// The X86 'fastcall' calling convention passes up to two integer arguments in 1048// registers (an appropriate portion of ECX/EDX), passes arguments in C order, 1049// and requires that the callee pop its arguments off the stack (allowing proper 1050// tail calls), and has the same return value conventions as C calling convs. 1051// 1052// This calling convention always arranges for the callee pop value to be 8n+4 1053// bytes, which is needed for tail recursion elimination and stack alignment 1054// reasons. 1055SDOperand 1056X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 1057 MachineFunction &MF = DAG.getMachineFunction(); 1058 MachineFrameInfo *MFI = MF.getFrameInfo(); 1059 SDOperand Root = Op.getOperand(0); 1060 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1061 1062 // Assign locations to all of the incoming arguments. 1063 SmallVector<CCValAssign, 16> ArgLocs; 1064 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 1065 getTargetMachine(), ArgLocs); 1066 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall); 1067 1068 SmallVector<SDOperand, 8> ArgValues; 1069 unsigned LastVal = ~0U; 1070 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1071 CCValAssign &VA = ArgLocs[i]; 1072 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 1073 // places. 1074 assert(VA.getValNo() != LastVal && 1075 "Don't support value assigned to multiple locs yet"); 1076 LastVal = VA.getValNo(); 1077 1078 if (VA.isRegLoc()) { 1079 MVT::ValueType RegVT = VA.getLocVT(); 1080 TargetRegisterClass *RC; 1081 if (RegVT == MVT::i32) 1082 RC = X86::GR32RegisterClass; 1083 else { 1084 assert(MVT::isVector(RegVT)); 1085 RC = X86::VR128RegisterClass; 1086 } 1087 1088 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 1089 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 1090 1091 // If this is an 8 or 16-bit value, it is really passed promoted to 32 1092 // bits. Insert an assert[sz]ext to capture this, then truncate to the 1093 // right size. 1094 if (VA.getLocInfo() == CCValAssign::SExt) 1095 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 1096 DAG.getValueType(VA.getValVT())); 1097 else if (VA.getLocInfo() == CCValAssign::ZExt) 1098 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 1099 DAG.getValueType(VA.getValVT())); 1100 1101 if (VA.getLocInfo() != CCValAssign::Full) 1102 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 1103 1104 ArgValues.push_back(ArgValue); 1105 } else { 1106 assert(VA.isMemLoc()); 1107 ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i)); 1108 } 1109 } 1110 1111 ArgValues.push_back(Root); 1112 1113 unsigned StackSize = CCInfo.getNextStackOffset(); 1114 1115 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { 1116 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1117 // arguments and the arguments after the retaddr has been pushed are aligned. 1118 if ((StackSize & 7) == 0) 1119 StackSize += 4; 1120 } 1121 1122 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 1123 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 1124 BytesToPopOnReturn = StackSize; // Callee pops all stack arguments. 1125 BytesCallerReserves = 0; 1126 1127 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1128 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1129 1130 // Return the new list of results. 1131 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), 1132 &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); 1133} 1134 1135SDOperand 1136X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG, 1137 const SDOperand &StackPtr, 1138 const CCValAssign &VA, 1139 SDOperand Chain, 1140 SDOperand Arg) { 1141 SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); 1142 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1143 SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo()); 1144 unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue(); 1145 if (Flags & ISD::ParamFlags::ByVal) { 1146 unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >> 1147 ISD::ParamFlags::ByValAlignOffs); 1148 1149 unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >> 1150 ISD::ParamFlags::ByValSizeOffs; 1151 1152 SDOperand AlignNode = DAG.getConstant(Align, MVT::i32); 1153 SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); 1154 1155 return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode, 1156 AlignNode); 1157 } else { 1158 return DAG.getStore(Chain, Arg, PtrOff, NULL, 0); 1159 } 1160} 1161 1162SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, 1163 unsigned CC) { 1164 SDOperand Chain = Op.getOperand(0); 1165 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1166 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1167 SDOperand Callee = Op.getOperand(4); 1168 1169 // Analyze operands of the call, assigning locations to each operand. 1170 SmallVector<CCValAssign, 16> ArgLocs; 1171 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1172 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall); 1173 1174 // Get a count of how many bytes are to be pushed on the stack. 1175 unsigned NumBytes = CCInfo.getNextStackOffset(); 1176 1177 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { 1178 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1179 // arguments and the arguments after the retaddr has been pushed are aligned. 1180 if ((NumBytes & 7) == 0) 1181 NumBytes += 4; 1182 } 1183 1184 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1185 1186 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 1187 SmallVector<SDOperand, 8> MemOpChains; 1188 1189 SDOperand StackPtr; 1190 1191 // Walk the register/memloc assignments, inserting copies/loads. 1192 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1193 CCValAssign &VA = ArgLocs[i]; 1194 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1195 1196 // Promote the value if needed. 1197 switch (VA.getLocInfo()) { 1198 default: assert(0 && "Unknown loc info!"); 1199 case CCValAssign::Full: break; 1200 case CCValAssign::SExt: 1201 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1202 break; 1203 case CCValAssign::ZExt: 1204 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1205 break; 1206 case CCValAssign::AExt: 1207 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1208 break; 1209 } 1210 1211 if (VA.isRegLoc()) { 1212 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1213 } else { 1214 assert(VA.isMemLoc()); 1215 if (StackPtr.Val == 0) 1216 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 1217 1218 MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain, 1219 Arg)); 1220 } 1221 } 1222 1223 if (!MemOpChains.empty()) 1224 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1225 &MemOpChains[0], MemOpChains.size()); 1226 1227 // Build a sequence of copy-to-reg nodes chained together with token chain 1228 // and flag operands which copy the outgoing args into registers. 1229 SDOperand InFlag; 1230 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1231 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1232 InFlag); 1233 InFlag = Chain.getValue(1); 1234 } 1235 1236 // If the callee is a GlobalAddress node (quite common, every direct call is) 1237 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1238 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1239 // We should use extra load for direct calls to dllimported functions in 1240 // non-JIT mode. 1241 if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1242 getTargetMachine(), true)) 1243 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1244 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1245 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1246 1247 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1248 // GOT pointer. 1249 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1250 Subtarget->isPICStyleGOT()) { 1251 Chain = DAG.getCopyToReg(Chain, X86::EBX, 1252 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 1253 InFlag); 1254 InFlag = Chain.getValue(1); 1255 } 1256 1257 // Returns a chain & a flag for retval copy to use. 1258 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1259 SmallVector<SDOperand, 8> Ops; 1260 Ops.push_back(Chain); 1261 Ops.push_back(Callee); 1262 1263 // Add argument registers to the end of the list so that they are known live 1264 // into the call. 1265 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1266 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1267 RegsToPass[i].second.getValueType())); 1268 1269 // Add an implicit use GOT pointer in EBX. 1270 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1271 Subtarget->isPICStyleGOT()) 1272 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 1273 1274 if (InFlag.Val) 1275 Ops.push_back(InFlag); 1276 1277 // FIXME: Do not generate X86ISD::TAILCALL for now. 1278 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1279 NodeTys, &Ops[0], Ops.size()); 1280 InFlag = Chain.getValue(1); 1281 1282 // Returns a flag for retval copy to use. 1283 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1284 Ops.clear(); 1285 Ops.push_back(Chain); 1286 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1287 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1288 Ops.push_back(InFlag); 1289 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1290 InFlag = Chain.getValue(1); 1291 1292 // Handle result values, copying them out of physregs into vregs that we 1293 // return. 1294 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1295} 1296 1297 1298//===----------------------------------------------------------------------===// 1299// X86-64 C Calling Convention implementation 1300//===----------------------------------------------------------------------===// 1301 1302SDOperand 1303X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { 1304 MachineFunction &MF = DAG.getMachineFunction(); 1305 MachineFrameInfo *MFI = MF.getFrameInfo(); 1306 SDOperand Root = Op.getOperand(0); 1307 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1308 1309 static const unsigned GPR64ArgRegs[] = { 1310 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 1311 }; 1312 static const unsigned XMMArgRegs[] = { 1313 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1314 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1315 }; 1316 1317 1318 // Assign locations to all of the incoming arguments. 1319 SmallVector<CCValAssign, 16> ArgLocs; 1320 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 1321 getTargetMachine(), ArgLocs); 1322 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); 1323 1324 SmallVector<SDOperand, 8> ArgValues; 1325 unsigned LastVal = ~0U; 1326 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1327 CCValAssign &VA = ArgLocs[i]; 1328 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 1329 // places. 1330 assert(VA.getValNo() != LastVal && 1331 "Don't support value assigned to multiple locs yet"); 1332 LastVal = VA.getValNo(); 1333 1334 if (VA.isRegLoc()) { 1335 MVT::ValueType RegVT = VA.getLocVT(); 1336 TargetRegisterClass *RC; 1337 if (RegVT == MVT::i32) 1338 RC = X86::GR32RegisterClass; 1339 else if (RegVT == MVT::i64) 1340 RC = X86::GR64RegisterClass; 1341 else if (RegVT == MVT::f32) 1342 RC = X86::FR32RegisterClass; 1343 else if (RegVT == MVT::f64) 1344 RC = X86::FR64RegisterClass; 1345 else { 1346 assert(MVT::isVector(RegVT)); 1347 if (MVT::getSizeInBits(RegVT) == 64) { 1348 RC = X86::GR64RegisterClass; // MMX values are passed in GPRs. 1349 RegVT = MVT::i64; 1350 } else 1351 RC = X86::VR128RegisterClass; 1352 } 1353 1354 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 1355 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 1356 1357 // If this is an 8 or 16-bit value, it is really passed promoted to 32 1358 // bits. Insert an assert[sz]ext to capture this, then truncate to the 1359 // right size. 1360 if (VA.getLocInfo() == CCValAssign::SExt) 1361 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 1362 DAG.getValueType(VA.getValVT())); 1363 else if (VA.getLocInfo() == CCValAssign::ZExt) 1364 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 1365 DAG.getValueType(VA.getValVT())); 1366 1367 if (VA.getLocInfo() != CCValAssign::Full) 1368 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 1369 1370 // Handle MMX values passed in GPRs. 1371 if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass && 1372 MVT::getSizeInBits(RegVT) == 64) 1373 ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue); 1374 1375 ArgValues.push_back(ArgValue); 1376 } else { 1377 assert(VA.isMemLoc()); 1378 ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i)); 1379 } 1380 } 1381 1382 unsigned StackSize = CCInfo.getNextStackOffset(); 1383 1384 // If the function takes variable number of arguments, make a frame index for 1385 // the start of the first vararg value... for expansion of llvm.va_start. 1386 if (isVarArg) { 1387 unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6); 1388 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1389 1390 // For X86-64, if there are vararg parameters that are passed via 1391 // registers, then we must store them to their spots on the stack so they 1392 // may be loaded by deferencing the result of va_next. 1393 VarArgsGPOffset = NumIntRegs * 8; 1394 VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16; 1395 VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); 1396 RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16); 1397 1398 // Store the integer parameter registers. 1399 SmallVector<SDOperand, 8> MemOps; 1400 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 1401 SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1402 DAG.getConstant(VarArgsGPOffset, getPointerTy())); 1403 for (; NumIntRegs != 6; ++NumIntRegs) { 1404 unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], 1405 X86::GR64RegisterClass); 1406 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); 1407 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1408 MemOps.push_back(Store); 1409 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1410 DAG.getConstant(8, getPointerTy())); 1411 } 1412 1413 // Now store the XMM (fp + vector) parameter registers. 1414 FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1415 DAG.getConstant(VarArgsFPOffset, getPointerTy())); 1416 for (; NumXMMRegs != 8; ++NumXMMRegs) { 1417 unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 1418 X86::VR128RegisterClass); 1419 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); 1420 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1421 MemOps.push_back(Store); 1422 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1423 DAG.getConstant(16, getPointerTy())); 1424 } 1425 if (!MemOps.empty()) 1426 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, 1427 &MemOps[0], MemOps.size()); 1428 } 1429 1430 ArgValues.push_back(Root); 1431 1432 BytesToPopOnReturn = 0; // Callee pops nothing. 1433 BytesCallerReserves = StackSize; 1434 1435 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1436 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1437 1438 // Return the new list of results. 1439 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), 1440 &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); 1441} 1442 1443SDOperand 1444X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, 1445 unsigned CC) { 1446 SDOperand Chain = Op.getOperand(0); 1447 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1448 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1449 SDOperand Callee = Op.getOperand(4); 1450 1451 // Analyze operands of the call, assigning locations to each operand. 1452 SmallVector<CCValAssign, 16> ArgLocs; 1453 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1454 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); 1455 1456 // Get a count of how many bytes are to be pushed on the stack. 1457 unsigned NumBytes = CCInfo.getNextStackOffset(); 1458 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1459 1460 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 1461 SmallVector<SDOperand, 8> MemOpChains; 1462 1463 SDOperand StackPtr; 1464 1465 // Walk the register/memloc assignments, inserting copies/loads. 1466 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1467 CCValAssign &VA = ArgLocs[i]; 1468 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1469 1470 // Promote the value if needed. 1471 switch (VA.getLocInfo()) { 1472 default: assert(0 && "Unknown loc info!"); 1473 case CCValAssign::Full: break; 1474 case CCValAssign::SExt: 1475 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1476 break; 1477 case CCValAssign::ZExt: 1478 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1479 break; 1480 case CCValAssign::AExt: 1481 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1482 break; 1483 } 1484 1485 if (VA.isRegLoc()) { 1486 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1487 } else { 1488 assert(VA.isMemLoc()); 1489 if (StackPtr.Val == 0) 1490 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 1491 1492 MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain, 1493 Arg)); 1494 } 1495 } 1496 1497 if (!MemOpChains.empty()) 1498 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1499 &MemOpChains[0], MemOpChains.size()); 1500 1501 // Build a sequence of copy-to-reg nodes chained together with token chain 1502 // and flag operands which copy the outgoing args into registers. 1503 SDOperand InFlag; 1504 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1505 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1506 InFlag); 1507 InFlag = Chain.getValue(1); 1508 } 1509 1510 if (isVarArg) { 1511 // From AMD64 ABI document: 1512 // For calls that may call functions that use varargs or stdargs 1513 // (prototype-less calls or calls to functions containing ellipsis (...) in 1514 // the declaration) %al is used as hidden argument to specify the number 1515 // of SSE registers used. The contents of %al do not need to match exactly 1516 // the number of registers, but must be an ubound on the number of SSE 1517 // registers used and is in the range 0 - 8 inclusive. 1518 1519 // Count the number of XMM registers allocated. 1520 static const unsigned XMMArgRegs[] = { 1521 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1522 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1523 }; 1524 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1525 1526 Chain = DAG.getCopyToReg(Chain, X86::AL, 1527 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1528 InFlag = Chain.getValue(1); 1529 } 1530 1531 // If the callee is a GlobalAddress node (quite common, every direct call is) 1532 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1533 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1534 // We should use extra load for direct calls to dllimported functions in 1535 // non-JIT mode. 1536 if (getTargetMachine().getCodeModel() != CodeModel::Large 1537 && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1538 getTargetMachine(), true)) 1539 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1540 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1541 if (getTargetMachine().getCodeModel() != CodeModel::Large) 1542 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1543 1544 // Returns a chain & a flag for retval copy to use. 1545 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1546 SmallVector<SDOperand, 8> Ops; 1547 Ops.push_back(Chain); 1548 Ops.push_back(Callee); 1549 1550 // Add argument registers to the end of the list so that they are known live 1551 // into the call. 1552 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1553 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1554 RegsToPass[i].second.getValueType())); 1555 1556 if (InFlag.Val) 1557 Ops.push_back(InFlag); 1558 1559 // FIXME: Do not generate X86ISD::TAILCALL for now. 1560 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1561 NodeTys, &Ops[0], Ops.size()); 1562 InFlag = Chain.getValue(1); 1563 1564 // Returns a flag for retval copy to use. 1565 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1566 Ops.clear(); 1567 Ops.push_back(Chain); 1568 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1569 Ops.push_back(DAG.getConstant(0, getPointerTy())); 1570 Ops.push_back(InFlag); 1571 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1572 InFlag = Chain.getValue(1); 1573 1574 // Handle result values, copying them out of physregs into vregs that we 1575 // return. 1576 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1577} 1578 1579 1580//===----------------------------------------------------------------------===// 1581// Other Lowering Hooks 1582//===----------------------------------------------------------------------===// 1583 1584 1585SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1586 MachineFunction &MF = DAG.getMachineFunction(); 1587 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1588 int ReturnAddrIndex = FuncInfo->getRAIndex(); 1589 1590 if (ReturnAddrIndex == 0) { 1591 // Set up a frame object for the return address. 1592 if (Subtarget->is64Bit()) 1593 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 1594 else 1595 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1596 1597 FuncInfo->setRAIndex(ReturnAddrIndex); 1598 } 1599 1600 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1601} 1602 1603 1604 1605/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1606/// specific condition code. It returns a false if it cannot do a direct 1607/// translation. X86CC is the translated CondCode. LHS/RHS are modified as 1608/// needed. 1609static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1610 unsigned &X86CC, SDOperand &LHS, SDOperand &RHS, 1611 SelectionDAG &DAG) { 1612 X86CC = X86::COND_INVALID; 1613 if (!isFP) { 1614 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1615 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 1616 // X > -1 -> X == 0, jump !sign. 1617 RHS = DAG.getConstant(0, RHS.getValueType()); 1618 X86CC = X86::COND_NS; 1619 return true; 1620 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 1621 // X < 0 -> X == 0, jump on sign. 1622 X86CC = X86::COND_S; 1623 return true; 1624 } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) { 1625 // X < 1 -> X <= 0 1626 RHS = DAG.getConstant(0, RHS.getValueType()); 1627 X86CC = X86::COND_LE; 1628 return true; 1629 } 1630 } 1631 1632 switch (SetCCOpcode) { 1633 default: break; 1634 case ISD::SETEQ: X86CC = X86::COND_E; break; 1635 case ISD::SETGT: X86CC = X86::COND_G; break; 1636 case ISD::SETGE: X86CC = X86::COND_GE; break; 1637 case ISD::SETLT: X86CC = X86::COND_L; break; 1638 case ISD::SETLE: X86CC = X86::COND_LE; break; 1639 case ISD::SETNE: X86CC = X86::COND_NE; break; 1640 case ISD::SETULT: X86CC = X86::COND_B; break; 1641 case ISD::SETUGT: X86CC = X86::COND_A; break; 1642 case ISD::SETULE: X86CC = X86::COND_BE; break; 1643 case ISD::SETUGE: X86CC = X86::COND_AE; break; 1644 } 1645 } else { 1646 // On a floating point condition, the flags are set as follows: 1647 // ZF PF CF op 1648 // 0 | 0 | 0 | X > Y 1649 // 0 | 0 | 1 | X < Y 1650 // 1 | 0 | 0 | X == Y 1651 // 1 | 1 | 1 | unordered 1652 bool Flip = false; 1653 switch (SetCCOpcode) { 1654 default: break; 1655 case ISD::SETUEQ: 1656 case ISD::SETEQ: X86CC = X86::COND_E; break; 1657 case ISD::SETOLT: Flip = true; // Fallthrough 1658 case ISD::SETOGT: 1659 case ISD::SETGT: X86CC = X86::COND_A; break; 1660 case ISD::SETOLE: Flip = true; // Fallthrough 1661 case ISD::SETOGE: 1662 case ISD::SETGE: X86CC = X86::COND_AE; break; 1663 case ISD::SETUGT: Flip = true; // Fallthrough 1664 case ISD::SETULT: 1665 case ISD::SETLT: X86CC = X86::COND_B; break; 1666 case ISD::SETUGE: Flip = true; // Fallthrough 1667 case ISD::SETULE: 1668 case ISD::SETLE: X86CC = X86::COND_BE; break; 1669 case ISD::SETONE: 1670 case ISD::SETNE: X86CC = X86::COND_NE; break; 1671 case ISD::SETUO: X86CC = X86::COND_P; break; 1672 case ISD::SETO: X86CC = X86::COND_NP; break; 1673 } 1674 if (Flip) 1675 std::swap(LHS, RHS); 1676 } 1677 1678 return X86CC != X86::COND_INVALID; 1679} 1680 1681/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1682/// code. Current x86 isa includes the following FP cmov instructions: 1683/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1684static bool hasFPCMov(unsigned X86CC) { 1685 switch (X86CC) { 1686 default: 1687 return false; 1688 case X86::COND_B: 1689 case X86::COND_BE: 1690 case X86::COND_E: 1691 case X86::COND_P: 1692 case X86::COND_A: 1693 case X86::COND_AE: 1694 case X86::COND_NE: 1695 case X86::COND_NP: 1696 return true; 1697 } 1698} 1699 1700/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1701/// true if Op is undef or if its value falls within the specified range (L, H]. 1702static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1703 if (Op.getOpcode() == ISD::UNDEF) 1704 return true; 1705 1706 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1707 return (Val >= Low && Val < Hi); 1708} 1709 1710/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1711/// true if Op is undef or if its value equal to the specified value. 1712static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1713 if (Op.getOpcode() == ISD::UNDEF) 1714 return true; 1715 return cast<ConstantSDNode>(Op)->getValue() == Val; 1716} 1717 1718/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1719/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1720bool X86::isPSHUFDMask(SDNode *N) { 1721 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1722 1723 if (N->getNumOperands() != 2 && N->getNumOperands() != 4) 1724 return false; 1725 1726 // Check if the value doesn't reference the second vector. 1727 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1728 SDOperand Arg = N->getOperand(i); 1729 if (Arg.getOpcode() == ISD::UNDEF) continue; 1730 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1731 if (cast<ConstantSDNode>(Arg)->getValue() >= e) 1732 return false; 1733 } 1734 1735 return true; 1736} 1737 1738/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1739/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1740bool X86::isPSHUFHWMask(SDNode *N) { 1741 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1742 1743 if (N->getNumOperands() != 8) 1744 return false; 1745 1746 // Lower quadword copied in order. 1747 for (unsigned i = 0; i != 4; ++i) { 1748 SDOperand Arg = N->getOperand(i); 1749 if (Arg.getOpcode() == ISD::UNDEF) continue; 1750 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1751 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1752 return false; 1753 } 1754 1755 // Upper quadword shuffled. 1756 for (unsigned i = 4; i != 8; ++i) { 1757 SDOperand Arg = N->getOperand(i); 1758 if (Arg.getOpcode() == ISD::UNDEF) continue; 1759 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1760 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1761 if (Val < 4 || Val > 7) 1762 return false; 1763 } 1764 1765 return true; 1766} 1767 1768/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1769/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1770bool X86::isPSHUFLWMask(SDNode *N) { 1771 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1772 1773 if (N->getNumOperands() != 8) 1774 return false; 1775 1776 // Upper quadword copied in order. 1777 for (unsigned i = 4; i != 8; ++i) 1778 if (!isUndefOrEqual(N->getOperand(i), i)) 1779 return false; 1780 1781 // Lower quadword shuffled. 1782 for (unsigned i = 0; i != 4; ++i) 1783 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1784 return false; 1785 1786 return true; 1787} 1788 1789/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1790/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1791static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 1792 if (NumElems != 2 && NumElems != 4) return false; 1793 1794 unsigned Half = NumElems / 2; 1795 for (unsigned i = 0; i < Half; ++i) 1796 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 1797 return false; 1798 for (unsigned i = Half; i < NumElems; ++i) 1799 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 1800 return false; 1801 1802 return true; 1803} 1804 1805bool X86::isSHUFPMask(SDNode *N) { 1806 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1807 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 1808} 1809 1810/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 1811/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1812/// half elements to come from vector 1 (which would equal the dest.) and 1813/// the upper half to come from vector 2. 1814static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 1815 if (NumOps != 2 && NumOps != 4) return false; 1816 1817 unsigned Half = NumOps / 2; 1818 for (unsigned i = 0; i < Half; ++i) 1819 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 1820 return false; 1821 for (unsigned i = Half; i < NumOps; ++i) 1822 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 1823 return false; 1824 return true; 1825} 1826 1827static bool isCommutedSHUFP(SDNode *N) { 1828 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1829 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 1830} 1831 1832/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1833/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1834bool X86::isMOVHLPSMask(SDNode *N) { 1835 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1836 1837 if (N->getNumOperands() != 4) 1838 return false; 1839 1840 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1841 return isUndefOrEqual(N->getOperand(0), 6) && 1842 isUndefOrEqual(N->getOperand(1), 7) && 1843 isUndefOrEqual(N->getOperand(2), 2) && 1844 isUndefOrEqual(N->getOperand(3), 3); 1845} 1846 1847/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 1848/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 1849/// <2, 3, 2, 3> 1850bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 1851 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1852 1853 if (N->getNumOperands() != 4) 1854 return false; 1855 1856 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 1857 return isUndefOrEqual(N->getOperand(0), 2) && 1858 isUndefOrEqual(N->getOperand(1), 3) && 1859 isUndefOrEqual(N->getOperand(2), 2) && 1860 isUndefOrEqual(N->getOperand(3), 3); 1861} 1862 1863/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1864/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1865bool X86::isMOVLPMask(SDNode *N) { 1866 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1867 1868 unsigned NumElems = N->getNumOperands(); 1869 if (NumElems != 2 && NumElems != 4) 1870 return false; 1871 1872 for (unsigned i = 0; i < NumElems/2; ++i) 1873 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1874 return false; 1875 1876 for (unsigned i = NumElems/2; i < NumElems; ++i) 1877 if (!isUndefOrEqual(N->getOperand(i), i)) 1878 return false; 1879 1880 return true; 1881} 1882 1883/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1884/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1885/// and MOVLHPS. 1886bool X86::isMOVHPMask(SDNode *N) { 1887 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1888 1889 unsigned NumElems = N->getNumOperands(); 1890 if (NumElems != 2 && NumElems != 4) 1891 return false; 1892 1893 for (unsigned i = 0; i < NumElems/2; ++i) 1894 if (!isUndefOrEqual(N->getOperand(i), i)) 1895 return false; 1896 1897 for (unsigned i = 0; i < NumElems/2; ++i) { 1898 SDOperand Arg = N->getOperand(i + NumElems/2); 1899 if (!isUndefOrEqual(Arg, i + NumElems)) 1900 return false; 1901 } 1902 1903 return true; 1904} 1905 1906/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1907/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1908bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts, 1909 bool V2IsSplat = false) { 1910 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1911 return false; 1912 1913 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1914 SDOperand BitI = Elts[i]; 1915 SDOperand BitI1 = Elts[i+1]; 1916 if (!isUndefOrEqual(BitI, j)) 1917 return false; 1918 if (V2IsSplat) { 1919 if (isUndefOrEqual(BitI1, NumElts)) 1920 return false; 1921 } else { 1922 if (!isUndefOrEqual(BitI1, j + NumElts)) 1923 return false; 1924 } 1925 } 1926 1927 return true; 1928} 1929 1930bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1931 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1932 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1933} 1934 1935/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1936/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1937bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts, 1938 bool V2IsSplat = false) { 1939 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1940 return false; 1941 1942 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1943 SDOperand BitI = Elts[i]; 1944 SDOperand BitI1 = Elts[i+1]; 1945 if (!isUndefOrEqual(BitI, j + NumElts/2)) 1946 return false; 1947 if (V2IsSplat) { 1948 if (isUndefOrEqual(BitI1, NumElts)) 1949 return false; 1950 } else { 1951 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 1952 return false; 1953 } 1954 } 1955 1956 return true; 1957} 1958 1959bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1960 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1961 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1962} 1963 1964/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1965/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1966/// <0, 0, 1, 1> 1967bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1968 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1969 1970 unsigned NumElems = N->getNumOperands(); 1971 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1972 return false; 1973 1974 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1975 SDOperand BitI = N->getOperand(i); 1976 SDOperand BitI1 = N->getOperand(i+1); 1977 1978 if (!isUndefOrEqual(BitI, j)) 1979 return false; 1980 if (!isUndefOrEqual(BitI1, j)) 1981 return false; 1982 } 1983 1984 return true; 1985} 1986 1987/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 1988/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 1989/// <2, 2, 3, 3> 1990bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 1991 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1992 1993 unsigned NumElems = N->getNumOperands(); 1994 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1995 return false; 1996 1997 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 1998 SDOperand BitI = N->getOperand(i); 1999 SDOperand BitI1 = N->getOperand(i + 1); 2000 2001 if (!isUndefOrEqual(BitI, j)) 2002 return false; 2003 if (!isUndefOrEqual(BitI1, j)) 2004 return false; 2005 } 2006 2007 return true; 2008} 2009 2010/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 2011/// specifies a shuffle of elements that is suitable for input to MOVSS, 2012/// MOVSD, and MOVD, i.e. setting the lowest element. 2013static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) { 2014 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 2015 return false; 2016 2017 if (!isUndefOrEqual(Elts[0], NumElts)) 2018 return false; 2019 2020 for (unsigned i = 1; i < NumElts; ++i) { 2021 if (!isUndefOrEqual(Elts[i], i)) 2022 return false; 2023 } 2024 2025 return true; 2026} 2027 2028bool X86::isMOVLMask(SDNode *N) { 2029 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2030 return ::isMOVLMask(N->op_begin(), N->getNumOperands()); 2031} 2032 2033/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 2034/// of what x86 movss want. X86 movs requires the lowest element to be lowest 2035/// element of vector 2 and the other elements to come from vector 1 in order. 2036static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps, 2037 bool V2IsSplat = false, 2038 bool V2IsUndef = false) { 2039 if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) 2040 return false; 2041 2042 if (!isUndefOrEqual(Ops[0], 0)) 2043 return false; 2044 2045 for (unsigned i = 1; i < NumOps; ++i) { 2046 SDOperand Arg = Ops[i]; 2047 if (!(isUndefOrEqual(Arg, i+NumOps) || 2048 (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || 2049 (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) 2050 return false; 2051 } 2052 2053 return true; 2054} 2055 2056static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 2057 bool V2IsUndef = false) { 2058 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2059 return isCommutedMOVL(N->op_begin(), N->getNumOperands(), 2060 V2IsSplat, V2IsUndef); 2061} 2062 2063/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2064/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 2065bool X86::isMOVSHDUPMask(SDNode *N) { 2066 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2067 2068 if (N->getNumOperands() != 4) 2069 return false; 2070 2071 // Expect 1, 1, 3, 3 2072 for (unsigned i = 0; i < 2; ++i) { 2073 SDOperand Arg = N->getOperand(i); 2074 if (Arg.getOpcode() == ISD::UNDEF) continue; 2075 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2076 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2077 if (Val != 1) return false; 2078 } 2079 2080 bool HasHi = false; 2081 for (unsigned i = 2; i < 4; ++i) { 2082 SDOperand Arg = N->getOperand(i); 2083 if (Arg.getOpcode() == ISD::UNDEF) continue; 2084 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2085 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2086 if (Val != 3) return false; 2087 HasHi = true; 2088 } 2089 2090 // Don't use movshdup if it can be done with a shufps. 2091 return HasHi; 2092} 2093 2094/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2095/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 2096bool X86::isMOVSLDUPMask(SDNode *N) { 2097 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2098 2099 if (N->getNumOperands() != 4) 2100 return false; 2101 2102 // Expect 0, 0, 2, 2 2103 for (unsigned i = 0; i < 2; ++i) { 2104 SDOperand Arg = N->getOperand(i); 2105 if (Arg.getOpcode() == ISD::UNDEF) continue; 2106 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2107 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2108 if (Val != 0) return false; 2109 } 2110 2111 bool HasHi = false; 2112 for (unsigned i = 2; i < 4; ++i) { 2113 SDOperand Arg = N->getOperand(i); 2114 if (Arg.getOpcode() == ISD::UNDEF) continue; 2115 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2116 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2117 if (Val != 2) return false; 2118 HasHi = true; 2119 } 2120 2121 // Don't use movshdup if it can be done with a shufps. 2122 return HasHi; 2123} 2124 2125/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand 2126/// specifies a identity operation on the LHS or RHS. 2127static bool isIdentityMask(SDNode *N, bool RHS = false) { 2128 unsigned NumElems = N->getNumOperands(); 2129 for (unsigned i = 0; i < NumElems; ++i) 2130 if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) 2131 return false; 2132 return true; 2133} 2134 2135/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2136/// a splat of a single element. 2137static bool isSplatMask(SDNode *N) { 2138 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2139 2140 // This is a splat operation if each element of the permute is the same, and 2141 // if the value doesn't reference the second vector. 2142 unsigned NumElems = N->getNumOperands(); 2143 SDOperand ElementBase; 2144 unsigned i = 0; 2145 for (; i != NumElems; ++i) { 2146 SDOperand Elt = N->getOperand(i); 2147 if (isa<ConstantSDNode>(Elt)) { 2148 ElementBase = Elt; 2149 break; 2150 } 2151 } 2152 2153 if (!ElementBase.Val) 2154 return false; 2155 2156 for (; i != NumElems; ++i) { 2157 SDOperand Arg = N->getOperand(i); 2158 if (Arg.getOpcode() == ISD::UNDEF) continue; 2159 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2160 if (Arg != ElementBase) return false; 2161 } 2162 2163 // Make sure it is a splat of the first vector operand. 2164 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 2165} 2166 2167/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2168/// a splat of a single element and it's a 2 or 4 element mask. 2169bool X86::isSplatMask(SDNode *N) { 2170 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2171 2172 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 2173 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2174 return false; 2175 return ::isSplatMask(N); 2176} 2177 2178/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand 2179/// specifies a splat of zero element. 2180bool X86::isSplatLoMask(SDNode *N) { 2181 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2182 2183 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 2184 if (!isUndefOrEqual(N->getOperand(i), 0)) 2185 return false; 2186 return true; 2187} 2188 2189/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2190/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2191/// instructions. 2192unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2193 unsigned NumOperands = N->getNumOperands(); 2194 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2195 unsigned Mask = 0; 2196 for (unsigned i = 0; i < NumOperands; ++i) { 2197 unsigned Val = 0; 2198 SDOperand Arg = N->getOperand(NumOperands-i-1); 2199 if (Arg.getOpcode() != ISD::UNDEF) 2200 Val = cast<ConstantSDNode>(Arg)->getValue(); 2201 if (Val >= NumOperands) Val -= NumOperands; 2202 Mask |= Val; 2203 if (i != NumOperands - 1) 2204 Mask <<= Shift; 2205 } 2206 2207 return Mask; 2208} 2209 2210/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2211/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2212/// instructions. 2213unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2214 unsigned Mask = 0; 2215 // 8 nodes, but we only care about the last 4. 2216 for (unsigned i = 7; i >= 4; --i) { 2217 unsigned Val = 0; 2218 SDOperand Arg = N->getOperand(i); 2219 if (Arg.getOpcode() != ISD::UNDEF) 2220 Val = cast<ConstantSDNode>(Arg)->getValue(); 2221 Mask |= (Val - 4); 2222 if (i != 4) 2223 Mask <<= 2; 2224 } 2225 2226 return Mask; 2227} 2228 2229/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2230/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2231/// instructions. 2232unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2233 unsigned Mask = 0; 2234 // 8 nodes, but we only care about the first 4. 2235 for (int i = 3; i >= 0; --i) { 2236 unsigned Val = 0; 2237 SDOperand Arg = N->getOperand(i); 2238 if (Arg.getOpcode() != ISD::UNDEF) 2239 Val = cast<ConstantSDNode>(Arg)->getValue(); 2240 Mask |= Val; 2241 if (i != 0) 2242 Mask <<= 2; 2243 } 2244 2245 return Mask; 2246} 2247 2248/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2249/// specifies a 8 element shuffle that can be broken into a pair of 2250/// PSHUFHW and PSHUFLW. 2251static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2252 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2253 2254 if (N->getNumOperands() != 8) 2255 return false; 2256 2257 // Lower quadword shuffled. 2258 for (unsigned i = 0; i != 4; ++i) { 2259 SDOperand Arg = N->getOperand(i); 2260 if (Arg.getOpcode() == ISD::UNDEF) continue; 2261 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2262 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2263 if (Val > 4) 2264 return false; 2265 } 2266 2267 // Upper quadword shuffled. 2268 for (unsigned i = 4; i != 8; ++i) { 2269 SDOperand Arg = N->getOperand(i); 2270 if (Arg.getOpcode() == ISD::UNDEF) continue; 2271 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2272 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2273 if (Val < 4 || Val > 7) 2274 return false; 2275 } 2276 2277 return true; 2278} 2279 2280/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 2281/// values in ther permute mask. 2282static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, 2283 SDOperand &V2, SDOperand &Mask, 2284 SelectionDAG &DAG) { 2285 MVT::ValueType VT = Op.getValueType(); 2286 MVT::ValueType MaskVT = Mask.getValueType(); 2287 MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); 2288 unsigned NumElems = Mask.getNumOperands(); 2289 SmallVector<SDOperand, 8> MaskVec; 2290 2291 for (unsigned i = 0; i != NumElems; ++i) { 2292 SDOperand Arg = Mask.getOperand(i); 2293 if (Arg.getOpcode() == ISD::UNDEF) { 2294 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2295 continue; 2296 } 2297 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2298 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2299 if (Val < NumElems) 2300 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2301 else 2302 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2303 } 2304 2305 std::swap(V1, V2); 2306 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2307 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2308} 2309 2310/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2311/// match movhlps. The lower half elements should come from upper half of 2312/// V1 (and in order), and the upper half elements should come from the upper 2313/// half of V2 (and in order). 2314static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2315 unsigned NumElems = Mask->getNumOperands(); 2316 if (NumElems != 4) 2317 return false; 2318 for (unsigned i = 0, e = 2; i != e; ++i) 2319 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2320 return false; 2321 for (unsigned i = 2; i != 4; ++i) 2322 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2323 return false; 2324 return true; 2325} 2326 2327/// isScalarLoadToVector - Returns true if the node is a scalar load that 2328/// is promoted to a vector. 2329static inline bool isScalarLoadToVector(SDNode *N) { 2330 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2331 N = N->getOperand(0).Val; 2332 return ISD::isNON_EXTLoad(N); 2333 } 2334 return false; 2335} 2336 2337/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2338/// match movlp{s|d}. The lower half elements should come from lower half of 2339/// V1 (and in order), and the upper half elements should come from the upper 2340/// half of V2 (and in order). And since V1 will become the source of the 2341/// MOVLP, it must be either a vector load or a scalar load to vector. 2342static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 2343 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 2344 return false; 2345 // Is V2 is a vector load, don't do this transformation. We will try to use 2346 // load folding shufps op. 2347 if (ISD::isNON_EXTLoad(V2)) 2348 return false; 2349 2350 unsigned NumElems = Mask->getNumOperands(); 2351 if (NumElems != 2 && NumElems != 4) 2352 return false; 2353 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2354 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2355 return false; 2356 for (unsigned i = NumElems/2; i != NumElems; ++i) 2357 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2358 return false; 2359 return true; 2360} 2361 2362/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2363/// all the same. 2364static bool isSplatVector(SDNode *N) { 2365 if (N->getOpcode() != ISD::BUILD_VECTOR) 2366 return false; 2367 2368 SDOperand SplatValue = N->getOperand(0); 2369 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2370 if (N->getOperand(i) != SplatValue) 2371 return false; 2372 return true; 2373} 2374 2375/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2376/// to an undef. 2377static bool isUndefShuffle(SDNode *N) { 2378 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2379 return false; 2380 2381 SDOperand V1 = N->getOperand(0); 2382 SDOperand V2 = N->getOperand(1); 2383 SDOperand Mask = N->getOperand(2); 2384 unsigned NumElems = Mask.getNumOperands(); 2385 for (unsigned i = 0; i != NumElems; ++i) { 2386 SDOperand Arg = Mask.getOperand(i); 2387 if (Arg.getOpcode() != ISD::UNDEF) { 2388 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2389 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2390 return false; 2391 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2392 return false; 2393 } 2394 } 2395 return true; 2396} 2397 2398/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2399/// constant +0.0. 2400static inline bool isZeroNode(SDOperand Elt) { 2401 return ((isa<ConstantSDNode>(Elt) && 2402 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2403 (isa<ConstantFPSDNode>(Elt) && 2404 cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); 2405} 2406 2407/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2408/// to an zero vector. 2409static bool isZeroShuffle(SDNode *N) { 2410 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2411 return false; 2412 2413 SDOperand V1 = N->getOperand(0); 2414 SDOperand V2 = N->getOperand(1); 2415 SDOperand Mask = N->getOperand(2); 2416 unsigned NumElems = Mask.getNumOperands(); 2417 for (unsigned i = 0; i != NumElems; ++i) { 2418 SDOperand Arg = Mask.getOperand(i); 2419 if (Arg.getOpcode() != ISD::UNDEF) { 2420 unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); 2421 if (Idx < NumElems) { 2422 unsigned Opc = V1.Val->getOpcode(); 2423 if (Opc == ISD::UNDEF) 2424 continue; 2425 if (Opc != ISD::BUILD_VECTOR || 2426 !isZeroNode(V1.Val->getOperand(Idx))) 2427 return false; 2428 } else if (Idx >= NumElems) { 2429 unsigned Opc = V2.Val->getOpcode(); 2430 if (Opc == ISD::UNDEF) 2431 continue; 2432 if (Opc != ISD::BUILD_VECTOR || 2433 !isZeroNode(V2.Val->getOperand(Idx - NumElems))) 2434 return false; 2435 } 2436 } 2437 } 2438 return true; 2439} 2440 2441/// getZeroVector - Returns a vector of specified type with all zero elements. 2442/// 2443static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2444 assert(MVT::isVector(VT) && "Expected a vector type"); 2445 unsigned NumElems = MVT::getVectorNumElements(VT); 2446 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2447 bool isFP = MVT::isFloatingPoint(EVT); 2448 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2449 SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero); 2450 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2451} 2452 2453/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2454/// that point to V2 points to its first element. 2455static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2456 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2457 2458 bool Changed = false; 2459 SmallVector<SDOperand, 8> MaskVec; 2460 unsigned NumElems = Mask.getNumOperands(); 2461 for (unsigned i = 0; i != NumElems; ++i) { 2462 SDOperand Arg = Mask.getOperand(i); 2463 if (Arg.getOpcode() != ISD::UNDEF) { 2464 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2465 if (Val > NumElems) { 2466 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2467 Changed = true; 2468 } 2469 } 2470 MaskVec.push_back(Arg); 2471 } 2472 2473 if (Changed) 2474 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2475 &MaskVec[0], MaskVec.size()); 2476 return Mask; 2477} 2478 2479/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2480/// operation of specified width. 2481static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2482 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2483 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2484 2485 SmallVector<SDOperand, 8> MaskVec; 2486 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2487 for (unsigned i = 1; i != NumElems; ++i) 2488 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2489 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2490} 2491 2492/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2493/// of specified width. 2494static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2495 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2496 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2497 SmallVector<SDOperand, 8> MaskVec; 2498 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2499 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2500 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2501 } 2502 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2503} 2504 2505/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2506/// of specified width. 2507static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2508 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2509 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2510 unsigned Half = NumElems/2; 2511 SmallVector<SDOperand, 8> MaskVec; 2512 for (unsigned i = 0; i != Half; ++i) { 2513 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2514 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2515 } 2516 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2517} 2518 2519/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2520/// 2521static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2522 SDOperand V1 = Op.getOperand(0); 2523 SDOperand Mask = Op.getOperand(2); 2524 MVT::ValueType VT = Op.getValueType(); 2525 unsigned NumElems = Mask.getNumOperands(); 2526 Mask = getUnpacklMask(NumElems, DAG); 2527 while (NumElems != 4) { 2528 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2529 NumElems >>= 1; 2530 } 2531 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2532 2533 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2534 Mask = getZeroVector(MaskVT, DAG); 2535 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2536 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2537 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2538} 2539 2540/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2541/// vector of zero or undef vector. 2542static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2543 unsigned NumElems, unsigned Idx, 2544 bool isZero, SelectionDAG &DAG) { 2545 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2546 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2547 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2548 SDOperand Zero = DAG.getConstant(0, EVT); 2549 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2550 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2551 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2552 &MaskVec[0], MaskVec.size()); 2553 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2554} 2555 2556/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2557/// 2558static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2559 unsigned NumNonZero, unsigned NumZero, 2560 SelectionDAG &DAG, TargetLowering &TLI) { 2561 if (NumNonZero > 8) 2562 return SDOperand(); 2563 2564 SDOperand V(0, 0); 2565 bool First = true; 2566 for (unsigned i = 0; i < 16; ++i) { 2567 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2568 if (ThisIsNonZero && First) { 2569 if (NumZero) 2570 V = getZeroVector(MVT::v8i16, DAG); 2571 else 2572 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2573 First = false; 2574 } 2575 2576 if ((i & 1) != 0) { 2577 SDOperand ThisElt(0, 0), LastElt(0, 0); 2578 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2579 if (LastIsNonZero) { 2580 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2581 } 2582 if (ThisIsNonZero) { 2583 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2584 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2585 ThisElt, DAG.getConstant(8, MVT::i8)); 2586 if (LastIsNonZero) 2587 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2588 } else 2589 ThisElt = LastElt; 2590 2591 if (ThisElt.Val) 2592 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2593 DAG.getConstant(i/2, TLI.getPointerTy())); 2594 } 2595 } 2596 2597 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2598} 2599 2600/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 2601/// 2602static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2603 unsigned NumNonZero, unsigned NumZero, 2604 SelectionDAG &DAG, TargetLowering &TLI) { 2605 if (NumNonZero > 4) 2606 return SDOperand(); 2607 2608 SDOperand V(0, 0); 2609 bool First = true; 2610 for (unsigned i = 0; i < 8; ++i) { 2611 bool isNonZero = (NonZeros & (1 << i)) != 0; 2612 if (isNonZero) { 2613 if (First) { 2614 if (NumZero) 2615 V = getZeroVector(MVT::v8i16, DAG); 2616 else 2617 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2618 First = false; 2619 } 2620 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2621 DAG.getConstant(i, TLI.getPointerTy())); 2622 } 2623 } 2624 2625 return V; 2626} 2627 2628SDOperand 2629X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2630 // All zero's are handled with pxor. 2631 if (ISD::isBuildVectorAllZeros(Op.Val)) 2632 return Op; 2633 2634 // All one's are handled with pcmpeqd. 2635 if (ISD::isBuildVectorAllOnes(Op.Val)) 2636 return Op; 2637 2638 MVT::ValueType VT = Op.getValueType(); 2639 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2640 unsigned EVTBits = MVT::getSizeInBits(EVT); 2641 2642 unsigned NumElems = Op.getNumOperands(); 2643 unsigned NumZero = 0; 2644 unsigned NumNonZero = 0; 2645 unsigned NonZeros = 0; 2646 unsigned NumNonZeroImms = 0; 2647 std::set<SDOperand> Values; 2648 for (unsigned i = 0; i < NumElems; ++i) { 2649 SDOperand Elt = Op.getOperand(i); 2650 if (Elt.getOpcode() != ISD::UNDEF) { 2651 Values.insert(Elt); 2652 if (isZeroNode(Elt)) 2653 NumZero++; 2654 else { 2655 NonZeros |= (1 << i); 2656 NumNonZero++; 2657 if (Elt.getOpcode() == ISD::Constant || 2658 Elt.getOpcode() == ISD::ConstantFP) 2659 NumNonZeroImms++; 2660 } 2661 } 2662 } 2663 2664 if (NumNonZero == 0) { 2665 if (NumZero == 0) 2666 // All undef vector. Return an UNDEF. 2667 return DAG.getNode(ISD::UNDEF, VT); 2668 else 2669 // A mix of zero and undef. Return a zero vector. 2670 return getZeroVector(VT, DAG); 2671 } 2672 2673 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2674 if (Values.size() == 1) 2675 return SDOperand(); 2676 2677 // Special case for single non-zero element. 2678 if (NumNonZero == 1) { 2679 unsigned Idx = CountTrailingZeros_32(NonZeros); 2680 SDOperand Item = Op.getOperand(Idx); 2681 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2682 if (Idx == 0) 2683 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2684 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2685 NumZero > 0, DAG); 2686 2687 if (EVTBits == 32) { 2688 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2689 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2690 DAG); 2691 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2692 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2693 SmallVector<SDOperand, 8> MaskVec; 2694 for (unsigned i = 0; i < NumElems; i++) 2695 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2696 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2697 &MaskVec[0], MaskVec.size()); 2698 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2699 DAG.getNode(ISD::UNDEF, VT), Mask); 2700 } 2701 } 2702 2703 // A vector full of immediates; various special cases are already 2704 // handled, so this is best done with a single constant-pool load. 2705 if (NumNonZero == NumNonZeroImms) 2706 return SDOperand(); 2707 2708 // Let legalizer expand 2-wide build_vectors. 2709 if (EVTBits == 64) 2710 return SDOperand(); 2711 2712 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2713 if (EVTBits == 8 && NumElems == 16) { 2714 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2715 *this); 2716 if (V.Val) return V; 2717 } 2718 2719 if (EVTBits == 16 && NumElems == 8) { 2720 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2721 *this); 2722 if (V.Val) return V; 2723 } 2724 2725 // If element VT is == 32 bits, turn it into a number of shuffles. 2726 SmallVector<SDOperand, 8> V; 2727 V.resize(NumElems); 2728 if (NumElems == 4 && NumZero > 0) { 2729 for (unsigned i = 0; i < 4; ++i) { 2730 bool isZero = !(NonZeros & (1 << i)); 2731 if (isZero) 2732 V[i] = getZeroVector(VT, DAG); 2733 else 2734 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2735 } 2736 2737 for (unsigned i = 0; i < 2; ++i) { 2738 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2739 default: break; 2740 case 0: 2741 V[i] = V[i*2]; // Must be a zero vector. 2742 break; 2743 case 1: 2744 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2745 getMOVLMask(NumElems, DAG)); 2746 break; 2747 case 2: 2748 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2749 getMOVLMask(NumElems, DAG)); 2750 break; 2751 case 3: 2752 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2753 getUnpacklMask(NumElems, DAG)); 2754 break; 2755 } 2756 } 2757 2758 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 2759 // clears the upper bits. 2760 // FIXME: we can do the same for v4f32 case when we know both parts of 2761 // the lower half come from scalar_to_vector (loadf32). We should do 2762 // that in post legalizer dag combiner with target specific hooks. 2763 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2764 return V[0]; 2765 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2766 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2767 SmallVector<SDOperand, 8> MaskVec; 2768 bool Reverse = (NonZeros & 0x3) == 2; 2769 for (unsigned i = 0; i < 2; ++i) 2770 if (Reverse) 2771 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2772 else 2773 MaskVec.push_back(DAG.getConstant(i, EVT)); 2774 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2775 for (unsigned i = 0; i < 2; ++i) 2776 if (Reverse) 2777 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2778 else 2779 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2780 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2781 &MaskVec[0], MaskVec.size()); 2782 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2783 } 2784 2785 if (Values.size() > 2) { 2786 // Expand into a number of unpckl*. 2787 // e.g. for v4f32 2788 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2789 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2790 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2791 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2792 for (unsigned i = 0; i < NumElems; ++i) 2793 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2794 NumElems >>= 1; 2795 while (NumElems != 0) { 2796 for (unsigned i = 0; i < NumElems; ++i) 2797 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2798 UnpckMask); 2799 NumElems >>= 1; 2800 } 2801 return V[0]; 2802 } 2803 2804 return SDOperand(); 2805} 2806 2807SDOperand 2808X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2809 SDOperand V1 = Op.getOperand(0); 2810 SDOperand V2 = Op.getOperand(1); 2811 SDOperand PermMask = Op.getOperand(2); 2812 MVT::ValueType VT = Op.getValueType(); 2813 unsigned NumElems = PermMask.getNumOperands(); 2814 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2815 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2816 bool V1IsSplat = false; 2817 bool V2IsSplat = false; 2818 2819 if (isUndefShuffle(Op.Val)) 2820 return DAG.getNode(ISD::UNDEF, VT); 2821 2822 if (isZeroShuffle(Op.Val)) 2823 return getZeroVector(VT, DAG); 2824 2825 if (isIdentityMask(PermMask.Val)) 2826 return V1; 2827 else if (isIdentityMask(PermMask.Val, true)) 2828 return V2; 2829 2830 if (isSplatMask(PermMask.Val)) { 2831 if (NumElems <= 4) return Op; 2832 // Promote it to a v4i32 splat. 2833 return PromoteSplat(Op, DAG); 2834 } 2835 2836 if (X86::isMOVLMask(PermMask.Val)) 2837 return (V1IsUndef) ? V2 : Op; 2838 2839 if (X86::isMOVSHDUPMask(PermMask.Val) || 2840 X86::isMOVSLDUPMask(PermMask.Val) || 2841 X86::isMOVHLPSMask(PermMask.Val) || 2842 X86::isMOVHPMask(PermMask.Val) || 2843 X86::isMOVLPMask(PermMask.Val)) 2844 return Op; 2845 2846 if (ShouldXformToMOVHLPS(PermMask.Val) || 2847 ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val)) 2848 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2849 2850 bool Commuted = false; 2851 V1IsSplat = isSplatVector(V1.Val); 2852 V2IsSplat = isSplatVector(V2.Val); 2853 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 2854 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2855 std::swap(V1IsSplat, V2IsSplat); 2856 std::swap(V1IsUndef, V2IsUndef); 2857 Commuted = true; 2858 } 2859 2860 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 2861 if (V2IsUndef) return V1; 2862 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2863 if (V2IsSplat) { 2864 // V2 is a splat, so the mask may be malformed. That is, it may point 2865 // to any V2 element. The instruction selectior won't like this. Get 2866 // a corrected mask and commute to form a proper MOVS{S|D}. 2867 SDOperand NewMask = getMOVLMask(NumElems, DAG); 2868 if (NewMask.Val != PermMask.Val) 2869 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2870 } 2871 return Op; 2872 } 2873 2874 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2875 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2876 X86::isUNPCKLMask(PermMask.Val) || 2877 X86::isUNPCKHMask(PermMask.Val)) 2878 return Op; 2879 2880 if (V2IsSplat) { 2881 // Normalize mask so all entries that point to V2 points to its first 2882 // element then try to match unpck{h|l} again. If match, return a 2883 // new vector_shuffle with the corrected mask. 2884 SDOperand NewMask = NormalizeMask(PermMask, DAG); 2885 if (NewMask.Val != PermMask.Val) { 2886 if (X86::isUNPCKLMask(PermMask.Val, true)) { 2887 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 2888 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2889 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 2890 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 2891 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2892 } 2893 } 2894 } 2895 2896 // Normalize the node to match x86 shuffle ops if needed 2897 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val)) 2898 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2899 2900 if (Commuted) { 2901 // Commute is back and try unpck* again. 2902 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2903 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2904 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2905 X86::isUNPCKLMask(PermMask.Val) || 2906 X86::isUNPCKHMask(PermMask.Val)) 2907 return Op; 2908 } 2909 2910 // If VT is integer, try PSHUF* first, then SHUFP*. 2911 if (MVT::isInteger(VT)) { 2912 // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically 2913 // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. 2914 if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) && 2915 X86::isPSHUFDMask(PermMask.Val)) || 2916 X86::isPSHUFHWMask(PermMask.Val) || 2917 X86::isPSHUFLWMask(PermMask.Val)) { 2918 if (V2.getOpcode() != ISD::UNDEF) 2919 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2920 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2921 return Op; 2922 } 2923 2924 if (X86::isSHUFPMask(PermMask.Val) && 2925 MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. 2926 return Op; 2927 2928 // Handle v8i16 shuffle high / low shuffle node pair. 2929 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2930 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2931 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2932 SmallVector<SDOperand, 8> MaskVec; 2933 for (unsigned i = 0; i != 4; ++i) 2934 MaskVec.push_back(PermMask.getOperand(i)); 2935 for (unsigned i = 4; i != 8; ++i) 2936 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2937 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2938 &MaskVec[0], MaskVec.size()); 2939 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2940 MaskVec.clear(); 2941 for (unsigned i = 0; i != 4; ++i) 2942 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2943 for (unsigned i = 4; i != 8; ++i) 2944 MaskVec.push_back(PermMask.getOperand(i)); 2945 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 2946 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2947 } 2948 } else { 2949 // Floating point cases in the other order. 2950 if (X86::isSHUFPMask(PermMask.Val)) 2951 return Op; 2952 if (X86::isPSHUFDMask(PermMask.Val) || 2953 X86::isPSHUFHWMask(PermMask.Val) || 2954 X86::isPSHUFLWMask(PermMask.Val)) { 2955 if (V2.getOpcode() != ISD::UNDEF) 2956 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2957 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2958 return Op; 2959 } 2960 } 2961 2962 if (NumElems == 4 && 2963 // Don't do this for MMX. 2964 MVT::getSizeInBits(VT) != 64) { 2965 MVT::ValueType MaskVT = PermMask.getValueType(); 2966 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2967 SmallVector<std::pair<int, int>, 8> Locs; 2968 Locs.reserve(NumElems); 2969 SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2970 SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2971 unsigned NumHi = 0; 2972 unsigned NumLo = 0; 2973 // If no more than two elements come from either vector. This can be 2974 // implemented with two shuffles. First shuffle gather the elements. 2975 // The second shuffle, which takes the first shuffle as both of its 2976 // vector operands, put the elements into the right order. 2977 for (unsigned i = 0; i != NumElems; ++i) { 2978 SDOperand Elt = PermMask.getOperand(i); 2979 if (Elt.getOpcode() == ISD::UNDEF) { 2980 Locs[i] = std::make_pair(-1, -1); 2981 } else { 2982 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 2983 if (Val < NumElems) { 2984 Locs[i] = std::make_pair(0, NumLo); 2985 Mask1[NumLo] = Elt; 2986 NumLo++; 2987 } else { 2988 Locs[i] = std::make_pair(1, NumHi); 2989 if (2+NumHi < NumElems) 2990 Mask1[2+NumHi] = Elt; 2991 NumHi++; 2992 } 2993 } 2994 } 2995 if (NumLo <= 2 && NumHi <= 2) { 2996 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2997 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2998 &Mask1[0], Mask1.size())); 2999 for (unsigned i = 0; i != NumElems; ++i) { 3000 if (Locs[i].first == -1) 3001 continue; 3002 else { 3003 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 3004 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 3005 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 3006 } 3007 } 3008 3009 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 3010 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3011 &Mask2[0], Mask2.size())); 3012 } 3013 3014 // Break it into (shuffle shuffle_hi, shuffle_lo). 3015 Locs.clear(); 3016 SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3017 SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 3018 SmallVector<SDOperand,8> *MaskPtr = &LoMask; 3019 unsigned MaskIdx = 0; 3020 unsigned LoIdx = 0; 3021 unsigned HiIdx = NumElems/2; 3022 for (unsigned i = 0; i != NumElems; ++i) { 3023 if (i == NumElems/2) { 3024 MaskPtr = &HiMask; 3025 MaskIdx = 1; 3026 LoIdx = 0; 3027 HiIdx = NumElems/2; 3028 } 3029 SDOperand Elt = PermMask.getOperand(i); 3030 if (Elt.getOpcode() == ISD::UNDEF) { 3031 Locs[i] = std::make_pair(-1, -1); 3032 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 3033 Locs[i] = std::make_pair(MaskIdx, LoIdx); 3034 (*MaskPtr)[LoIdx] = Elt; 3035 LoIdx++; 3036 } else { 3037 Locs[i] = std::make_pair(MaskIdx, HiIdx); 3038 (*MaskPtr)[HiIdx] = Elt; 3039 HiIdx++; 3040 } 3041 } 3042 3043 SDOperand LoShuffle = 3044 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3045 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3046 &LoMask[0], LoMask.size())); 3047 SDOperand HiShuffle = 3048 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 3049 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3050 &HiMask[0], HiMask.size())); 3051 SmallVector<SDOperand, 8> MaskOps; 3052 for (unsigned i = 0; i != NumElems; ++i) { 3053 if (Locs[i].first == -1) { 3054 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 3055 } else { 3056 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 3057 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 3058 } 3059 } 3060 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 3061 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3062 &MaskOps[0], MaskOps.size())); 3063 } 3064 3065 return SDOperand(); 3066} 3067 3068SDOperand 3069X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3070 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3071 return SDOperand(); 3072 3073 MVT::ValueType VT = Op.getValueType(); 3074 // TODO: handle v16i8. 3075 if (MVT::getSizeInBits(VT) == 16) { 3076 // Transform it so it match pextrw which produces a 32-bit result. 3077 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 3078 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 3079 Op.getOperand(0), Op.getOperand(1)); 3080 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 3081 DAG.getValueType(VT)); 3082 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3083 } else if (MVT::getSizeInBits(VT) == 32) { 3084 SDOperand Vec = Op.getOperand(0); 3085 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3086 if (Idx == 0) 3087 return Op; 3088 // SHUFPS the element to the lowest double word, then movss. 3089 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3090 SmallVector<SDOperand, 8> IdxVec; 3091 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); 3092 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3093 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3094 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3095 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3096 &IdxVec[0], IdxVec.size()); 3097 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3098 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3099 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3100 DAG.getConstant(0, getPointerTy())); 3101 } else if (MVT::getSizeInBits(VT) == 64) { 3102 SDOperand Vec = Op.getOperand(0); 3103 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3104 if (Idx == 0) 3105 return Op; 3106 3107 // UNPCKHPD the element to the lowest double word, then movsd. 3108 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 3109 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 3110 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3111 SmallVector<SDOperand, 8> IdxVec; 3112 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); 3113 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3114 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3115 &IdxVec[0], IdxVec.size()); 3116 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3117 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3118 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3119 DAG.getConstant(0, getPointerTy())); 3120 } 3121 3122 return SDOperand(); 3123} 3124 3125SDOperand 3126X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3127 // Transform it so it match pinsrw which expects a 16-bit value in a GR32 3128 // as its second argument. 3129 MVT::ValueType VT = Op.getValueType(); 3130 MVT::ValueType BaseVT = MVT::getVectorElementType(VT); 3131 SDOperand N0 = Op.getOperand(0); 3132 SDOperand N1 = Op.getOperand(1); 3133 SDOperand N2 = Op.getOperand(2); 3134 if (MVT::getSizeInBits(BaseVT) == 16) { 3135 if (N1.getValueType() != MVT::i32) 3136 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3137 if (N2.getValueType() != MVT::i32) 3138 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy()); 3139 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3140 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3141 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3142 if (Idx == 0) { 3143 // Use a movss. 3144 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3145 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3146 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 3147 SmallVector<SDOperand, 8> MaskVec; 3148 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3149 for (unsigned i = 1; i <= 3; ++i) 3150 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3151 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3152 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3153 &MaskVec[0], MaskVec.size())); 3154 } else { 3155 // Use two pinsrw instructions to insert a 32 bit value. 3156 Idx <<= 1; 3157 if (MVT::isFloatingPoint(N1.getValueType())) { 3158 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3159 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3160 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3161 DAG.getConstant(0, getPointerTy())); 3162 } 3163 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3164 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3165 DAG.getConstant(Idx, getPointerTy())); 3166 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3167 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3168 DAG.getConstant(Idx+1, getPointerTy())); 3169 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3170 } 3171 } 3172 3173 return SDOperand(); 3174} 3175 3176SDOperand 3177X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3178 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3179 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3180} 3181 3182// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 3183// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 3184// one of the above mentioned nodes. It has to be wrapped because otherwise 3185// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 3186// be used to form addressing mode. These wrapped nodes will be selected 3187// into MOV32ri. 3188SDOperand 3189X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 3190 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 3191 SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(), 3192 getPointerTy(), 3193 CP->getAlignment()); 3194 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3195 // With PIC, the address is actually $g + Offset. 3196 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3197 !Subtarget->isPICStyleRIPRel()) { 3198 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3199 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3200 Result); 3201 } 3202 3203 return Result; 3204} 3205 3206SDOperand 3207X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 3208 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 3209 SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy()); 3210 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3211 // With PIC, the address is actually $g + Offset. 3212 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3213 !Subtarget->isPICStyleRIPRel()) { 3214 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3215 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3216 Result); 3217 } 3218 3219 // For Darwin & Mingw32, external and weak symbols are indirect, so we want to 3220 // load the value at address GV, not the value of GV itself. This means that 3221 // the GlobalAddress must be in the base or index register of the address, not 3222 // the GV offset field. Platform check is inside GVRequiresExtraLoad() call 3223 // The same applies for external symbols during PIC codegen 3224 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 3225 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 3226 3227 return Result; 3228} 3229 3230// Lower ISD::GlobalTLSAddress using the "general dynamic" model 3231static SDOperand 3232LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3233 const MVT::ValueType PtrVT) { 3234 SDOperand InFlag; 3235 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 3236 DAG.getNode(X86ISD::GlobalBaseReg, 3237 PtrVT), InFlag); 3238 InFlag = Chain.getValue(1); 3239 3240 // emit leal symbol@TLSGD(,%ebx,1), %eax 3241 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 3242 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3243 GA->getValueType(0), 3244 GA->getOffset()); 3245 SDOperand Ops[] = { Chain, TGA, InFlag }; 3246 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 3247 InFlag = Result.getValue(2); 3248 Chain = Result.getValue(1); 3249 3250 // call ___tls_get_addr. This function receives its argument in 3251 // the register EAX. 3252 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 3253 InFlag = Chain.getValue(1); 3254 3255 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3256 SDOperand Ops1[] = { Chain, 3257 DAG.getTargetExternalSymbol("___tls_get_addr", 3258 PtrVT), 3259 DAG.getRegister(X86::EAX, PtrVT), 3260 DAG.getRegister(X86::EBX, PtrVT), 3261 InFlag }; 3262 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 3263 InFlag = Chain.getValue(1); 3264 3265 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 3266} 3267 3268// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 3269// "local exec" model. 3270static SDOperand 3271LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3272 const MVT::ValueType PtrVT) { 3273 // Get the Thread Pointer 3274 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 3275 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 3276 // exec) 3277 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3278 GA->getValueType(0), 3279 GA->getOffset()); 3280 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 3281 3282 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 3283 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); 3284 3285 // The address of the thread local variable is the add of the thread 3286 // pointer with the offset of the variable. 3287 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); 3288} 3289 3290SDOperand 3291X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { 3292 // TODO: implement the "local dynamic" model 3293 // TODO: implement the "initial exec"model for pic executables 3294 assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() && 3295 "TLS not implemented for non-ELF and 64-bit targets"); 3296 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 3297 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 3298 // otherwise use the "Local Exec"TLS Model 3299 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 3300 return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy()); 3301 else 3302 return LowerToTLSExecModel(GA, DAG, getPointerTy()); 3303} 3304 3305SDOperand 3306X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 3307 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 3308 SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 3309 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3310 // With PIC, the address is actually $g + Offset. 3311 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3312 !Subtarget->isPICStyleRIPRel()) { 3313 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3314 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3315 Result); 3316 } 3317 3318 return Result; 3319} 3320 3321SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 3322 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 3323 SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); 3324 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3325 // With PIC, the address is actually $g + Offset. 3326 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3327 !Subtarget->isPICStyleRIPRel()) { 3328 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3329 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3330 Result); 3331 } 3332 3333 return Result; 3334} 3335 3336SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3337 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 3338 "Not an i64 shift!"); 3339 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 3340 SDOperand ShOpLo = Op.getOperand(0); 3341 SDOperand ShOpHi = Op.getOperand(1); 3342 SDOperand ShAmt = Op.getOperand(2); 3343 SDOperand Tmp1 = isSRA ? 3344 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 3345 DAG.getConstant(0, MVT::i32); 3346 3347 SDOperand Tmp2, Tmp3; 3348 if (Op.getOpcode() == ISD::SHL_PARTS) { 3349 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 3350 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 3351 } else { 3352 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 3353 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3354 } 3355 3356 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3357 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3358 DAG.getConstant(32, MVT::i8)); 3359 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3360 SDOperand Cond = NewCCModeling 3361 ? DAG.getNode(X86ISD::CMP_NEW, MVT::i32, 3362 AndNode, DAG.getConstant(0, MVT::i8)) 3363 : DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3364 3365 SDOperand Hi, Lo; 3366 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3367 unsigned Opc = NewCCModeling ? X86ISD::CMOV_NEW : X86ISD::CMOV; 3368 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3369 SmallVector<SDOperand, 4> Ops; 3370 if (Op.getOpcode() == ISD::SHL_PARTS) { 3371 Ops.push_back(Tmp2); 3372 Ops.push_back(Tmp3); 3373 Ops.push_back(CC); 3374 Ops.push_back(Cond); 3375 if (NewCCModeling) 3376 Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3377 else { 3378 Hi = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); 3379 Cond = Hi.getValue(1); 3380 } 3381 3382 Ops.clear(); 3383 Ops.push_back(Tmp3); 3384 Ops.push_back(Tmp1); 3385 Ops.push_back(CC); 3386 Ops.push_back(Cond); 3387 if (NewCCModeling) 3388 Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3389 else 3390 Lo = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); 3391 } else { 3392 Ops.push_back(Tmp2); 3393 Ops.push_back(Tmp3); 3394 Ops.push_back(CC); 3395 Ops.push_back(Cond); 3396 if (NewCCModeling) 3397 Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3398 else { 3399 Lo = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); 3400 Cond = Lo.getValue(1); 3401 } 3402 3403 Ops.clear(); 3404 Ops.push_back(Tmp3); 3405 Ops.push_back(Tmp1); 3406 Ops.push_back(CC); 3407 Ops.push_back(Cond); 3408 if (NewCCModeling) 3409 Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); 3410 else 3411 Hi = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); 3412 } 3413 3414 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3415 Ops.clear(); 3416 Ops.push_back(Lo); 3417 Ops.push_back(Hi); 3418 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3419} 3420 3421SDOperand X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) { 3422 unsigned Opcode = Op.getOpcode(); 3423 MVT::ValueType NVT = Op.getValueType(); 3424 bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM; 3425 bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV; 3426 unsigned Opc = isSigned ? X86ISD::IDIV : X86ISD::DIV; 3427 3428 SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) }; 3429 SDOperand DR = DAG.getNode(Opc, DAG.getVTList(NVT, NVT), Ops, 2); 3430 3431 if (isDiv) 3432 return DR; 3433 3434 return SDOperand(DR.Val, 1); 3435} 3436 3437SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3438 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3439 Op.getOperand(0).getValueType() >= MVT::i16 && 3440 "Unknown SINT_TO_FP to lower!"); 3441 3442 SDOperand Result; 3443 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3444 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3445 MachineFunction &MF = DAG.getMachineFunction(); 3446 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3447 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3448 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3449 StackSlot, NULL, 0); 3450 3451 // These are really Legal; caller falls through into that case. 3452 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32) 3453 return Result; 3454 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64) 3455 return Result; 3456 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 3457 Subtarget->is64Bit()) 3458 return Result; 3459 3460 // Build the FILD 3461 SDVTList Tys; 3462 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) || 3463 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64); 3464 if (useSSE) 3465 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3466 else 3467 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3468 SmallVector<SDOperand, 8> Ops; 3469 Ops.push_back(Chain); 3470 Ops.push_back(StackSlot); 3471 Ops.push_back(DAG.getValueType(SrcVT)); 3472 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3473 Tys, &Ops[0], Ops.size()); 3474 3475 if (useSSE) { 3476 Chain = Result.getValue(1); 3477 SDOperand InFlag = Result.getValue(2); 3478 3479 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3480 // shouldn't be necessary except that RFP cannot be live across 3481 // multiple blocks. When stackifier is fixed, they can be uncoupled. 3482 MachineFunction &MF = DAG.getMachineFunction(); 3483 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3484 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3485 Tys = DAG.getVTList(MVT::Other); 3486 SmallVector<SDOperand, 8> Ops; 3487 Ops.push_back(Chain); 3488 Ops.push_back(Result); 3489 Ops.push_back(StackSlot); 3490 Ops.push_back(DAG.getValueType(Op.getValueType())); 3491 Ops.push_back(InFlag); 3492 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3493 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3494 } 3495 3496 return Result; 3497} 3498 3499SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3500 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3501 "Unknown FP_TO_SINT to lower!"); 3502 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3503 // stack slot. 3504 SDOperand Result; 3505 MachineFunction &MF = DAG.getMachineFunction(); 3506 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3507 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3508 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3509 3510 // These are really Legal. 3511 if (Op.getValueType() == MVT::i32 && 3512 X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) 3513 return Result; 3514 if (Op.getValueType() == MVT::i32 && 3515 X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64) 3516 return Result; 3517 if (Subtarget->is64Bit() && 3518 Op.getValueType() == MVT::i64 && 3519 Op.getOperand(0).getValueType() != MVT::f80) 3520 return Result; 3521 3522 unsigned Opc; 3523 switch (Op.getValueType()) { 3524 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3525 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3526 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3527 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3528 } 3529 3530 SDOperand Chain = DAG.getEntryNode(); 3531 SDOperand Value = Op.getOperand(0); 3532 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) || 3533 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) { 3534 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3535 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3536 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3537 SDOperand Ops[] = { 3538 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3539 }; 3540 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3541 Chain = Value.getValue(1); 3542 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3543 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3544 } 3545 3546 // Build the FP_TO_INT*_IN_MEM 3547 SDOperand Ops[] = { Chain, Value, StackSlot }; 3548 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3549 3550 // Load the result. 3551 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3552} 3553 3554SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3555 MVT::ValueType VT = Op.getValueType(); 3556 MVT::ValueType EltVT = VT; 3557 if (MVT::isVector(VT)) 3558 EltVT = MVT::getVectorElementType(VT); 3559 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3560 std::vector<Constant*> CV; 3561 if (EltVT == MVT::f64) { 3562 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 3563 CV.push_back(C); 3564 CV.push_back(C); 3565 } else { 3566 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 3567 CV.push_back(C); 3568 CV.push_back(C); 3569 CV.push_back(C); 3570 CV.push_back(C); 3571 } 3572 Constant *C = ConstantVector::get(CV); 3573 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3574 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3575 false, 16); 3576 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3577} 3578 3579SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3580 MVT::ValueType VT = Op.getValueType(); 3581 MVT::ValueType EltVT = VT; 3582 unsigned EltNum = 1; 3583 if (MVT::isVector(VT)) { 3584 EltVT = MVT::getVectorElementType(VT); 3585 EltNum = MVT::getVectorNumElements(VT); 3586 } 3587 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3588 std::vector<Constant*> CV; 3589 if (EltVT == MVT::f64) { 3590 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 3591 CV.push_back(C); 3592 CV.push_back(C); 3593 } else { 3594 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 3595 CV.push_back(C); 3596 CV.push_back(C); 3597 CV.push_back(C); 3598 CV.push_back(C); 3599 } 3600 Constant *C = ConstantVector::get(CV); 3601 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3602 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3603 false, 16); 3604 if (MVT::isVector(VT)) { 3605 return DAG.getNode(ISD::BIT_CONVERT, VT, 3606 DAG.getNode(ISD::XOR, MVT::v2i64, 3607 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 3608 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 3609 } else { 3610 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3611 } 3612} 3613 3614SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3615 SDOperand Op0 = Op.getOperand(0); 3616 SDOperand Op1 = Op.getOperand(1); 3617 MVT::ValueType VT = Op.getValueType(); 3618 MVT::ValueType SrcVT = Op1.getValueType(); 3619 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3620 3621 // If second operand is smaller, extend it first. 3622 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3623 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3624 SrcVT = VT; 3625 SrcTy = MVT::getTypeForValueType(SrcVT); 3626 } 3627 3628 // First get the sign bit of second operand. 3629 std::vector<Constant*> CV; 3630 if (SrcVT == MVT::f64) { 3631 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 3632 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 3633 } else { 3634 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 3635 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3636 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3637 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3638 } 3639 Constant *C = ConstantVector::get(CV); 3640 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3641 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 3642 false, 16); 3643 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3644 3645 // Shift sign bit right or left if the two operands have different types. 3646 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3647 // Op0 is MVT::f32, Op1 is MVT::f64. 3648 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3649 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3650 DAG.getConstant(32, MVT::i32)); 3651 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3652 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3653 DAG.getConstant(0, getPointerTy())); 3654 } 3655 3656 // Clear first operand sign bit. 3657 CV.clear(); 3658 if (VT == MVT::f64) { 3659 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 3660 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 3661 } else { 3662 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 3663 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3664 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3665 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3666 } 3667 C = ConstantVector::get(CV); 3668 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3669 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3670 false, 16); 3671 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3672 3673 // Or the value with the sign bit. 3674 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 3675} 3676 3677SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, 3678 SDOperand Chain) { 3679 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3680 SDOperand Cond; 3681 SDOperand Op0 = Op.getOperand(0); 3682 SDOperand Op1 = Op.getOperand(1); 3683 SDOperand CC = Op.getOperand(2); 3684 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3685 const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3686 const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 3687 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3688 unsigned X86CC; 3689 3690 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 3691 Op0, Op1, DAG)) { 3692 SDOperand Ops1[] = { Chain, Op0, Op1 }; 3693 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1); 3694 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 3695 return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3696 } 3697 3698 assert(isFP && "Illegal integer SetCC!"); 3699 3700 SDOperand COps[] = { Chain, Op0, Op1 }; 3701 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1); 3702 3703 switch (SetCCOpcode) { 3704 default: assert(false && "Illegal floating point SetCC!"); 3705 case ISD::SETOEQ: { // !PF & ZF 3706 SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond }; 3707 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3708 SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8), 3709 Tmp1.getValue(1) }; 3710 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3711 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3712 } 3713 case ISD::SETUNE: { // PF | !ZF 3714 SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond }; 3715 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3716 SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8), 3717 Tmp1.getValue(1) }; 3718 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3719 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3720 } 3721 } 3722} 3723 3724SDOperand X86TargetLowering::LowerSETCC_New(SDOperand Op, SelectionDAG &DAG) { 3725 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3726 SDOperand Cond; 3727 SDOperand Op0 = Op.getOperand(0); 3728 SDOperand Op1 = Op.getOperand(1); 3729 SDOperand CC = Op.getOperand(2); 3730 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3731 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3732 unsigned X86CC; 3733 3734 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 3735 Op0, Op1, DAG)) { 3736 Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Op0, Op1); 3737 return DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, 3738 DAG.getConstant(X86CC, MVT::i8), Cond); 3739 } 3740 3741 assert(isFP && "Illegal integer SetCC!"); 3742 3743 Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Op0, Op1); 3744 switch (SetCCOpcode) { 3745 default: assert(false && "Illegal floating point SetCC!"); 3746 case ISD::SETOEQ: { // !PF & ZF 3747 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, 3748 DAG.getConstant(X86::COND_NP, MVT::i8), Cond); 3749 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, 3750 DAG.getConstant(X86::COND_E, MVT::i8), Cond); 3751 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3752 } 3753 case ISD::SETUNE: { // PF | !ZF 3754 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, 3755 DAG.getConstant(X86::COND_P, MVT::i8), Cond); 3756 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, 3757 DAG.getConstant(X86::COND_NE, MVT::i8), Cond); 3758 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3759 } 3760 } 3761} 3762 3763 3764SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3765 bool addTest = true; 3766 SDOperand Chain = DAG.getEntryNode(); 3767 SDOperand Cond = Op.getOperand(0); 3768 SDOperand CC; 3769 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3770 3771 if (Cond.getOpcode() == ISD::SETCC) 3772 Cond = LowerSETCC(Cond, DAG, Chain); 3773 3774 if (Cond.getOpcode() == X86ISD::SETCC) { 3775 CC = Cond.getOperand(0); 3776 3777 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3778 // (since flag operand cannot be shared). Use it as the condition setting 3779 // operand in place of the X86ISD::SETCC. 3780 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3781 // to use a test instead of duplicating the X86ISD::CMP (for register 3782 // pressure reason)? 3783 SDOperand Cmp = Cond.getOperand(1); 3784 unsigned Opc = Cmp.getOpcode(); 3785 bool IllegalFPCMov = 3786 ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) || 3787 (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) && 3788 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3789 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) && 3790 !IllegalFPCMov) { 3791 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3792 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3793 addTest = false; 3794 } 3795 } 3796 3797 if (addTest) { 3798 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3799 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3800 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3801 } 3802 3803 VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag); 3804 SmallVector<SDOperand, 4> Ops; 3805 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3806 // condition is true. 3807 Ops.push_back(Op.getOperand(2)); 3808 Ops.push_back(Op.getOperand(1)); 3809 Ops.push_back(CC); 3810 Ops.push_back(Cond.getValue(1)); 3811 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3812} 3813 3814SDOperand X86TargetLowering::LowerSELECT_New(SDOperand Op, SelectionDAG &DAG) { 3815 bool addTest = true; 3816 SDOperand Cond = Op.getOperand(0); 3817 SDOperand CC; 3818 3819 if (Cond.getOpcode() == ISD::SETCC) 3820 Cond = LowerSETCC_New(Cond, DAG); 3821 3822 if (Cond.getOpcode() == X86ISD::SETCC_NEW) { 3823 CC = Cond.getOperand(0); 3824 3825 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3826 // (since flag operand cannot be shared). Use it as the condition setting 3827 // operand in place of the X86ISD::SETCC. 3828 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3829 // to use a test instead of duplicating the X86ISD::CMP (for register 3830 // pressure reason)? 3831 SDOperand Cmp = Cond.getOperand(1); 3832 unsigned Opc = Cmp.getOpcode(); 3833 bool IllegalFPCMov = 3834 ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) || 3835 (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) && 3836 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3837 if ((Opc == X86ISD::CMP_NEW || 3838 Opc == X86ISD::COMI_NEW || 3839 Opc == X86ISD::UCOMI_NEW) && 3840 !IllegalFPCMov) { 3841 Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1)); 3842 addTest = false; 3843 } 3844 } 3845 3846 if (addTest) { 3847 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3848 Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Cond, 3849 DAG.getConstant(0, MVT::i8)); 3850 } 3851 3852 const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(), 3853 MVT::Flag); 3854 SmallVector<SDOperand, 4> Ops; 3855 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3856 // condition is true. 3857 Ops.push_back(Op.getOperand(2)); 3858 Ops.push_back(Op.getOperand(1)); 3859 Ops.push_back(CC); 3860 Ops.push_back(Cond); 3861 return DAG.getNode(X86ISD::CMOV_NEW, VTs, 2, &Ops[0], Ops.size()); 3862} 3863 3864SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3865 bool addTest = true; 3866 SDOperand Chain = Op.getOperand(0); 3867 SDOperand Cond = Op.getOperand(1); 3868 SDOperand Dest = Op.getOperand(2); 3869 SDOperand CC; 3870 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3871 3872 if (Cond.getOpcode() == ISD::SETCC) 3873 Cond = LowerSETCC(Cond, DAG, Chain); 3874 3875 if (Cond.getOpcode() == X86ISD::SETCC) { 3876 CC = Cond.getOperand(0); 3877 3878 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3879 // (since flag operand cannot be shared). Use it as the condition setting 3880 // operand in place of the X86ISD::SETCC. 3881 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3882 // to use a test instead of duplicating the X86ISD::CMP (for register 3883 // pressure reason)? 3884 SDOperand Cmp = Cond.getOperand(1); 3885 unsigned Opc = Cmp.getOpcode(); 3886 if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) { 3887 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3888 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3889 addTest = false; 3890 } 3891 } 3892 3893 if (addTest) { 3894 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3895 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3896 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3897 } 3898 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3899 Cond, Op.getOperand(2), CC, Cond.getValue(1)); 3900} 3901 3902SDOperand X86TargetLowering::LowerBRCOND_New(SDOperand Op, SelectionDAG &DAG) { 3903 bool addTest = true; 3904 SDOperand Chain = Op.getOperand(0); 3905 SDOperand Cond = Op.getOperand(1); 3906 SDOperand Dest = Op.getOperand(2); 3907 SDOperand CC; 3908 3909 if (Cond.getOpcode() == ISD::SETCC) 3910 Cond = LowerSETCC_New(Cond, DAG); 3911 3912 if (Cond.getOpcode() == X86ISD::SETCC_NEW) { 3913 CC = Cond.getOperand(0); 3914 3915 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3916 // (since flag operand cannot be shared). Use it as the condition setting 3917 // operand in place of the X86ISD::SETCC. 3918 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3919 // to use a test instead of duplicating the X86ISD::CMP (for register 3920 // pressure reason)? 3921 SDOperand Cmp = Cond.getOperand(1); 3922 unsigned Opc = Cmp.getOpcode(); 3923 if (Opc == X86ISD::CMP_NEW || 3924 Opc == X86ISD::COMI_NEW || 3925 Opc == X86ISD::UCOMI_NEW) { 3926 Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1)); 3927 addTest = false; 3928 } 3929 } 3930 3931 if (addTest) { 3932 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3933 Cond= DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); 3934 } 3935 return DAG.getNode(X86ISD::BRCOND_NEW, Op.getValueType(), 3936 Chain, Op.getOperand(2), CC, Cond); 3937} 3938 3939SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 3940 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3941 3942 if (Subtarget->is64Bit()) 3943 return LowerX86_64CCCCallTo(Op, DAG, CallingConv); 3944 else 3945 switch (CallingConv) { 3946 default: 3947 assert(0 && "Unsupported calling convention"); 3948 case CallingConv::Fast: 3949 // TODO: Implement fastcc 3950 // Falls through 3951 case CallingConv::C: 3952 case CallingConv::X86_StdCall: 3953 return LowerCCCCallTo(Op, DAG, CallingConv); 3954 case CallingConv::X86_FastCall: 3955 return LowerFastCCCallTo(Op, DAG, CallingConv); 3956 } 3957} 3958 3959 3960// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 3961// Calls to _alloca is needed to probe the stack when allocating more than 4k 3962// bytes in one go. Touching the stack at 4K increments is necessary to ensure 3963// that the guard pages used by the OS virtual memory manager are allocated in 3964// correct sequence. 3965SDOperand 3966X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, 3967 SelectionDAG &DAG) { 3968 assert(Subtarget->isTargetCygMing() && 3969 "This should be used only on Cygwin/Mingw targets"); 3970 3971 // Get the inputs. 3972 SDOperand Chain = Op.getOperand(0); 3973 SDOperand Size = Op.getOperand(1); 3974 // FIXME: Ensure alignment here 3975 3976 SDOperand Flag; 3977 3978 MVT::ValueType IntPtr = getPointerTy(); 3979 MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32); 3980 3981 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 3982 Flag = Chain.getValue(1); 3983 3984 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3985 SDOperand Ops[] = { Chain, 3986 DAG.getTargetExternalSymbol("_alloca", IntPtr), 3987 DAG.getRegister(X86::EAX, IntPtr), 3988 Flag }; 3989 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4); 3990 Flag = Chain.getValue(1); 3991 3992 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 3993 3994 std::vector<MVT::ValueType> Tys; 3995 Tys.push_back(SPTy); 3996 Tys.push_back(MVT::Other); 3997 SDOperand Ops1[2] = { Chain.getValue(0), Chain }; 3998 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); 3999} 4000 4001SDOperand 4002X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 4003 MachineFunction &MF = DAG.getMachineFunction(); 4004 const Function* Fn = MF.getFunction(); 4005 if (Fn->hasExternalLinkage() && 4006 Subtarget->isTargetCygMing() && 4007 Fn->getName() == "main") 4008 MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true); 4009 4010 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 4011 if (Subtarget->is64Bit()) 4012 return LowerX86_64CCCArguments(Op, DAG); 4013 else 4014 switch(CC) { 4015 default: 4016 assert(0 && "Unsupported calling convention"); 4017 case CallingConv::Fast: 4018 // TODO: implement fastcc. 4019 4020 // Falls through 4021 case CallingConv::C: 4022 return LowerCCCArguments(Op, DAG); 4023 case CallingConv::X86_StdCall: 4024 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall); 4025 return LowerCCCArguments(Op, DAG, true); 4026 case CallingConv::X86_FastCall: 4027 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall); 4028 return LowerFastCCArguments(Op, DAG); 4029 } 4030} 4031 4032SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 4033 SDOperand InFlag(0, 0); 4034 SDOperand Chain = Op.getOperand(0); 4035 unsigned Align = 4036 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 4037 if (Align == 0) Align = 1; 4038 4039 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 4040 // If not DWORD aligned or size is more than the threshold, call memset. 4041 // The libc version is likely to be faster for these cases. It can use the 4042 // address value and run time information about the CPU. 4043 if ((Align & 3) != 0 || 4044 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 4045 MVT::ValueType IntPtr = getPointerTy(); 4046 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 4047 TargetLowering::ArgListTy Args; 4048 TargetLowering::ArgListEntry Entry; 4049 Entry.Node = Op.getOperand(1); 4050 Entry.Ty = IntPtrTy; 4051 Args.push_back(Entry); 4052 // Extend the unsigned i8 argument to be an int value for the call. 4053 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 4054 Entry.Ty = IntPtrTy; 4055 Args.push_back(Entry); 4056 Entry.Node = Op.getOperand(3); 4057 Args.push_back(Entry); 4058 std::pair<SDOperand,SDOperand> CallResult = 4059 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 4060 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 4061 return CallResult.second; 4062 } 4063 4064 MVT::ValueType AVT; 4065 SDOperand Count; 4066 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 4067 unsigned BytesLeft = 0; 4068 bool TwoRepStos = false; 4069 if (ValC) { 4070 unsigned ValReg; 4071 uint64_t Val = ValC->getValue() & 255; 4072 4073 // If the value is a constant, then we can potentially use larger sets. 4074 switch (Align & 3) { 4075 case 2: // WORD aligned 4076 AVT = MVT::i16; 4077 ValReg = X86::AX; 4078 Val = (Val << 8) | Val; 4079 break; 4080 case 0: // DWORD aligned 4081 AVT = MVT::i32; 4082 ValReg = X86::EAX; 4083 Val = (Val << 8) | Val; 4084 Val = (Val << 16) | Val; 4085 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 4086 AVT = MVT::i64; 4087 ValReg = X86::RAX; 4088 Val = (Val << 32) | Val; 4089 } 4090 break; 4091 default: // Byte aligned 4092 AVT = MVT::i8; 4093 ValReg = X86::AL; 4094 Count = Op.getOperand(3); 4095 break; 4096 } 4097 4098 if (AVT > MVT::i8) { 4099 if (I) { 4100 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4101 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 4102 BytesLeft = I->getValue() % UBytes; 4103 } else { 4104 assert(AVT >= MVT::i32 && 4105 "Do not use rep;stos if not at least DWORD aligned"); 4106 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 4107 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 4108 TwoRepStos = true; 4109 } 4110 } 4111 4112 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 4113 InFlag); 4114 InFlag = Chain.getValue(1); 4115 } else { 4116 AVT = MVT::i8; 4117 Count = Op.getOperand(3); 4118 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 4119 InFlag = Chain.getValue(1); 4120 } 4121 4122 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4123 Count, InFlag); 4124 InFlag = Chain.getValue(1); 4125 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4126 Op.getOperand(1), InFlag); 4127 InFlag = Chain.getValue(1); 4128 4129 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4130 SmallVector<SDOperand, 8> Ops; 4131 Ops.push_back(Chain); 4132 Ops.push_back(DAG.getValueType(AVT)); 4133 Ops.push_back(InFlag); 4134 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4135 4136 if (TwoRepStos) { 4137 InFlag = Chain.getValue(1); 4138 Count = Op.getOperand(3); 4139 MVT::ValueType CVT = Count.getValueType(); 4140 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4141 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4142 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4143 Left, InFlag); 4144 InFlag = Chain.getValue(1); 4145 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4146 Ops.clear(); 4147 Ops.push_back(Chain); 4148 Ops.push_back(DAG.getValueType(MVT::i8)); 4149 Ops.push_back(InFlag); 4150 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 4151 } else if (BytesLeft) { 4152 // Issue stores for the last 1 - 7 bytes. 4153 SDOperand Value; 4154 unsigned Val = ValC->getValue() & 255; 4155 unsigned Offset = I->getValue() - BytesLeft; 4156 SDOperand DstAddr = Op.getOperand(1); 4157 MVT::ValueType AddrVT = DstAddr.getValueType(); 4158 if (BytesLeft >= 4) { 4159 Val = (Val << 8) | Val; 4160 Val = (Val << 16) | Val; 4161 Value = DAG.getConstant(Val, MVT::i32); 4162 Chain = DAG.getStore(Chain, Value, 4163 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4164 DAG.getConstant(Offset, AddrVT)), 4165 NULL, 0); 4166 BytesLeft -= 4; 4167 Offset += 4; 4168 } 4169 if (BytesLeft >= 2) { 4170 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 4171 Chain = DAG.getStore(Chain, Value, 4172 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4173 DAG.getConstant(Offset, AddrVT)), 4174 NULL, 0); 4175 BytesLeft -= 2; 4176 Offset += 2; 4177 } 4178 if (BytesLeft == 1) { 4179 Value = DAG.getConstant(Val, MVT::i8); 4180 Chain = DAG.getStore(Chain, Value, 4181 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 4182 DAG.getConstant(Offset, AddrVT)), 4183 NULL, 0); 4184 } 4185 } 4186 4187 return Chain; 4188} 4189 4190SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 4191 SDOperand ChainOp = Op.getOperand(0); 4192 SDOperand DestOp = Op.getOperand(1); 4193 SDOperand SourceOp = Op.getOperand(2); 4194 SDOperand CountOp = Op.getOperand(3); 4195 SDOperand AlignOp = Op.getOperand(4); 4196 unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue(); 4197 if (Align == 0) Align = 1; 4198 4199 // The libc version is likely to be faster for the following cases. It can 4200 // use the address value and run time information about the CPU. 4201 // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster 4202 4203 // If not DWORD aligned, call memcpy. 4204 if ((Align & 3) != 0) 4205 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); 4206 4207 // If size is unknown, call memcpy. 4208 ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp); 4209 if (!I) 4210 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); 4211 4212 // If size is more than the threshold, call memcpy. 4213 unsigned Size = I->getValue(); 4214 if (Size > Subtarget->getMinRepStrSizeThreshold()) 4215 return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); 4216 4217 return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); 4218} 4219 4220SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain, 4221 SDOperand Dest, 4222 SDOperand Source, 4223 SDOperand Count, 4224 SelectionDAG &DAG) { 4225 MVT::ValueType IntPtr = getPointerTy(); 4226 TargetLowering::ArgListTy Args; 4227 TargetLowering::ArgListEntry Entry; 4228 Entry.Ty = getTargetData()->getIntPtrType(); 4229 Entry.Node = Dest; Args.push_back(Entry); 4230 Entry.Node = Source; Args.push_back(Entry); 4231 Entry.Node = Count; Args.push_back(Entry); 4232 std::pair<SDOperand,SDOperand> CallResult = 4233 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 4234 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 4235 return CallResult.second; 4236} 4237 4238SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, 4239 SDOperand Dest, 4240 SDOperand Source, 4241 unsigned Size, 4242 unsigned Align, 4243 SelectionDAG &DAG) { 4244 MVT::ValueType AVT; 4245 unsigned BytesLeft = 0; 4246 switch (Align & 3) { 4247 case 2: // WORD aligned 4248 AVT = MVT::i16; 4249 break; 4250 case 0: // DWORD aligned 4251 AVT = MVT::i32; 4252 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 4253 AVT = MVT::i64; 4254 break; 4255 default: // Byte aligned 4256 AVT = MVT::i8; 4257 break; 4258 } 4259 4260 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 4261 SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy()); 4262 BytesLeft = Size % UBytes; 4263 4264 SDOperand InFlag(0, 0); 4265 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4266 Count, InFlag); 4267 InFlag = Chain.getValue(1); 4268 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 4269 Dest, InFlag); 4270 InFlag = Chain.getValue(1); 4271 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4272 Source, InFlag); 4273 InFlag = Chain.getValue(1); 4274 4275 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4276 SmallVector<SDOperand, 8> Ops; 4277 Ops.push_back(Chain); 4278 Ops.push_back(DAG.getValueType(AVT)); 4279 Ops.push_back(InFlag); 4280 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4281 4282 if (BytesLeft) { 4283 // Issue loads and stores for the last 1 - 7 bytes. 4284 unsigned Offset = Size - BytesLeft; 4285 SDOperand DstAddr = Dest; 4286 MVT::ValueType DstVT = DstAddr.getValueType(); 4287 SDOperand SrcAddr = Source; 4288 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4289 SDOperand Value; 4290 if (BytesLeft >= 4) { 4291 Value = DAG.getLoad(MVT::i32, Chain, 4292 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4293 DAG.getConstant(Offset, SrcVT)), 4294 NULL, 0); 4295 Chain = Value.getValue(1); 4296 Chain = DAG.getStore(Chain, Value, 4297 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4298 DAG.getConstant(Offset, DstVT)), 4299 NULL, 0); 4300 BytesLeft -= 4; 4301 Offset += 4; 4302 } 4303 if (BytesLeft >= 2) { 4304 Value = DAG.getLoad(MVT::i16, Chain, 4305 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4306 DAG.getConstant(Offset, SrcVT)), 4307 NULL, 0); 4308 Chain = Value.getValue(1); 4309 Chain = DAG.getStore(Chain, Value, 4310 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4311 DAG.getConstant(Offset, DstVT)), 4312 NULL, 0); 4313 BytesLeft -= 2; 4314 Offset += 2; 4315 } 4316 4317 if (BytesLeft == 1) { 4318 Value = DAG.getLoad(MVT::i8, Chain, 4319 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4320 DAG.getConstant(Offset, SrcVT)), 4321 NULL, 0); 4322 Chain = Value.getValue(1); 4323 Chain = DAG.getStore(Chain, Value, 4324 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4325 DAG.getConstant(Offset, DstVT)), 4326 NULL, 0); 4327 } 4328 } 4329 4330 return Chain; 4331} 4332 4333SDOperand 4334X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4335 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4336 SDOperand TheOp = Op.getOperand(0); 4337 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 4338 if (Subtarget->is64Bit()) { 4339 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4340 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 4341 MVT::i64, Copy1.getValue(2)); 4342 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 4343 DAG.getConstant(32, MVT::i8)); 4344 SDOperand Ops[] = { 4345 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 4346 }; 4347 4348 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4349 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 4350 } 4351 4352 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4353 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 4354 MVT::i32, Copy1.getValue(2)); 4355 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 4356 Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 4357 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 4358} 4359 4360SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4361 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4362 4363 if (!Subtarget->is64Bit()) { 4364 // vastart just stores the address of the VarArgsFrameIndex slot into the 4365 // memory location argument. 4366 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4367 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4368 SV->getOffset()); 4369 } 4370 4371 // __va_list_tag: 4372 // gp_offset (0 - 6 * 8) 4373 // fp_offset (48 - 48 + 8 * 16) 4374 // overflow_arg_area (point to parameters coming in memory). 4375 // reg_save_area 4376 SmallVector<SDOperand, 8> MemOps; 4377 SDOperand FIN = Op.getOperand(1); 4378 // Store gp_offset 4379 SDOperand Store = DAG.getStore(Op.getOperand(0), 4380 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4381 FIN, SV->getValue(), SV->getOffset()); 4382 MemOps.push_back(Store); 4383 4384 // Store fp_offset 4385 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4386 DAG.getConstant(4, getPointerTy())); 4387 Store = DAG.getStore(Op.getOperand(0), 4388 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4389 FIN, SV->getValue(), SV->getOffset()); 4390 MemOps.push_back(Store); 4391 4392 // Store ptr to overflow_arg_area 4393 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4394 DAG.getConstant(4, getPointerTy())); 4395 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4396 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4397 SV->getOffset()); 4398 MemOps.push_back(Store); 4399 4400 // Store ptr to reg_save_area. 4401 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4402 DAG.getConstant(8, getPointerTy())); 4403 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4404 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4405 SV->getOffset()); 4406 MemOps.push_back(Store); 4407 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4408} 4409 4410SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4411 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4412 SDOperand Chain = Op.getOperand(0); 4413 SDOperand DstPtr = Op.getOperand(1); 4414 SDOperand SrcPtr = Op.getOperand(2); 4415 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4416 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4417 4418 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4419 SrcSV->getValue(), SrcSV->getOffset()); 4420 Chain = SrcPtr.getValue(1); 4421 for (unsigned i = 0; i < 3; ++i) { 4422 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4423 SrcSV->getValue(), SrcSV->getOffset()); 4424 Chain = Val.getValue(1); 4425 Chain = DAG.getStore(Chain, Val, DstPtr, 4426 DstSV->getValue(), DstSV->getOffset()); 4427 if (i == 2) 4428 break; 4429 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4430 DAG.getConstant(8, getPointerTy())); 4431 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4432 DAG.getConstant(8, getPointerTy())); 4433 } 4434 return Chain; 4435} 4436 4437SDOperand 4438X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4439 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4440 switch (IntNo) { 4441 default: return SDOperand(); // Don't custom lower most intrinsics. 4442 // Comparison intrinsics. 4443 case Intrinsic::x86_sse_comieq_ss: 4444 case Intrinsic::x86_sse_comilt_ss: 4445 case Intrinsic::x86_sse_comile_ss: 4446 case Intrinsic::x86_sse_comigt_ss: 4447 case Intrinsic::x86_sse_comige_ss: 4448 case Intrinsic::x86_sse_comineq_ss: 4449 case Intrinsic::x86_sse_ucomieq_ss: 4450 case Intrinsic::x86_sse_ucomilt_ss: 4451 case Intrinsic::x86_sse_ucomile_ss: 4452 case Intrinsic::x86_sse_ucomigt_ss: 4453 case Intrinsic::x86_sse_ucomige_ss: 4454 case Intrinsic::x86_sse_ucomineq_ss: 4455 case Intrinsic::x86_sse2_comieq_sd: 4456 case Intrinsic::x86_sse2_comilt_sd: 4457 case Intrinsic::x86_sse2_comile_sd: 4458 case Intrinsic::x86_sse2_comigt_sd: 4459 case Intrinsic::x86_sse2_comige_sd: 4460 case Intrinsic::x86_sse2_comineq_sd: 4461 case Intrinsic::x86_sse2_ucomieq_sd: 4462 case Intrinsic::x86_sse2_ucomilt_sd: 4463 case Intrinsic::x86_sse2_ucomile_sd: 4464 case Intrinsic::x86_sse2_ucomigt_sd: 4465 case Intrinsic::x86_sse2_ucomige_sd: 4466 case Intrinsic::x86_sse2_ucomineq_sd: { 4467 unsigned Opc = 0; 4468 ISD::CondCode CC = ISD::SETCC_INVALID; 4469 switch (IntNo) { 4470 default: break; 4471 case Intrinsic::x86_sse_comieq_ss: 4472 case Intrinsic::x86_sse2_comieq_sd: 4473 Opc = X86ISD::COMI; 4474 CC = ISD::SETEQ; 4475 break; 4476 case Intrinsic::x86_sse_comilt_ss: 4477 case Intrinsic::x86_sse2_comilt_sd: 4478 Opc = X86ISD::COMI; 4479 CC = ISD::SETLT; 4480 break; 4481 case Intrinsic::x86_sse_comile_ss: 4482 case Intrinsic::x86_sse2_comile_sd: 4483 Opc = X86ISD::COMI; 4484 CC = ISD::SETLE; 4485 break; 4486 case Intrinsic::x86_sse_comigt_ss: 4487 case Intrinsic::x86_sse2_comigt_sd: 4488 Opc = X86ISD::COMI; 4489 CC = ISD::SETGT; 4490 break; 4491 case Intrinsic::x86_sse_comige_ss: 4492 case Intrinsic::x86_sse2_comige_sd: 4493 Opc = X86ISD::COMI; 4494 CC = ISD::SETGE; 4495 break; 4496 case Intrinsic::x86_sse_comineq_ss: 4497 case Intrinsic::x86_sse2_comineq_sd: 4498 Opc = X86ISD::COMI; 4499 CC = ISD::SETNE; 4500 break; 4501 case Intrinsic::x86_sse_ucomieq_ss: 4502 case Intrinsic::x86_sse2_ucomieq_sd: 4503 Opc = X86ISD::UCOMI; 4504 CC = ISD::SETEQ; 4505 break; 4506 case Intrinsic::x86_sse_ucomilt_ss: 4507 case Intrinsic::x86_sse2_ucomilt_sd: 4508 Opc = X86ISD::UCOMI; 4509 CC = ISD::SETLT; 4510 break; 4511 case Intrinsic::x86_sse_ucomile_ss: 4512 case Intrinsic::x86_sse2_ucomile_sd: 4513 Opc = X86ISD::UCOMI; 4514 CC = ISD::SETLE; 4515 break; 4516 case Intrinsic::x86_sse_ucomigt_ss: 4517 case Intrinsic::x86_sse2_ucomigt_sd: 4518 Opc = X86ISD::UCOMI; 4519 CC = ISD::SETGT; 4520 break; 4521 case Intrinsic::x86_sse_ucomige_ss: 4522 case Intrinsic::x86_sse2_ucomige_sd: 4523 Opc = X86ISD::UCOMI; 4524 CC = ISD::SETGE; 4525 break; 4526 case Intrinsic::x86_sse_ucomineq_ss: 4527 case Intrinsic::x86_sse2_ucomineq_sd: 4528 Opc = X86ISD::UCOMI; 4529 CC = ISD::SETNE; 4530 break; 4531 } 4532 4533 unsigned X86CC; 4534 SDOperand LHS = Op.getOperand(1); 4535 SDOperand RHS = Op.getOperand(2); 4536 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4537 4538 if (NewCCModeling) { 4539 Opc = (Opc == X86ISD::UCOMI) ? X86ISD::UCOMI_NEW : X86ISD::COMI_NEW; 4540 SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS); 4541 SDOperand SetCC = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, 4542 DAG.getConstant(X86CC, MVT::i8), Cond); 4543 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4544 } else { 4545 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4546 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4547 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4548 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4549 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4550 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4551 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4552 } 4553 } 4554 } 4555} 4556 4557SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4558 // Depths > 0 not supported yet! 4559 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4560 return SDOperand(); 4561 4562 // Just load the return address 4563 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4564 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4565} 4566 4567SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4568 // Depths > 0 not supported yet! 4569 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4570 return SDOperand(); 4571 4572 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4573 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4574 DAG.getConstant(4, getPointerTy())); 4575} 4576 4577SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4578 SelectionDAG &DAG) { 4579 // Is not yet supported on x86-64 4580 if (Subtarget->is64Bit()) 4581 return SDOperand(); 4582 4583 return DAG.getConstant(8, getPointerTy()); 4584} 4585 4586SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4587{ 4588 assert(!Subtarget->is64Bit() && 4589 "Lowering of eh_return builtin is not supported yet on x86-64"); 4590 4591 MachineFunction &MF = DAG.getMachineFunction(); 4592 SDOperand Chain = Op.getOperand(0); 4593 SDOperand Offset = Op.getOperand(1); 4594 SDOperand Handler = Op.getOperand(2); 4595 4596 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4597 getPointerTy()); 4598 4599 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4600 DAG.getConstant(-4UL, getPointerTy())); 4601 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4602 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4603 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4604 MF.addLiveOut(X86::ECX); 4605 4606 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4607 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4608} 4609 4610SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4611 SelectionDAG &DAG) { 4612 SDOperand Root = Op.getOperand(0); 4613 SDOperand Trmp = Op.getOperand(1); // trampoline 4614 SDOperand FPtr = Op.getOperand(2); // nested function 4615 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4616 4617 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4618 4619 if (Subtarget->is64Bit()) { 4620 return SDOperand(); // not yet supported 4621 } else { 4622 Function *Func = (Function *) 4623 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4624 unsigned CC = Func->getCallingConv(); 4625 unsigned NestReg; 4626 4627 switch (CC) { 4628 default: 4629 assert(0 && "Unsupported calling convention"); 4630 case CallingConv::C: 4631 case CallingConv::Fast: 4632 case CallingConv::X86_StdCall: { 4633 // Pass 'nest' parameter in ECX. 4634 // Must be kept in sync with X86CallingConv.td 4635 NestReg = X86::ECX; 4636 4637 // Check that ECX wasn't needed by an 'inreg' parameter. 4638 const FunctionType *FTy = Func->getFunctionType(); 4639 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4640 4641 if (Attrs && !Func->isVarArg()) { 4642 unsigned InRegCount = 0; 4643 unsigned Idx = 1; 4644 4645 for (FunctionType::param_iterator I = FTy->param_begin(), 4646 E = FTy->param_end(); I != E; ++I, ++Idx) 4647 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4648 // FIXME: should only count parameters that are lowered to integers. 4649 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 4650 4651 if (InRegCount > 2) { 4652 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 4653 abort(); 4654 } 4655 } 4656 break; 4657 } 4658 case CallingConv::X86_FastCall: 4659 // Pass 'nest' parameter in EAX. 4660 // Must be kept in sync with X86CallingConv.td 4661 NestReg = X86::EAX; 4662 break; 4663 } 4664 4665 const X86InstrInfo *TII = 4666 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 4667 4668 SDOperand OutChains[4]; 4669 SDOperand Addr, Disp; 4670 4671 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); 4672 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); 4673 4674 unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); 4675 unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg); 4676 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), 4677 Trmp, TrmpSV->getValue(), TrmpSV->getOffset()); 4678 4679 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); 4680 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(), 4681 TrmpSV->getOffset() + 1, false, 1); 4682 4683 unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); 4684 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); 4685 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, 4686 TrmpSV->getValue() + 5, TrmpSV->getOffset()); 4687 4688 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); 4689 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(), 4690 TrmpSV->getOffset() + 6, false, 1); 4691 4692 SDOperand Ops[] = 4693 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) }; 4694 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2); 4695 } 4696} 4697 4698/// LowerOperation - Provide custom lowering hooks for some operations. 4699/// 4700SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4701 switch (Op.getOpcode()) { 4702 default: assert(0 && "Should not custom lower this!"); 4703 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4704 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4705 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4706 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4707 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4708 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4709 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4710 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4711 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4712 case ISD::SHL_PARTS: 4713 case ISD::SRA_PARTS: 4714 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4715 case ISD::SDIV: 4716 case ISD::UDIV: 4717 case ISD::SREM: 4718 case ISD::UREM: return LowerIntegerDivOrRem(Op, DAG); 4719 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4720 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4721 case ISD::FABS: return LowerFABS(Op, DAG); 4722 case ISD::FNEG: return LowerFNEG(Op, DAG); 4723 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4724 case ISD::SETCC: return NewCCModeling 4725 ? LowerSETCC_New(Op, DAG) 4726 : LowerSETCC(Op, DAG, DAG.getEntryNode()); 4727 case ISD::SELECT: return NewCCModeling 4728 ? LowerSELECT_New(Op, DAG) 4729 : LowerSELECT(Op, DAG); 4730 case ISD::BRCOND: return NewCCModeling 4731 ? LowerBRCOND_New(Op, DAG) 4732 : LowerBRCOND(Op, DAG); 4733 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4734 case ISD::CALL: return LowerCALL(Op, DAG); 4735 case ISD::RET: return LowerRET(Op, DAG); 4736 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4737 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4738 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4739 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4740 case ISD::VASTART: return LowerVASTART(Op, DAG); 4741 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4742 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4743 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4744 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4745 case ISD::FRAME_TO_ARGS_OFFSET: 4746 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 4747 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4748 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 4749 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 4750 } 4751 return SDOperand(); 4752} 4753 4754const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4755 switch (Opcode) { 4756 default: return NULL; 4757 case X86ISD::SHLD: return "X86ISD::SHLD"; 4758 case X86ISD::SHRD: return "X86ISD::SHRD"; 4759 case X86ISD::FAND: return "X86ISD::FAND"; 4760 case X86ISD::FOR: return "X86ISD::FOR"; 4761 case X86ISD::FXOR: return "X86ISD::FXOR"; 4762 case X86ISD::FSRL: return "X86ISD::FSRL"; 4763 case X86ISD::FILD: return "X86ISD::FILD"; 4764 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4765 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4766 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4767 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4768 case X86ISD::FLD: return "X86ISD::FLD"; 4769 case X86ISD::FST: return "X86ISD::FST"; 4770 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4771 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4772 case X86ISD::CALL: return "X86ISD::CALL"; 4773 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4774 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4775 case X86ISD::CMP: return "X86ISD::CMP"; 4776 case X86ISD::CMP_NEW: return "X86ISD::CMP_NEW"; 4777 case X86ISD::COMI: return "X86ISD::COMI"; 4778 case X86ISD::COMI_NEW: return "X86ISD::COMI_NEW"; 4779 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4780 case X86ISD::UCOMI_NEW: return "X86ISD::UCOMI_NEW"; 4781 case X86ISD::SETCC: return "X86ISD::SETCC"; 4782 case X86ISD::SETCC_NEW: return "X86ISD::SETCC_NEW"; 4783 case X86ISD::CMOV: return "X86ISD::CMOV"; 4784 case X86ISD::CMOV_NEW: return "X86ISD::CMOV_NEW"; 4785 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4786 case X86ISD::BRCOND_NEW: return "X86ISD::BRCOND_NEW"; 4787 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4788 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4789 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4790 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4791 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4792 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4793 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4794 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4795 case X86ISD::FMAX: return "X86ISD::FMAX"; 4796 case X86ISD::FMIN: return "X86ISD::FMIN"; 4797 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 4798 case X86ISD::FRCP: return "X86ISD::FRCP"; 4799 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 4800 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 4801 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 4802 case X86ISD::DIV: return "X86ISD::DIV"; 4803 case X86ISD::IDIV: return "X86ISD::IDIV"; 4804 } 4805} 4806 4807// isLegalAddressingMode - Return true if the addressing mode represented 4808// by AM is legal for this target, for a load/store of the specified type. 4809bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 4810 const Type *Ty) const { 4811 // X86 supports extremely general addressing modes. 4812 4813 // X86 allows a sign-extended 32-bit immediate field as a displacement. 4814 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 4815 return false; 4816 4817 if (AM.BaseGV) { 4818 // We can only fold this if we don't need an extra load. 4819 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 4820 return false; 4821 4822 // X86-64 only supports addr of globals in small code model. 4823 if (Subtarget->is64Bit()) { 4824 if (getTargetMachine().getCodeModel() != CodeModel::Small) 4825 return false; 4826 // If lower 4G is not available, then we must use rip-relative addressing. 4827 if (AM.BaseOffs || AM.Scale > 1) 4828 return false; 4829 } 4830 } 4831 4832 switch (AM.Scale) { 4833 case 0: 4834 case 1: 4835 case 2: 4836 case 4: 4837 case 8: 4838 // These scales always work. 4839 break; 4840 case 3: 4841 case 5: 4842 case 9: 4843 // These scales are formed with basereg+scalereg. Only accept if there is 4844 // no basereg yet. 4845 if (AM.HasBaseReg) 4846 return false; 4847 break; 4848 default: // Other stuff never works. 4849 return false; 4850 } 4851 4852 return true; 4853} 4854 4855 4856/// isShuffleMaskLegal - Targets can use this to indicate that they only 4857/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4858/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4859/// are assumed to be legal. 4860bool 4861X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4862 // Only do shuffles on 128-bit vector types for now. 4863 if (MVT::getSizeInBits(VT) == 64) return false; 4864 return (Mask.Val->getNumOperands() <= 4 || 4865 isIdentityMask(Mask.Val) || 4866 isIdentityMask(Mask.Val, true) || 4867 isSplatMask(Mask.Val) || 4868 isPSHUFHW_PSHUFLWMask(Mask.Val) || 4869 X86::isUNPCKLMask(Mask.Val) || 4870 X86::isUNPCKHMask(Mask.Val) || 4871 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 4872 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 4873} 4874 4875bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 4876 MVT::ValueType EVT, 4877 SelectionDAG &DAG) const { 4878 unsigned NumElts = BVOps.size(); 4879 // Only do shuffles on 128-bit vector types for now. 4880 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 4881 if (NumElts == 2) return true; 4882 if (NumElts == 4) { 4883 return (isMOVLMask(&BVOps[0], 4) || 4884 isCommutedMOVL(&BVOps[0], 4, true) || 4885 isSHUFPMask(&BVOps[0], 4) || 4886 isCommutedSHUFP(&BVOps[0], 4)); 4887 } 4888 return false; 4889} 4890 4891//===----------------------------------------------------------------------===// 4892// X86 Scheduler Hooks 4893//===----------------------------------------------------------------------===// 4894 4895MachineBasicBlock * 4896X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 4897 MachineBasicBlock *BB) { 4898 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4899 switch (MI->getOpcode()) { 4900 default: assert(false && "Unexpected instr type to insert"); 4901 case X86::CMOV_FR32: 4902 case X86::CMOV_FR64: 4903 case X86::CMOV_V4F32: 4904 case X86::CMOV_V2F64: 4905 case X86::CMOV_V2I64: 4906 4907 case X86::NEW_CMOV_FR32: 4908 case X86::NEW_CMOV_FR64: 4909 case X86::NEW_CMOV_V4F32: 4910 case X86::NEW_CMOV_V2F64: 4911 case X86::NEW_CMOV_V2I64: { 4912 // To "insert" a SELECT_CC instruction, we actually have to insert the 4913 // diamond control-flow pattern. The incoming instruction knows the 4914 // destination vreg to set, the condition code register to branch on, the 4915 // true/false values to select between, and a branch opcode to use. 4916 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4917 ilist<MachineBasicBlock>::iterator It = BB; 4918 ++It; 4919 4920 // thisMBB: 4921 // ... 4922 // TrueVal = ... 4923 // cmpTY ccX, r1, r2 4924 // bCC copy1MBB 4925 // fallthrough --> copy0MBB 4926 MachineBasicBlock *thisMBB = BB; 4927 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 4928 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 4929 unsigned Opc = 4930 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 4931 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 4932 MachineFunction *F = BB->getParent(); 4933 F->getBasicBlockList().insert(It, copy0MBB); 4934 F->getBasicBlockList().insert(It, sinkMBB); 4935 // Update machine-CFG edges by first adding all successors of the current 4936 // block to the new block which will contain the Phi node for the select. 4937 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 4938 e = BB->succ_end(); i != e; ++i) 4939 sinkMBB->addSuccessor(*i); 4940 // Next, remove all successors of the current block, and add the true 4941 // and fallthrough blocks as its successors. 4942 while(!BB->succ_empty()) 4943 BB->removeSuccessor(BB->succ_begin()); 4944 BB->addSuccessor(copy0MBB); 4945 BB->addSuccessor(sinkMBB); 4946 4947 // copy0MBB: 4948 // %FalseValue = ... 4949 // # fallthrough to sinkMBB 4950 BB = copy0MBB; 4951 4952 // Update machine-CFG edges 4953 BB->addSuccessor(sinkMBB); 4954 4955 // sinkMBB: 4956 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4957 // ... 4958 BB = sinkMBB; 4959 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 4960 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4961 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4962 4963 delete MI; // The pseudo instruction is gone now. 4964 return BB; 4965 } 4966 4967 case X86::FP32_TO_INT16_IN_MEM: 4968 case X86::FP32_TO_INT32_IN_MEM: 4969 case X86::FP32_TO_INT64_IN_MEM: 4970 case X86::FP64_TO_INT16_IN_MEM: 4971 case X86::FP64_TO_INT32_IN_MEM: 4972 case X86::FP64_TO_INT64_IN_MEM: 4973 case X86::FP80_TO_INT16_IN_MEM: 4974 case X86::FP80_TO_INT32_IN_MEM: 4975 case X86::FP80_TO_INT64_IN_MEM: { 4976 // Change the floating point control register to use "round towards zero" 4977 // mode when truncating to an integer value. 4978 MachineFunction *F = BB->getParent(); 4979 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 4980 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 4981 4982 // Load the old value of the high byte of the control word... 4983 unsigned OldCW = 4984 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 4985 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 4986 4987 // Set the high part to be round to zero... 4988 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 4989 .addImm(0xC7F); 4990 4991 // Reload the modified control word now... 4992 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4993 4994 // Restore the memory image of control word to original value 4995 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 4996 .addReg(OldCW); 4997 4998 // Get the X86 opcode to use. 4999 unsigned Opc; 5000 switch (MI->getOpcode()) { 5001 default: assert(0 && "illegal opcode!"); 5002 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; 5003 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; 5004 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; 5005 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; 5006 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; 5007 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; 5008 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; 5009 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; 5010 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; 5011 } 5012 5013 X86AddressMode AM; 5014 MachineOperand &Op = MI->getOperand(0); 5015 if (Op.isRegister()) { 5016 AM.BaseType = X86AddressMode::RegBase; 5017 AM.Base.Reg = Op.getReg(); 5018 } else { 5019 AM.BaseType = X86AddressMode::FrameIndexBase; 5020 AM.Base.FrameIndex = Op.getFrameIndex(); 5021 } 5022 Op = MI->getOperand(1); 5023 if (Op.isImmediate()) 5024 AM.Scale = Op.getImm(); 5025 Op = MI->getOperand(2); 5026 if (Op.isImmediate()) 5027 AM.IndexReg = Op.getImm(); 5028 Op = MI->getOperand(3); 5029 if (Op.isGlobalAddress()) { 5030 AM.GV = Op.getGlobal(); 5031 } else { 5032 AM.Disp = Op.getImm(); 5033 } 5034 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 5035 .addReg(MI->getOperand(4).getReg()); 5036 5037 // Reload the original control word now. 5038 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 5039 5040 delete MI; // The pseudo instruction is gone now. 5041 return BB; 5042 } 5043 } 5044} 5045 5046//===----------------------------------------------------------------------===// 5047// X86 Optimization Hooks 5048//===----------------------------------------------------------------------===// 5049 5050void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 5051 uint64_t Mask, 5052 uint64_t &KnownZero, 5053 uint64_t &KnownOne, 5054 const SelectionDAG &DAG, 5055 unsigned Depth) const { 5056 unsigned Opc = Op.getOpcode(); 5057 assert((Opc >= ISD::BUILTIN_OP_END || 5058 Opc == ISD::INTRINSIC_WO_CHAIN || 5059 Opc == ISD::INTRINSIC_W_CHAIN || 5060 Opc == ISD::INTRINSIC_VOID) && 5061 "Should use MaskedValueIsZero if you don't know whether Op" 5062 " is a target node!"); 5063 5064 KnownZero = KnownOne = 0; // Don't know anything. 5065 switch (Opc) { 5066 default: break; 5067 case X86ISD::SETCC: 5068 case X86ISD::SETCC_NEW: 5069 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 5070 break; 5071 } 5072} 5073 5074/// getShuffleScalarElt - Returns the scalar element that will make up the ith 5075/// element of the result of the vector shuffle. 5076static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 5077 MVT::ValueType VT = N->getValueType(0); 5078 SDOperand PermMask = N->getOperand(2); 5079 unsigned NumElems = PermMask.getNumOperands(); 5080 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 5081 i %= NumElems; 5082 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5083 return (i == 0) 5084 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5085 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 5086 SDOperand Idx = PermMask.getOperand(i); 5087 if (Idx.getOpcode() == ISD::UNDEF) 5088 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 5089 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 5090 } 5091 return SDOperand(); 5092} 5093 5094/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 5095/// node is a GlobalAddress + an offset. 5096static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 5097 unsigned Opc = N->getOpcode(); 5098 if (Opc == X86ISD::Wrapper) { 5099 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 5100 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 5101 return true; 5102 } 5103 } else if (Opc == ISD::ADD) { 5104 SDOperand N1 = N->getOperand(0); 5105 SDOperand N2 = N->getOperand(1); 5106 if (isGAPlusOffset(N1.Val, GA, Offset)) { 5107 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 5108 if (V) { 5109 Offset += V->getSignExtended(); 5110 return true; 5111 } 5112 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 5113 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 5114 if (V) { 5115 Offset += V->getSignExtended(); 5116 return true; 5117 } 5118 } 5119 } 5120 return false; 5121} 5122 5123/// isConsecutiveLoad - Returns true if N is loading from an address of Base 5124/// + Dist * Size. 5125static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 5126 MachineFrameInfo *MFI) { 5127 if (N->getOperand(0).Val != Base->getOperand(0).Val) 5128 return false; 5129 5130 SDOperand Loc = N->getOperand(1); 5131 SDOperand BaseLoc = Base->getOperand(1); 5132 if (Loc.getOpcode() == ISD::FrameIndex) { 5133 if (BaseLoc.getOpcode() != ISD::FrameIndex) 5134 return false; 5135 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 5136 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 5137 int FS = MFI->getObjectSize(FI); 5138 int BFS = MFI->getObjectSize(BFI); 5139 if (FS != BFS || FS != Size) return false; 5140 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 5141 } else { 5142 GlobalValue *GV1 = NULL; 5143 GlobalValue *GV2 = NULL; 5144 int64_t Offset1 = 0; 5145 int64_t Offset2 = 0; 5146 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 5147 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 5148 if (isGA1 && isGA2 && GV1 == GV2) 5149 return Offset1 == (Offset2 + Dist*Size); 5150 } 5151 5152 return false; 5153} 5154 5155static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 5156 const X86Subtarget *Subtarget) { 5157 GlobalValue *GV; 5158 int64_t Offset; 5159 if (isGAPlusOffset(Base, GV, Offset)) 5160 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 5161 else { 5162 assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); 5163 int BFI = cast<FrameIndexSDNode>(Base)->getIndex(); 5164 if (BFI < 0) 5165 // Fixed objects do not specify alignment, however the offsets are known. 5166 return ((Subtarget->getStackAlignment() % 16) == 0 && 5167 (MFI->getObjectOffset(BFI) % 16) == 0); 5168 else 5169 return MFI->getObjectAlignment(BFI) >= 16; 5170 } 5171 return false; 5172} 5173 5174 5175/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 5176/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 5177/// if the load addresses are consecutive, non-overlapping, and in the right 5178/// order. 5179static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 5180 const X86Subtarget *Subtarget) { 5181 MachineFunction &MF = DAG.getMachineFunction(); 5182 MachineFrameInfo *MFI = MF.getFrameInfo(); 5183 MVT::ValueType VT = N->getValueType(0); 5184 MVT::ValueType EVT = MVT::getVectorElementType(VT); 5185 SDOperand PermMask = N->getOperand(2); 5186 int NumElems = (int)PermMask.getNumOperands(); 5187 SDNode *Base = NULL; 5188 for (int i = 0; i < NumElems; ++i) { 5189 SDOperand Idx = PermMask.getOperand(i); 5190 if (Idx.getOpcode() == ISD::UNDEF) { 5191 if (!Base) return SDOperand(); 5192 } else { 5193 SDOperand Arg = 5194 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 5195 if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val)) 5196 return SDOperand(); 5197 if (!Base) 5198 Base = Arg.Val; 5199 else if (!isConsecutiveLoad(Arg.Val, Base, 5200 i, MVT::getSizeInBits(EVT)/8,MFI)) 5201 return SDOperand(); 5202 } 5203 } 5204 5205 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 5206 LoadSDNode *LD = cast<LoadSDNode>(Base); 5207 if (isAlign16) { 5208 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5209 LD->getSrcValueOffset(), LD->isVolatile()); 5210 } else { 5211 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 5212 LD->getSrcValueOffset(), LD->isVolatile(), 5213 LD->getAlignment()); 5214 } 5215} 5216 5217/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 5218static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 5219 const X86Subtarget *Subtarget) { 5220 SDOperand Cond = N->getOperand(0); 5221 5222 // If we have SSE[12] support, try to form min/max nodes. 5223 if (Subtarget->hasSSE2() && 5224 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 5225 if (Cond.getOpcode() == ISD::SETCC) { 5226 // Get the LHS/RHS of the select. 5227 SDOperand LHS = N->getOperand(1); 5228 SDOperand RHS = N->getOperand(2); 5229 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 5230 5231 unsigned Opcode = 0; 5232 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 5233 switch (CC) { 5234 default: break; 5235 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 5236 case ISD::SETULE: 5237 case ISD::SETLE: 5238 if (!UnsafeFPMath) break; 5239 // FALL THROUGH. 5240 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 5241 case ISD::SETLT: 5242 Opcode = X86ISD::FMIN; 5243 break; 5244 5245 case ISD::SETOGT: // (X > Y) ? X : Y -> max 5246 case ISD::SETUGT: 5247 case ISD::SETGT: 5248 if (!UnsafeFPMath) break; 5249 // FALL THROUGH. 5250 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 5251 case ISD::SETGE: 5252 Opcode = X86ISD::FMAX; 5253 break; 5254 } 5255 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 5256 switch (CC) { 5257 default: break; 5258 case ISD::SETOGT: // (X > Y) ? Y : X -> min 5259 case ISD::SETUGT: 5260 case ISD::SETGT: 5261 if (!UnsafeFPMath) break; 5262 // FALL THROUGH. 5263 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 5264 case ISD::SETGE: 5265 Opcode = X86ISD::FMIN; 5266 break; 5267 5268 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 5269 case ISD::SETULE: 5270 case ISD::SETLE: 5271 if (!UnsafeFPMath) break; 5272 // FALL THROUGH. 5273 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 5274 case ISD::SETLT: 5275 Opcode = X86ISD::FMAX; 5276 break; 5277 } 5278 } 5279 5280 if (Opcode) 5281 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 5282 } 5283 5284 } 5285 5286 return SDOperand(); 5287} 5288 5289 5290SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 5291 DAGCombinerInfo &DCI) const { 5292 SelectionDAG &DAG = DCI.DAG; 5293 switch (N->getOpcode()) { 5294 default: break; 5295 case ISD::VECTOR_SHUFFLE: 5296 return PerformShuffleCombine(N, DAG, Subtarget); 5297 case ISD::SELECT: 5298 return PerformSELECTCombine(N, DAG, Subtarget); 5299 } 5300 5301 return SDOperand(); 5302} 5303 5304//===----------------------------------------------------------------------===// 5305// X86 Inline Assembly Support 5306//===----------------------------------------------------------------------===// 5307 5308/// getConstraintType - Given a constraint letter, return the type of 5309/// constraint it is for this target. 5310X86TargetLowering::ConstraintType 5311X86TargetLowering::getConstraintType(const std::string &Constraint) const { 5312 if (Constraint.size() == 1) { 5313 switch (Constraint[0]) { 5314 case 'A': 5315 case 'r': 5316 case 'R': 5317 case 'l': 5318 case 'q': 5319 case 'Q': 5320 case 'x': 5321 case 'Y': 5322 return C_RegisterClass; 5323 default: 5324 break; 5325 } 5326 } 5327 return TargetLowering::getConstraintType(Constraint); 5328} 5329 5330/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5331/// vector. If it is invalid, don't add anything to Ops. 5332void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op, 5333 char Constraint, 5334 std::vector<SDOperand>&Ops, 5335 SelectionDAG &DAG) { 5336 SDOperand Result(0, 0); 5337 5338 switch (Constraint) { 5339 default: break; 5340 case 'I': 5341 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5342 if (C->getValue() <= 31) { 5343 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5344 break; 5345 } 5346 } 5347 return; 5348 case 'N': 5349 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5350 if (C->getValue() <= 255) { 5351 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5352 break; 5353 } 5354 } 5355 return; 5356 case 'i': { 5357 // Literal immediates are always ok. 5358 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { 5359 Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType()); 5360 break; 5361 } 5362 5363 // If we are in non-pic codegen mode, we allow the address of a global (with 5364 // an optional displacement) to be used with 'i'. 5365 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); 5366 int64_t Offset = 0; 5367 5368 // Match either (GA) or (GA+C) 5369 if (GA) { 5370 Offset = GA->getOffset(); 5371 } else if (Op.getOpcode() == ISD::ADD) { 5372 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5373 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5374 if (C && GA) { 5375 Offset = GA->getOffset()+C->getValue(); 5376 } else { 5377 C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5378 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5379 if (C && GA) 5380 Offset = GA->getOffset()+C->getValue(); 5381 else 5382 C = 0, GA = 0; 5383 } 5384 } 5385 5386 if (GA) { 5387 // If addressing this global requires a load (e.g. in PIC mode), we can't 5388 // match. 5389 if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(), 5390 false)) 5391 return; 5392 5393 Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), 5394 Offset); 5395 Result = Op; 5396 break; 5397 } 5398 5399 // Otherwise, not valid for this mode. 5400 return; 5401 } 5402 } 5403 5404 if (Result.Val) { 5405 Ops.push_back(Result); 5406 return; 5407 } 5408 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5409} 5410 5411std::vector<unsigned> X86TargetLowering:: 5412getRegClassForInlineAsmConstraint(const std::string &Constraint, 5413 MVT::ValueType VT) const { 5414 if (Constraint.size() == 1) { 5415 // FIXME: not handling fp-stack yet! 5416 switch (Constraint[0]) { // GCC X86 Constraint Letters 5417 default: break; // Unknown constraint letter 5418 case 'A': // EAX/EDX 5419 if (VT == MVT::i32 || VT == MVT::i64) 5420 return make_vector<unsigned>(X86::EAX, X86::EDX, 0); 5421 break; 5422 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 5423 case 'Q': // Q_REGS 5424 if (VT == MVT::i32) 5425 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); 5426 else if (VT == MVT::i16) 5427 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); 5428 else if (VT == MVT::i8) 5429 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); 5430 break; 5431 } 5432 } 5433 5434 return std::vector<unsigned>(); 5435} 5436 5437std::pair<unsigned, const TargetRegisterClass*> 5438X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5439 MVT::ValueType VT) const { 5440 // First, see if this is a constraint that directly corresponds to an LLVM 5441 // register class. 5442 if (Constraint.size() == 1) { 5443 // GCC Constraint Letters 5444 switch (Constraint[0]) { 5445 default: break; 5446 case 'r': // GENERAL_REGS 5447 case 'R': // LEGACY_REGS 5448 case 'l': // INDEX_REGS 5449 if (VT == MVT::i64 && Subtarget->is64Bit()) 5450 return std::make_pair(0U, X86::GR64RegisterClass); 5451 if (VT == MVT::i32) 5452 return std::make_pair(0U, X86::GR32RegisterClass); 5453 else if (VT == MVT::i16) 5454 return std::make_pair(0U, X86::GR16RegisterClass); 5455 else if (VT == MVT::i8) 5456 return std::make_pair(0U, X86::GR8RegisterClass); 5457 break; 5458 case 'y': // MMX_REGS if MMX allowed. 5459 if (!Subtarget->hasMMX()) break; 5460 return std::make_pair(0U, X86::VR64RegisterClass); 5461 break; 5462 case 'Y': // SSE_REGS if SSE2 allowed 5463 if (!Subtarget->hasSSE2()) break; 5464 // FALL THROUGH. 5465 case 'x': // SSE_REGS if SSE1 allowed 5466 if (!Subtarget->hasSSE1()) break; 5467 5468 switch (VT) { 5469 default: break; 5470 // Scalar SSE types. 5471 case MVT::f32: 5472 case MVT::i32: 5473 return std::make_pair(0U, X86::FR32RegisterClass); 5474 case MVT::f64: 5475 case MVT::i64: 5476 return std::make_pair(0U, X86::FR64RegisterClass); 5477 // Vector types. 5478 case MVT::v16i8: 5479 case MVT::v8i16: 5480 case MVT::v4i32: 5481 case MVT::v2i64: 5482 case MVT::v4f32: 5483 case MVT::v2f64: 5484 return std::make_pair(0U, X86::VR128RegisterClass); 5485 } 5486 break; 5487 } 5488 } 5489 5490 // Use the default implementation in TargetLowering to convert the register 5491 // constraint into a member of a register class. 5492 std::pair<unsigned, const TargetRegisterClass*> Res; 5493 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5494 5495 // Not found as a standard register? 5496 if (Res.second == 0) { 5497 // GCC calls "st(0)" just plain "st". 5498 if (StringsEqualNoCase("{st}", Constraint)) { 5499 Res.first = X86::ST0; 5500 Res.second = X86::RFP80RegisterClass; 5501 } 5502 5503 return Res; 5504 } 5505 5506 // Otherwise, check to see if this is a register class of the wrong value 5507 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to 5508 // turn into {ax},{dx}. 5509 if (Res.second->hasType(VT)) 5510 return Res; // Correct type already, nothing to do. 5511 5512 // All of the single-register GCC register classes map their values onto 5513 // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we 5514 // really want an 8-bit or 32-bit register, map to the appropriate register 5515 // class and return the appropriate register. 5516 if (Res.second != X86::GR16RegisterClass) 5517 return Res; 5518 5519 if (VT == MVT::i8) { 5520 unsigned DestReg = 0; 5521 switch (Res.first) { 5522 default: break; 5523 case X86::AX: DestReg = X86::AL; break; 5524 case X86::DX: DestReg = X86::DL; break; 5525 case X86::CX: DestReg = X86::CL; break; 5526 case X86::BX: DestReg = X86::BL; break; 5527 } 5528 if (DestReg) { 5529 Res.first = DestReg; 5530 Res.second = Res.second = X86::GR8RegisterClass; 5531 } 5532 } else if (VT == MVT::i32) { 5533 unsigned DestReg = 0; 5534 switch (Res.first) { 5535 default: break; 5536 case X86::AX: DestReg = X86::EAX; break; 5537 case X86::DX: DestReg = X86::EDX; break; 5538 case X86::CX: DestReg = X86::ECX; break; 5539 case X86::BX: DestReg = X86::EBX; break; 5540 case X86::SI: DestReg = X86::ESI; break; 5541 case X86::DI: DestReg = X86::EDI; break; 5542 case X86::BP: DestReg = X86::EBP; break; 5543 case X86::SP: DestReg = X86::ESP; break; 5544 } 5545 if (DestReg) { 5546 Res.first = DestReg; 5547 Res.second = Res.second = X86::GR32RegisterClass; 5548 } 5549 } else if (VT == MVT::i64) { 5550 unsigned DestReg = 0; 5551 switch (Res.first) { 5552 default: break; 5553 case X86::AX: DestReg = X86::RAX; break; 5554 case X86::DX: DestReg = X86::RDX; break; 5555 case X86::CX: DestReg = X86::RCX; break; 5556 case X86::BX: DestReg = X86::RBX; break; 5557 case X86::SI: DestReg = X86::RSI; break; 5558 case X86::DI: DestReg = X86::RDI; break; 5559 case X86::BP: DestReg = X86::RBP; break; 5560 case X86::SP: DestReg = X86::RSP; break; 5561 } 5562 if (DestReg) { 5563 Res.first = DestReg; 5564 Res.second = Res.second = X86::GR64RegisterClass; 5565 } 5566 } 5567 5568 return Res; 5569} 5570