X86ISelLowering.cpp revision 5f6913cecd9f760c02754772cb57fa4d9a96624f
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSE) {
    setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
    setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8, Expand);
  setOperationAction(ISD::CTLZ, MVT::i8, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTLZ, MVT::i16, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
    setOperationAction(ISD::CTLZ, MVT::i64, Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT, MVT::f80, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::f80, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET, MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY, MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0)); // xorps / xorpd

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF, MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType(ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType(ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType(ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if (X86ScalarSSE) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When the stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for many Windows API
//  routines and the like. It differs from the C calling convention just a
//  little: the callee, not the caller, should clean up the stack. Symbols
//  should also be decorated in some fancy way :) It doesn't support any
//  vector arguments.

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
                                              SDOperand Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                  VA.getLocMemOffset());
  SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();

  if (Flags & ISD::ParamFlags::ByVal)
    return FIN;
  else
    return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn = StackSize; // Callee pops everything.
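    // Illustrative note (not from the original source): for a stdcall
    // function taking two i32 arguments, StackSize is 8 here, so its return
    // is emitted as `ret 8` and the caller reserves nothing.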
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0; // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee-pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the instruction takes 8n+4 bytes, so that the start of the
    // arguments and the arguments after the retaddr has been pushed are
    // aligned.
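    // Illustrative note (not from the original source): the caller's CALL
    // pushes a 4-byte return address, so a callee-pop amount of the form
    // 8n+4 keeps args + return address a multiple of 8; e.g. 8 bytes of
    // stack arguments are bumped to 12 below so that 12 + 4 = 16.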
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }

  VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
  BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                    const SDOperand &StackPtr,
                                    const CCValAssign &VA,
                                    SDOperand Chain,
                                    SDOperand Arg) {
  SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
  PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
                           ISD::ParamFlags::ByValAlignOffs);

    assert(Align >= 8);
    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
                    ISD::ParamFlags::ByValSizeOffs;

    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
    SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);

    return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
                       AlignNode);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the instruction takes 8n+4 bytes, so that the start of the
    // arguments and the arguments after the retaddr has been pushed are
    // aligned.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
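    // Illustrative note (not from the original source): an i8 or i16 argument
    // that the calling-convention table assigns to a 32-bit location
    // (VA.getLocVT() == MVT::i32) is widened below with SIGN_EXTEND,
    // ZERO_EXTEND, or ANY_EXTEND according to VA.getLocInfo().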
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                  X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
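      // Illustrative note (not from the original source): a 64-bit MMX
      // argument such as v2i32 arrives here as an i64 copied out of a GR64
      // register, so it is bit-converted back to the vector type below.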
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  BytesToPopOnReturn = 0; // Callee pops nothing.
  BytesCallerReserves = StackSize;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
1377 SmallVector<CCValAssign, 16> ArgLocs; 1378 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1379 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); 1380 1381 // Get a count of how many bytes are to be pushed on the stack. 1382 unsigned NumBytes = CCInfo.getNextStackOffset(); 1383 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1384 1385 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 1386 SmallVector<SDOperand, 8> MemOpChains; 1387 1388 SDOperand StackPtr; 1389 1390 // Walk the register/memloc assignments, inserting copies/loads. 1391 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1392 CCValAssign &VA = ArgLocs[i]; 1393 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1394 1395 // Promote the value if needed. 1396 switch (VA.getLocInfo()) { 1397 default: assert(0 && "Unknown loc info!"); 1398 case CCValAssign::Full: break; 1399 case CCValAssign::SExt: 1400 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1401 break; 1402 case CCValAssign::ZExt: 1403 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1404 break; 1405 case CCValAssign::AExt: 1406 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1407 break; 1408 } 1409 1410 if (VA.isRegLoc()) { 1411 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1412 } else { 1413 assert(VA.isMemLoc()); 1414 if (StackPtr.Val == 0) 1415 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 1416 1417 MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain, 1418 Arg)); 1419 } 1420 } 1421 1422 if (!MemOpChains.empty()) 1423 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1424 &MemOpChains[0], MemOpChains.size()); 1425 1426 // Build a sequence of copy-to-reg nodes chained together with token chain 1427 // and flag operands which copy the outgoing args into registers. 1428 SDOperand InFlag; 1429 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1430 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1431 InFlag); 1432 InFlag = Chain.getValue(1); 1433 } 1434 1435 if (isVarArg) { 1436 // From AMD64 ABI document: 1437 // For calls that may call functions that use varargs or stdargs 1438 // (prototype-less calls or calls to functions containing ellipsis (...) in 1439 // the declaration) %al is used as hidden argument to specify the number 1440 // of SSE registers used. The contents of %al do not need to match exactly 1441 // the number of registers, but must be an ubound on the number of SSE 1442 // registers used and is in the range 0 - 8 inclusive. 1443 1444 // Count the number of XMM registers allocated. 1445 static const unsigned XMMArgRegs[] = { 1446 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1447 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1448 }; 1449 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1450 1451 Chain = DAG.getCopyToReg(Chain, X86::AL, 1452 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1453 InFlag = Chain.getValue(1); 1454 } 1455 1456 // If the callee is a GlobalAddress node (quite common, every direct call is) 1457 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1458 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1459 // We should use extra load for direct calls to dllimported functions in 1460 // non-JIT mode. 
1461 if (getTargetMachine().getCodeModel() != CodeModel::Large 1462 && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1463 getTargetMachine(), true)) 1464 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1465 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1466 if (getTargetMachine().getCodeModel() != CodeModel::Large) 1467 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1468 1469 // Returns a chain & a flag for retval copy to use. 1470 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1471 SmallVector<SDOperand, 8> Ops; 1472 Ops.push_back(Chain); 1473 Ops.push_back(Callee); 1474 1475 // Add argument registers to the end of the list so that they are known live 1476 // into the call. 1477 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1478 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1479 RegsToPass[i].second.getValueType())); 1480 1481 if (InFlag.Val) 1482 Ops.push_back(InFlag); 1483 1484 // FIXME: Do not generate X86ISD::TAILCALL for now. 1485 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1486 NodeTys, &Ops[0], Ops.size()); 1487 InFlag = Chain.getValue(1); 1488 1489 // Returns a flag for retval copy to use. 1490 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1491 Ops.clear(); 1492 Ops.push_back(Chain); 1493 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1494 Ops.push_back(DAG.getConstant(0, getPointerTy())); 1495 Ops.push_back(InFlag); 1496 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1497 InFlag = Chain.getValue(1); 1498 1499 // Handle result values, copying them out of physregs into vregs that we 1500 // return. 1501 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1502} 1503 1504 1505//===----------------------------------------------------------------------===// 1506// Other Lowering Hooks 1507//===----------------------------------------------------------------------===// 1508 1509 1510SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1511 MachineFunction &MF = DAG.getMachineFunction(); 1512 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1513 int ReturnAddrIndex = FuncInfo->getRAIndex(); 1514 1515 if (ReturnAddrIndex == 0) { 1516 // Set up a frame object for the return address. 1517 if (Subtarget->is64Bit()) 1518 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 1519 else 1520 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1521 1522 FuncInfo->setRAIndex(ReturnAddrIndex); 1523 } 1524 1525 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1526} 1527 1528 1529 1530/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1531/// specific condition code. It returns a false if it cannot do a direct 1532/// translation. X86CC is the translated CondCode. LHS/RHS are modified as 1533/// needed. 1534static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1535 unsigned &X86CC, SDOperand &LHS, SDOperand &RHS, 1536 SelectionDAG &DAG) { 1537 X86CC = X86::COND_INVALID; 1538 if (!isFP) { 1539 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1540 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 1541 // X > -1 -> X == 0, jump !sign. 1542 RHS = DAG.getConstant(0, RHS.getValueType()); 1543 X86CC = X86::COND_NS; 1544 return true; 1545 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 1546 // X < 0 -> X == 0, jump on sign. 
1547       X86CC = X86::COND_S;
1548       return true;
1549     } else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
1550       // X < 1 -> X <= 0
1551       RHS = DAG.getConstant(0, RHS.getValueType());
1552       X86CC = X86::COND_LE;
1553       return true;
1554     }
1555   }
1556
1557   switch (SetCCOpcode) {
1558   default: break;
1559   case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1560   case ISD::SETGT:  X86CC = X86::COND_G;  break;
1561   case ISD::SETGE:  X86CC = X86::COND_GE; break;
1562   case ISD::SETLT:  X86CC = X86::COND_L;  break;
1563   case ISD::SETLE:  X86CC = X86::COND_LE; break;
1564   case ISD::SETNE:  X86CC = X86::COND_NE; break;
1565   case ISD::SETULT: X86CC = X86::COND_B;  break;
1566   case ISD::SETUGT: X86CC = X86::COND_A;  break;
1567   case ISD::SETULE: X86CC = X86::COND_BE; break;
1568   case ISD::SETUGE: X86CC = X86::COND_AE; break;
1569   }
1570 } else {
1571   // On a floating point condition, the flags are set as follows:
1572   //  ZF  PF  CF   op
1573   //  0 | 0 | 0 | X > Y
1574   //  0 | 0 | 1 | X < Y
1575   //  1 | 0 | 0 | X == Y
1576   //  1 | 1 | 1 | unordered
1577   bool Flip = false;
1578   switch (SetCCOpcode) {
1579   default: break;
1580   case ISD::SETUEQ:
1581   case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1582   case ISD::SETOLT: Flip = true; // Fallthrough
1583   case ISD::SETOGT:
1584   case ISD::SETGT:  X86CC = X86::COND_A;  break;
1585   case ISD::SETOLE: Flip = true; // Fallthrough
1586   case ISD::SETOGE:
1587   case ISD::SETGE:  X86CC = X86::COND_AE; break;
1588   case ISD::SETUGT: Flip = true; // Fallthrough
1589   case ISD::SETULT:
1590   case ISD::SETLT:  X86CC = X86::COND_B;  break;
1591   case ISD::SETUGE: Flip = true; // Fallthrough
1592   case ISD::SETULE:
1593   case ISD::SETLE:  X86CC = X86::COND_BE; break;
1594   case ISD::SETONE:
1595   case ISD::SETNE:  X86CC = X86::COND_NE; break;
1596   case ISD::SETUO:  X86CC = X86::COND_P;  break;
1597   case ISD::SETO:   X86CC = X86::COND_NP; break;
1598   }
1599   if (Flip)
1600     std::swap(LHS, RHS);
1601 }
1602
1603 return X86CC != X86::COND_INVALID;
1604 }
1605
1606 /// hasFPCMov - Is there a floating point cmov for the specific X86 condition
1607 /// code? The current x86 ISA includes the following FP cmov instructions:
1608 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1609 static bool hasFPCMov(unsigned X86CC) {
1610   switch (X86CC) {
1611   default:
1612     return false;
1613   case X86::COND_B:
1614   case X86::COND_BE:
1615   case X86::COND_E:
1616   case X86::COND_P:
1617   case X86::COND_A:
1618   case X86::COND_AE:
1619   case X86::COND_NE:
1620   case X86::COND_NP:
1621     return true;
1622   }
1623 }
1624
1625 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
1626 /// true if Op is undef or if its value falls within the specified range [Low, Hi).
1627 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1628   if (Op.getOpcode() == ISD::UNDEF)
1629     return true;
1630
1631   unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1632   return (Val >= Low && Val < Hi);
1633 }
1634
1635 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
1636 /// true if Op is undef or if its value is equal to the specified value.
1637 static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1638   if (Op.getOpcode() == ISD::UNDEF)
1639     return true;
1640   return cast<ConstantSDNode>(Op)->getValue() == Val;
1641 }
1642
1643 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1644 /// specifies a shuffle of elements that is suitable for input to PSHUFD.
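/// A PSHUFD mask refers only to elements of the first vector; e.g. for a
/// 4-element shuffle, <2, 1, 0, 3> qualifies while <0, 4, 1, 5> does not,
/// since indices 4 and 5 select from the second vector.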
1645bool X86::isPSHUFDMask(SDNode *N) { 1646 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1647 1648 if (N->getNumOperands() != 2 && N->getNumOperands() != 4) 1649 return false; 1650 1651 // Check if the value doesn't reference the second vector. 1652 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1653 SDOperand Arg = N->getOperand(i); 1654 if (Arg.getOpcode() == ISD::UNDEF) continue; 1655 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1656 if (cast<ConstantSDNode>(Arg)->getValue() >= e) 1657 return false; 1658 } 1659 1660 return true; 1661} 1662 1663/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1664/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1665bool X86::isPSHUFHWMask(SDNode *N) { 1666 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1667 1668 if (N->getNumOperands() != 8) 1669 return false; 1670 1671 // Lower quadword copied in order. 1672 for (unsigned i = 0; i != 4; ++i) { 1673 SDOperand Arg = N->getOperand(i); 1674 if (Arg.getOpcode() == ISD::UNDEF) continue; 1675 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1676 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1677 return false; 1678 } 1679 1680 // Upper quadword shuffled. 1681 for (unsigned i = 4; i != 8; ++i) { 1682 SDOperand Arg = N->getOperand(i); 1683 if (Arg.getOpcode() == ISD::UNDEF) continue; 1684 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1685 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1686 if (Val < 4 || Val > 7) 1687 return false; 1688 } 1689 1690 return true; 1691} 1692 1693/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1694/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1695bool X86::isPSHUFLWMask(SDNode *N) { 1696 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1697 1698 if (N->getNumOperands() != 8) 1699 return false; 1700 1701 // Upper quadword copied in order. 1702 for (unsigned i = 4; i != 8; ++i) 1703 if (!isUndefOrEqual(N->getOperand(i), i)) 1704 return false; 1705 1706 // Lower quadword shuffled. 1707 for (unsigned i = 0; i != 4; ++i) 1708 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1709 return false; 1710 1711 return true; 1712} 1713 1714/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1715/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1716static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 1717 if (NumElems != 2 && NumElems != 4) return false; 1718 1719 unsigned Half = NumElems / 2; 1720 for (unsigned i = 0; i < Half; ++i) 1721 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 1722 return false; 1723 for (unsigned i = Half; i < NumElems; ++i) 1724 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 1725 return false; 1726 1727 return true; 1728} 1729 1730bool X86::isSHUFPMask(SDNode *N) { 1731 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1732 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 1733} 1734 1735/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 1736/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1737/// half elements to come from vector 1 (which would equal the dest.) and 1738/// the upper half to come from vector 2. 
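/// For a 4-element shuffle, <0, 1, 4, 5> is a mask SHUFPS can use directly,
/// while <4, 5, 0, 1> is its commuted form.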
1739static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 1740 if (NumOps != 2 && NumOps != 4) return false; 1741 1742 unsigned Half = NumOps / 2; 1743 for (unsigned i = 0; i < Half; ++i) 1744 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 1745 return false; 1746 for (unsigned i = Half; i < NumOps; ++i) 1747 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 1748 return false; 1749 return true; 1750} 1751 1752static bool isCommutedSHUFP(SDNode *N) { 1753 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1754 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 1755} 1756 1757/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1758/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1759bool X86::isMOVHLPSMask(SDNode *N) { 1760 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1761 1762 if (N->getNumOperands() != 4) 1763 return false; 1764 1765 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1766 return isUndefOrEqual(N->getOperand(0), 6) && 1767 isUndefOrEqual(N->getOperand(1), 7) && 1768 isUndefOrEqual(N->getOperand(2), 2) && 1769 isUndefOrEqual(N->getOperand(3), 3); 1770} 1771 1772/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 1773/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 1774/// <2, 3, 2, 3> 1775bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 1776 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1777 1778 if (N->getNumOperands() != 4) 1779 return false; 1780 1781 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 1782 return isUndefOrEqual(N->getOperand(0), 2) && 1783 isUndefOrEqual(N->getOperand(1), 3) && 1784 isUndefOrEqual(N->getOperand(2), 2) && 1785 isUndefOrEqual(N->getOperand(3), 3); 1786} 1787 1788/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1789/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1790bool X86::isMOVLPMask(SDNode *N) { 1791 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1792 1793 unsigned NumElems = N->getNumOperands(); 1794 if (NumElems != 2 && NumElems != 4) 1795 return false; 1796 1797 for (unsigned i = 0; i < NumElems/2; ++i) 1798 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1799 return false; 1800 1801 for (unsigned i = NumElems/2; i < NumElems; ++i) 1802 if (!isUndefOrEqual(N->getOperand(i), i)) 1803 return false; 1804 1805 return true; 1806} 1807 1808/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1809/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1810/// and MOVLHPS. 1811bool X86::isMOVHPMask(SDNode *N) { 1812 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1813 1814 unsigned NumElems = N->getNumOperands(); 1815 if (NumElems != 2 && NumElems != 4) 1816 return false; 1817 1818 for (unsigned i = 0; i < NumElems/2; ++i) 1819 if (!isUndefOrEqual(N->getOperand(i), i)) 1820 return false; 1821 1822 for (unsigned i = 0; i < NumElems/2; ++i) { 1823 SDOperand Arg = N->getOperand(i + NumElems/2); 1824 if (!isUndefOrEqual(Arg, i + NumElems)) 1825 return false; 1826 } 1827 1828 return true; 1829} 1830 1831/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1832/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
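/// An UNPCKL mask interleaves the low halves of the two vectors; for a
/// 4-element shuffle this is <0, 4, 1, 5>.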
1833bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts, 1834 bool V2IsSplat = false) { 1835 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1836 return false; 1837 1838 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1839 SDOperand BitI = Elts[i]; 1840 SDOperand BitI1 = Elts[i+1]; 1841 if (!isUndefOrEqual(BitI, j)) 1842 return false; 1843 if (V2IsSplat) { 1844 if (isUndefOrEqual(BitI1, NumElts)) 1845 return false; 1846 } else { 1847 if (!isUndefOrEqual(BitI1, j + NumElts)) 1848 return false; 1849 } 1850 } 1851 1852 return true; 1853} 1854 1855bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1856 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1857 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1858} 1859 1860/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1861/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1862bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts, 1863 bool V2IsSplat = false) { 1864 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1865 return false; 1866 1867 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1868 SDOperand BitI = Elts[i]; 1869 SDOperand BitI1 = Elts[i+1]; 1870 if (!isUndefOrEqual(BitI, j + NumElts/2)) 1871 return false; 1872 if (V2IsSplat) { 1873 if (isUndefOrEqual(BitI1, NumElts)) 1874 return false; 1875 } else { 1876 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 1877 return false; 1878 } 1879 } 1880 1881 return true; 1882} 1883 1884bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1885 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1886 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1887} 1888 1889/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1890/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1891/// <0, 0, 1, 1> 1892bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1893 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1894 1895 unsigned NumElems = N->getNumOperands(); 1896 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1897 return false; 1898 1899 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1900 SDOperand BitI = N->getOperand(i); 1901 SDOperand BitI1 = N->getOperand(i+1); 1902 1903 if (!isUndefOrEqual(BitI, j)) 1904 return false; 1905 if (!isUndefOrEqual(BitI1, j)) 1906 return false; 1907 } 1908 1909 return true; 1910} 1911 1912/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 1913/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 1914/// <2, 2, 3, 3> 1915bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 1916 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1917 1918 unsigned NumElems = N->getNumOperands(); 1919 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1920 return false; 1921 1922 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 1923 SDOperand BitI = N->getOperand(i); 1924 SDOperand BitI1 = N->getOperand(i + 1); 1925 1926 if (!isUndefOrEqual(BitI, j)) 1927 return false; 1928 if (!isUndefOrEqual(BitI1, j)) 1929 return false; 1930 } 1931 1932 return true; 1933} 1934 1935/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1936/// specifies a shuffle of elements that is suitable for input to MOVSS, 1937/// MOVSD, and MOVD, i.e. setting the lowest element. 
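/// For a 4-element shuffle this is <4, 1, 2, 3>: the lowest element comes
/// from the second vector and the remaining elements come from the first
/// vector in order.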
1938static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) { 1939 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1940 return false; 1941 1942 if (!isUndefOrEqual(Elts[0], NumElts)) 1943 return false; 1944 1945 for (unsigned i = 1; i < NumElts; ++i) { 1946 if (!isUndefOrEqual(Elts[i], i)) 1947 return false; 1948 } 1949 1950 return true; 1951} 1952 1953bool X86::isMOVLMask(SDNode *N) { 1954 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1955 return ::isMOVLMask(N->op_begin(), N->getNumOperands()); 1956} 1957 1958/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 1959/// of what x86 movss want. X86 movs requires the lowest element to be lowest 1960/// element of vector 2 and the other elements to come from vector 1 in order. 1961static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps, 1962 bool V2IsSplat = false, 1963 bool V2IsUndef = false) { 1964 if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) 1965 return false; 1966 1967 if (!isUndefOrEqual(Ops[0], 0)) 1968 return false; 1969 1970 for (unsigned i = 1; i < NumOps; ++i) { 1971 SDOperand Arg = Ops[i]; 1972 if (!(isUndefOrEqual(Arg, i+NumOps) || 1973 (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || 1974 (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) 1975 return false; 1976 } 1977 1978 return true; 1979} 1980 1981static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 1982 bool V2IsUndef = false) { 1983 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1984 return isCommutedMOVL(N->op_begin(), N->getNumOperands(), 1985 V2IsSplat, V2IsUndef); 1986} 1987 1988/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1989/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1990bool X86::isMOVSHDUPMask(SDNode *N) { 1991 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1992 1993 if (N->getNumOperands() != 4) 1994 return false; 1995 1996 // Expect 1, 1, 3, 3 1997 for (unsigned i = 0; i < 2; ++i) { 1998 SDOperand Arg = N->getOperand(i); 1999 if (Arg.getOpcode() == ISD::UNDEF) continue; 2000 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2001 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2002 if (Val != 1) return false; 2003 } 2004 2005 bool HasHi = false; 2006 for (unsigned i = 2; i < 4; ++i) { 2007 SDOperand Arg = N->getOperand(i); 2008 if (Arg.getOpcode() == ISD::UNDEF) continue; 2009 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2010 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2011 if (Val != 3) return false; 2012 HasHi = true; 2013 } 2014 2015 // Don't use movshdup if it can be done with a shufps. 2016 return HasHi; 2017} 2018 2019/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 2020/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
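/// MOVSLDUP duplicates the even elements, so the expected mask is <0, 0, 2, 2>.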
2021 bool X86::isMOVSLDUPMask(SDNode *N) {
2022   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2023
2024   if (N->getNumOperands() != 4)
2025     return false;
2026
2027   // Expect 0, 0, 2, 2
2028   for (unsigned i = 0; i < 2; ++i) {
2029     SDOperand Arg = N->getOperand(i);
2030     if (Arg.getOpcode() == ISD::UNDEF) continue;
2031     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2032     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2033     if (Val != 0) return false;
2034   }
2035
2036   bool HasHi = false;
2037   for (unsigned i = 2; i < 4; ++i) {
2038     SDOperand Arg = N->getOperand(i);
2039     if (Arg.getOpcode() == ISD::UNDEF) continue;
2040     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2041     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2042     if (Val != 2) return false;
2043     HasHi = true;
2044   }
2045
2046   // Don't use movsldup if it can be done with a shufps.
2047   return HasHi;
2048 }
2049
2050 /// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
2051 /// specifies an identity operation on the LHS or RHS.
2052 static bool isIdentityMask(SDNode *N, bool RHS = false) {
2053   unsigned NumElems = N->getNumOperands();
2054   for (unsigned i = 0; i < NumElems; ++i)
2055     if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
2056       return false;
2057   return true;
2058 }
2059
2060 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2061 /// a splat of a single element.
2062 static bool isSplatMask(SDNode *N) {
2063   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2064
2065   // This is a splat operation if each element of the permute is the same, and
2066   // if the value doesn't reference the second vector.
2067   unsigned NumElems = N->getNumOperands();
2068   SDOperand ElementBase;
2069   unsigned i = 0;
2070   for (; i != NumElems; ++i) {
2071     SDOperand Elt = N->getOperand(i);
2072     if (isa<ConstantSDNode>(Elt)) {
2073       ElementBase = Elt;
2074       break;
2075     }
2076   }
2077
2078   if (!ElementBase.Val)
2079     return false;
2080
2081   for (; i != NumElems; ++i) {
2082     SDOperand Arg = N->getOperand(i);
2083     if (Arg.getOpcode() == ISD::UNDEF) continue;
2084     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2085     if (Arg != ElementBase) return false;
2086   }
2087
2088   // Make sure it is a splat of the first vector operand.
2089   return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2090 }
2091
2092 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2093 /// a splat of a single element and it's a 2 or 4 element mask.
2094 bool X86::isSplatMask(SDNode *N) {
2095   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2096
2097   // We can only splat 64-bit and 32-bit quantities with a single instruction.
2098   if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2099     return false;
2100   return ::isSplatMask(N);
2101 }
2102
2103 /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
2104 /// specifies a splat of element zero.
2105 bool X86::isSplatLoMask(SDNode *N) {
2106   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2107
2108   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
2109     if (!isUndefOrEqual(N->getOperand(i), 0))
2110       return false;
2111   return true;
2112 }
2113
2114 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2115 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2116 /// instructions.
2117 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2118   unsigned NumOperands = N->getNumOperands();
2119   unsigned Shift = (NumOperands == 4) ? 2 : 1;
2120   unsigned Mask = 0;
2121   for (unsigned i = 0; i < NumOperands; ++i) {
2122     unsigned Val = 0;
2123     SDOperand Arg = N->getOperand(NumOperands-i-1);
2124     if (Arg.getOpcode() != ISD::UNDEF)
2125       Val = cast<ConstantSDNode>(Arg)->getValue();
2126     if (Val >= NumOperands) Val -= NumOperands;
2127     Mask |= Val;
2128     if (i != NumOperands - 1)
2129       Mask <<= Shift;
2130   }
2131
2132   return Mask;
2133 }
2134
2135 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2136 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2137 /// instructions.
2138 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2139   unsigned Mask = 0;
2140   // 8 nodes, but we only care about the last 4.
2141   for (unsigned i = 7; i >= 4; --i) {
2142     unsigned Val = 0;
2143     SDOperand Arg = N->getOperand(i);
2144     if (Arg.getOpcode() != ISD::UNDEF)
2145       Val = cast<ConstantSDNode>(Arg)->getValue();
2146     Mask |= (Val - 4);
2147     if (i != 4)
2148       Mask <<= 2;
2149   }
2150
2151   return Mask;
2152 }
2153
2154 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2155 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2156 /// instructions.
2157 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2158   unsigned Mask = 0;
2159   // 8 nodes, but we only care about the first 4.
2160   for (int i = 3; i >= 0; --i) {
2161     unsigned Val = 0;
2162     SDOperand Arg = N->getOperand(i);
2163     if (Arg.getOpcode() != ISD::UNDEF)
2164       Val = cast<ConstantSDNode>(Arg)->getValue();
2165     Mask |= Val;
2166     if (i != 0)
2167       Mask <<= 2;
2168   }
2169
2170   return Mask;
2171 }
2172
2173 /// isPSHUFHW_PSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
2174 /// specifies an 8-element shuffle that can be broken into a pair of
2175 /// PSHUFHW and PSHUFLW.
2176 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2177   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2178
2179   if (N->getNumOperands() != 8)
2180     return false;
2181
2182   // Lower quadword shuffled.
2183   for (unsigned i = 0; i != 4; ++i) {
2184     SDOperand Arg = N->getOperand(i);
2185     if (Arg.getOpcode() == ISD::UNDEF) continue;
2186     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2187     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2188     if (Val > 4)
2189       return false;
2190   }
2191
2192   // Upper quadword shuffled.
2193   for (unsigned i = 4; i != 8; ++i) {
2194     SDOperand Arg = N->getOperand(i);
2195     if (Arg.getOpcode() == ISD::UNDEF) continue;
2196     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2197     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2198     if (Val < 4 || Val > 7)
2199       return false;
2200   }
2201
2202   return true;
2203 }
2204
2205 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as
2206 /// the values in their permute mask.
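/// For example, vector_shuffle V1, V2, <0, 5, 2, 7> becomes
/// vector_shuffle V2, V1, <4, 1, 6, 3>.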
2207static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, 2208 SDOperand &V2, SDOperand &Mask, 2209 SelectionDAG &DAG) { 2210 MVT::ValueType VT = Op.getValueType(); 2211 MVT::ValueType MaskVT = Mask.getValueType(); 2212 MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); 2213 unsigned NumElems = Mask.getNumOperands(); 2214 SmallVector<SDOperand, 8> MaskVec; 2215 2216 for (unsigned i = 0; i != NumElems; ++i) { 2217 SDOperand Arg = Mask.getOperand(i); 2218 if (Arg.getOpcode() == ISD::UNDEF) { 2219 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2220 continue; 2221 } 2222 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2223 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2224 if (Val < NumElems) 2225 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2226 else 2227 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2228 } 2229 2230 std::swap(V1, V2); 2231 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2232 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2233} 2234 2235/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2236/// match movhlps. The lower half elements should come from upper half of 2237/// V1 (and in order), and the upper half elements should come from the upper 2238/// half of V2 (and in order). 2239static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2240 unsigned NumElems = Mask->getNumOperands(); 2241 if (NumElems != 4) 2242 return false; 2243 for (unsigned i = 0, e = 2; i != e; ++i) 2244 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2245 return false; 2246 for (unsigned i = 2; i != 4; ++i) 2247 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2248 return false; 2249 return true; 2250} 2251 2252/// isScalarLoadToVector - Returns true if the node is a scalar load that 2253/// is promoted to a vector. 2254static inline bool isScalarLoadToVector(SDNode *N) { 2255 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2256 N = N->getOperand(0).Val; 2257 return ISD::isNON_EXTLoad(N); 2258 } 2259 return false; 2260} 2261 2262/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2263/// match movlp{s|d}. The lower half elements should come from lower half of 2264/// V1 (and in order), and the upper half elements should come from the upper 2265/// half of V2 (and in order). And since V1 will become the source of the 2266/// MOVLP, it must be either a vector load or a scalar load to vector. 2267static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 2268 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 2269 return false; 2270 // Is V2 is a vector load, don't do this transformation. We will try to use 2271 // load folding shufps op. 2272 if (ISD::isNON_EXTLoad(V2)) 2273 return false; 2274 2275 unsigned NumElems = Mask->getNumOperands(); 2276 if (NumElems != 2 && NumElems != 4) 2277 return false; 2278 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2279 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2280 return false; 2281 for (unsigned i = NumElems/2; i != NumElems; ++i) 2282 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2283 return false; 2284 return true; 2285} 2286 2287/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2288/// all the same. 
2289static bool isSplatVector(SDNode *N) { 2290 if (N->getOpcode() != ISD::BUILD_VECTOR) 2291 return false; 2292 2293 SDOperand SplatValue = N->getOperand(0); 2294 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2295 if (N->getOperand(i) != SplatValue) 2296 return false; 2297 return true; 2298} 2299 2300/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2301/// to an undef. 2302static bool isUndefShuffle(SDNode *N) { 2303 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2304 return false; 2305 2306 SDOperand V1 = N->getOperand(0); 2307 SDOperand V2 = N->getOperand(1); 2308 SDOperand Mask = N->getOperand(2); 2309 unsigned NumElems = Mask.getNumOperands(); 2310 for (unsigned i = 0; i != NumElems; ++i) { 2311 SDOperand Arg = Mask.getOperand(i); 2312 if (Arg.getOpcode() != ISD::UNDEF) { 2313 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2314 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2315 return false; 2316 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2317 return false; 2318 } 2319 } 2320 return true; 2321} 2322 2323/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2324/// constant +0.0. 2325static inline bool isZeroNode(SDOperand Elt) { 2326 return ((isa<ConstantSDNode>(Elt) && 2327 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2328 (isa<ConstantFPSDNode>(Elt) && 2329 cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); 2330} 2331 2332/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2333/// to an zero vector. 2334static bool isZeroShuffle(SDNode *N) { 2335 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2336 return false; 2337 2338 SDOperand V1 = N->getOperand(0); 2339 SDOperand V2 = N->getOperand(1); 2340 SDOperand Mask = N->getOperand(2); 2341 unsigned NumElems = Mask.getNumOperands(); 2342 for (unsigned i = 0; i != NumElems; ++i) { 2343 SDOperand Arg = Mask.getOperand(i); 2344 if (Arg.getOpcode() != ISD::UNDEF) { 2345 unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); 2346 if (Idx < NumElems) { 2347 unsigned Opc = V1.Val->getOpcode(); 2348 if (Opc == ISD::UNDEF) 2349 continue; 2350 if (Opc != ISD::BUILD_VECTOR || 2351 !isZeroNode(V1.Val->getOperand(Idx))) 2352 return false; 2353 } else if (Idx >= NumElems) { 2354 unsigned Opc = V2.Val->getOpcode(); 2355 if (Opc == ISD::UNDEF) 2356 continue; 2357 if (Opc != ISD::BUILD_VECTOR || 2358 !isZeroNode(V2.Val->getOperand(Idx - NumElems))) 2359 return false; 2360 } 2361 } 2362 } 2363 return true; 2364} 2365 2366/// getZeroVector - Returns a vector of specified type with all zero elements. 2367/// 2368static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2369 assert(MVT::isVector(VT) && "Expected a vector type"); 2370 unsigned NumElems = MVT::getVectorNumElements(VT); 2371 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2372 bool isFP = MVT::isFloatingPoint(EVT); 2373 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2374 SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero); 2375 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2376} 2377 2378/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2379/// that point to V2 points to its first element. 
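/// For example, with four elements the mask <0, 6, 2, 5> is normalized to
/// <0, 4, 2, 4>.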
2380static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2381 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2382 2383 bool Changed = false; 2384 SmallVector<SDOperand, 8> MaskVec; 2385 unsigned NumElems = Mask.getNumOperands(); 2386 for (unsigned i = 0; i != NumElems; ++i) { 2387 SDOperand Arg = Mask.getOperand(i); 2388 if (Arg.getOpcode() != ISD::UNDEF) { 2389 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2390 if (Val > NumElems) { 2391 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2392 Changed = true; 2393 } 2394 } 2395 MaskVec.push_back(Arg); 2396 } 2397 2398 if (Changed) 2399 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2400 &MaskVec[0], MaskVec.size()); 2401 return Mask; 2402} 2403 2404/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2405/// operation of specified width. 2406static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2407 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2408 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2409 2410 SmallVector<SDOperand, 8> MaskVec; 2411 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2412 for (unsigned i = 1; i != NumElems; ++i) 2413 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2414 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2415} 2416 2417/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2418/// of specified width. 2419static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2420 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2421 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2422 SmallVector<SDOperand, 8> MaskVec; 2423 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2424 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2425 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2426 } 2427 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2428} 2429 2430/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2431/// of specified width. 2432static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2433 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2434 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2435 unsigned Half = NumElems/2; 2436 SmallVector<SDOperand, 8> MaskVec; 2437 for (unsigned i = 0; i != Half; ++i) { 2438 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2439 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2440 } 2441 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2442} 2443 2444/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
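/// The vector is repeatedly unpacked low with itself, then bitcast to v4i32,
/// shuffled with an all-zero mask to broadcast element zero, and finally
/// bitcast back to the original type.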
2445/// 2446static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2447 SDOperand V1 = Op.getOperand(0); 2448 SDOperand Mask = Op.getOperand(2); 2449 MVT::ValueType VT = Op.getValueType(); 2450 unsigned NumElems = Mask.getNumOperands(); 2451 Mask = getUnpacklMask(NumElems, DAG); 2452 while (NumElems != 4) { 2453 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2454 NumElems >>= 1; 2455 } 2456 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2457 2458 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2459 Mask = getZeroVector(MaskVT, DAG); 2460 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2461 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2462 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2463} 2464 2465/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2466/// vector of zero or undef vector. 2467static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2468 unsigned NumElems, unsigned Idx, 2469 bool isZero, SelectionDAG &DAG) { 2470 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2471 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2472 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2473 SDOperand Zero = DAG.getConstant(0, EVT); 2474 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2475 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2476 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2477 &MaskVec[0], MaskVec.size()); 2478 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2479} 2480 2481/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2482/// 2483static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2484 unsigned NumNonZero, unsigned NumZero, 2485 SelectionDAG &DAG, TargetLowering &TLI) { 2486 if (NumNonZero > 8) 2487 return SDOperand(); 2488 2489 SDOperand V(0, 0); 2490 bool First = true; 2491 for (unsigned i = 0; i < 16; ++i) { 2492 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2493 if (ThisIsNonZero && First) { 2494 if (NumZero) 2495 V = getZeroVector(MVT::v8i16, DAG); 2496 else 2497 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2498 First = false; 2499 } 2500 2501 if ((i & 1) != 0) { 2502 SDOperand ThisElt(0, 0), LastElt(0, 0); 2503 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2504 if (LastIsNonZero) { 2505 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2506 } 2507 if (ThisIsNonZero) { 2508 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2509 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2510 ThisElt, DAG.getConstant(8, MVT::i8)); 2511 if (LastIsNonZero) 2512 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2513 } else 2514 ThisElt = LastElt; 2515 2516 if (ThisElt.Val) 2517 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2518 DAG.getConstant(i/2, TLI.getPointerTy())); 2519 } 2520 } 2521 2522 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2523} 2524 2525/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
2526/// 2527static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2528 unsigned NumNonZero, unsigned NumZero, 2529 SelectionDAG &DAG, TargetLowering &TLI) { 2530 if (NumNonZero > 4) 2531 return SDOperand(); 2532 2533 SDOperand V(0, 0); 2534 bool First = true; 2535 for (unsigned i = 0; i < 8; ++i) { 2536 bool isNonZero = (NonZeros & (1 << i)) != 0; 2537 if (isNonZero) { 2538 if (First) { 2539 if (NumZero) 2540 V = getZeroVector(MVT::v8i16, DAG); 2541 else 2542 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2543 First = false; 2544 } 2545 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2546 DAG.getConstant(i, TLI.getPointerTy())); 2547 } 2548 } 2549 2550 return V; 2551} 2552 2553SDOperand 2554X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2555 // All zero's are handled with pxor. 2556 if (ISD::isBuildVectorAllZeros(Op.Val)) 2557 return Op; 2558 2559 // All one's are handled with pcmpeqd. 2560 if (ISD::isBuildVectorAllOnes(Op.Val)) 2561 return Op; 2562 2563 MVT::ValueType VT = Op.getValueType(); 2564 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2565 unsigned EVTBits = MVT::getSizeInBits(EVT); 2566 2567 unsigned NumElems = Op.getNumOperands(); 2568 unsigned NumZero = 0; 2569 unsigned NumNonZero = 0; 2570 unsigned NonZeros = 0; 2571 unsigned NumNonZeroImms = 0; 2572 std::set<SDOperand> Values; 2573 for (unsigned i = 0; i < NumElems; ++i) { 2574 SDOperand Elt = Op.getOperand(i); 2575 if (Elt.getOpcode() != ISD::UNDEF) { 2576 Values.insert(Elt); 2577 if (isZeroNode(Elt)) 2578 NumZero++; 2579 else { 2580 NonZeros |= (1 << i); 2581 NumNonZero++; 2582 if (Elt.getOpcode() == ISD::Constant || 2583 Elt.getOpcode() == ISD::ConstantFP) 2584 NumNonZeroImms++; 2585 } 2586 } 2587 } 2588 2589 if (NumNonZero == 0) { 2590 if (NumZero == 0) 2591 // All undef vector. Return an UNDEF. 2592 return DAG.getNode(ISD::UNDEF, VT); 2593 else 2594 // A mix of zero and undef. Return a zero vector. 2595 return getZeroVector(VT, DAG); 2596 } 2597 2598 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2599 if (Values.size() == 1) 2600 return SDOperand(); 2601 2602 // Special case for single non-zero element. 2603 if (NumNonZero == 1) { 2604 unsigned Idx = CountTrailingZeros_32(NonZeros); 2605 SDOperand Item = Op.getOperand(Idx); 2606 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2607 if (Idx == 0) 2608 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2609 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2610 NumZero > 0, DAG); 2611 2612 if (EVTBits == 32) { 2613 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2614 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2615 DAG); 2616 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2617 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2618 SmallVector<SDOperand, 8> MaskVec; 2619 for (unsigned i = 0; i < NumElems; i++) 2620 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2621 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2622 &MaskVec[0], MaskVec.size()); 2623 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2624 DAG.getNode(ISD::UNDEF, VT), Mask); 2625 } 2626 } 2627 2628 // A vector full of immediates; various special cases are already 2629 // handled, so this is best done with a single constant-pool load. 2630 if (NumNonZero == NumNonZeroImms) 2631 return SDOperand(); 2632 2633 // Let legalizer expand 2-wide build_vectors. 
2634 if (EVTBits == 64) 2635 return SDOperand(); 2636 2637 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2638 if (EVTBits == 8 && NumElems == 16) { 2639 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2640 *this); 2641 if (V.Val) return V; 2642 } 2643 2644 if (EVTBits == 16 && NumElems == 8) { 2645 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2646 *this); 2647 if (V.Val) return V; 2648 } 2649 2650 // If element VT is == 32 bits, turn it into a number of shuffles. 2651 SmallVector<SDOperand, 8> V; 2652 V.resize(NumElems); 2653 if (NumElems == 4 && NumZero > 0) { 2654 for (unsigned i = 0; i < 4; ++i) { 2655 bool isZero = !(NonZeros & (1 << i)); 2656 if (isZero) 2657 V[i] = getZeroVector(VT, DAG); 2658 else 2659 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2660 } 2661 2662 for (unsigned i = 0; i < 2; ++i) { 2663 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2664 default: break; 2665 case 0: 2666 V[i] = V[i*2]; // Must be a zero vector. 2667 break; 2668 case 1: 2669 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2670 getMOVLMask(NumElems, DAG)); 2671 break; 2672 case 2: 2673 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2674 getMOVLMask(NumElems, DAG)); 2675 break; 2676 case 3: 2677 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2678 getUnpacklMask(NumElems, DAG)); 2679 break; 2680 } 2681 } 2682 2683 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 2684 // clears the upper bits. 2685 // FIXME: we can do the same for v4f32 case when we know both parts of 2686 // the lower half come from scalar_to_vector (loadf32). We should do 2687 // that in post legalizer dag combiner with target specific hooks. 2688 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2689 return V[0]; 2690 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2691 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2692 SmallVector<SDOperand, 8> MaskVec; 2693 bool Reverse = (NonZeros & 0x3) == 2; 2694 for (unsigned i = 0; i < 2; ++i) 2695 if (Reverse) 2696 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2697 else 2698 MaskVec.push_back(DAG.getConstant(i, EVT)); 2699 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2700 for (unsigned i = 0; i < 2; ++i) 2701 if (Reverse) 2702 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2703 else 2704 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2705 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2706 &MaskVec[0], MaskVec.size()); 2707 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2708 } 2709 2710 if (Values.size() > 2) { 2711 // Expand into a number of unpckl*. 2712 // e.g. 
for v4f32 2713 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2714 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2715 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2716 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2717 for (unsigned i = 0; i < NumElems; ++i) 2718 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2719 NumElems >>= 1; 2720 while (NumElems != 0) { 2721 for (unsigned i = 0; i < NumElems; ++i) 2722 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2723 UnpckMask); 2724 NumElems >>= 1; 2725 } 2726 return V[0]; 2727 } 2728 2729 return SDOperand(); 2730} 2731 2732SDOperand 2733X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2734 SDOperand V1 = Op.getOperand(0); 2735 SDOperand V2 = Op.getOperand(1); 2736 SDOperand PermMask = Op.getOperand(2); 2737 MVT::ValueType VT = Op.getValueType(); 2738 unsigned NumElems = PermMask.getNumOperands(); 2739 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2740 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2741 bool V1IsSplat = false; 2742 bool V2IsSplat = false; 2743 2744 if (isUndefShuffle(Op.Val)) 2745 return DAG.getNode(ISD::UNDEF, VT); 2746 2747 if (isZeroShuffle(Op.Val)) 2748 return getZeroVector(VT, DAG); 2749 2750 if (isIdentityMask(PermMask.Val)) 2751 return V1; 2752 else if (isIdentityMask(PermMask.Val, true)) 2753 return V2; 2754 2755 if (isSplatMask(PermMask.Val)) { 2756 if (NumElems <= 4) return Op; 2757 // Promote it to a v4i32 splat. 2758 return PromoteSplat(Op, DAG); 2759 } 2760 2761 if (X86::isMOVLMask(PermMask.Val)) 2762 return (V1IsUndef) ? V2 : Op; 2763 2764 if (X86::isMOVSHDUPMask(PermMask.Val) || 2765 X86::isMOVSLDUPMask(PermMask.Val) || 2766 X86::isMOVHLPSMask(PermMask.Val) || 2767 X86::isMOVHPMask(PermMask.Val) || 2768 X86::isMOVLPMask(PermMask.Val)) 2769 return Op; 2770 2771 if (ShouldXformToMOVHLPS(PermMask.Val) || 2772 ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val)) 2773 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2774 2775 bool Commuted = false; 2776 V1IsSplat = isSplatVector(V1.Val); 2777 V2IsSplat = isSplatVector(V2.Val); 2778 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 2779 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2780 std::swap(V1IsSplat, V2IsSplat); 2781 std::swap(V1IsUndef, V2IsUndef); 2782 Commuted = true; 2783 } 2784 2785 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 2786 if (V2IsUndef) return V1; 2787 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2788 if (V2IsSplat) { 2789 // V2 is a splat, so the mask may be malformed. That is, it may point 2790 // to any V2 element. The instruction selectior won't like this. Get 2791 // a corrected mask and commute to form a proper MOVS{S|D}. 2792 SDOperand NewMask = getMOVLMask(NumElems, DAG); 2793 if (NewMask.Val != PermMask.Val) 2794 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2795 } 2796 return Op; 2797 } 2798 2799 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2800 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2801 X86::isUNPCKLMask(PermMask.Val) || 2802 X86::isUNPCKHMask(PermMask.Val)) 2803 return Op; 2804 2805 if (V2IsSplat) { 2806 // Normalize mask so all entries that point to V2 points to its first 2807 // element then try to match unpck{h|l} again. If match, return a 2808 // new vector_shuffle with the corrected mask. 
2809 SDOperand NewMask = NormalizeMask(PermMask, DAG); 2810 if (NewMask.Val != PermMask.Val) { 2811 if (X86::isUNPCKLMask(PermMask.Val, true)) { 2812 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 2813 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2814 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 2815 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 2816 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2817 } 2818 } 2819 } 2820 2821 // Normalize the node to match x86 shuffle ops if needed 2822 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val)) 2823 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2824 2825 if (Commuted) { 2826 // Commute is back and try unpck* again. 2827 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2828 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2829 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2830 X86::isUNPCKLMask(PermMask.Val) || 2831 X86::isUNPCKHMask(PermMask.Val)) 2832 return Op; 2833 } 2834 2835 // If VT is integer, try PSHUF* first, then SHUFP*. 2836 if (MVT::isInteger(VT)) { 2837 // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically 2838 // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. 2839 if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) && 2840 X86::isPSHUFDMask(PermMask.Val)) || 2841 X86::isPSHUFHWMask(PermMask.Val) || 2842 X86::isPSHUFLWMask(PermMask.Val)) { 2843 if (V2.getOpcode() != ISD::UNDEF) 2844 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2845 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2846 return Op; 2847 } 2848 2849 if (X86::isSHUFPMask(PermMask.Val) && 2850 MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. 2851 return Op; 2852 2853 // Handle v8i16 shuffle high / low shuffle node pair. 2854 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2855 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2856 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2857 SmallVector<SDOperand, 8> MaskVec; 2858 for (unsigned i = 0; i != 4; ++i) 2859 MaskVec.push_back(PermMask.getOperand(i)); 2860 for (unsigned i = 4; i != 8; ++i) 2861 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2862 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2863 &MaskVec[0], MaskVec.size()); 2864 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2865 MaskVec.clear(); 2866 for (unsigned i = 0; i != 4; ++i) 2867 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2868 for (unsigned i = 4; i != 8; ++i) 2869 MaskVec.push_back(PermMask.getOperand(i)); 2870 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 2871 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2872 } 2873 } else { 2874 // Floating point cases in the other order. 2875 if (X86::isSHUFPMask(PermMask.Val)) 2876 return Op; 2877 if (X86::isPSHUFDMask(PermMask.Val) || 2878 X86::isPSHUFHWMask(PermMask.Val) || 2879 X86::isPSHUFLWMask(PermMask.Val)) { 2880 if (V2.getOpcode() != ISD::UNDEF) 2881 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2882 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2883 return Op; 2884 } 2885 } 2886 2887 if (NumElems == 4 && 2888 // Don't do this for MMX. 
2889       MVT::getSizeInBits(VT) != 64) {
2890     MVT::ValueType MaskVT = PermMask.getValueType();
2891     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
2892     SmallVector<std::pair<int, int>, 8> Locs;
2893     Locs.reserve(NumElems);
2894     SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2895     SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2896     unsigned NumHi = 0;
2897     unsigned NumLo = 0;
2898     // If no more than two elements come from either vector, this can be
2899     // implemented with two shuffles. The first shuffle gathers the elements;
2900     // the second shuffle, which takes the first shuffle as both of its
2901     // vector operands, puts the elements into the right order.
2902     for (unsigned i = 0; i != NumElems; ++i) {
2903       SDOperand Elt = PermMask.getOperand(i);
2904       if (Elt.getOpcode() == ISD::UNDEF) {
2905         Locs[i] = std::make_pair(-1, -1);
2906       } else {
2907         unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2908         if (Val < NumElems) {
2909           Locs[i] = std::make_pair(0, NumLo);
2910           Mask1[NumLo] = Elt;
2911           NumLo++;
2912         } else {
2913           Locs[i] = std::make_pair(1, NumHi);
2914           if (2+NumHi < NumElems)
2915             Mask1[2+NumHi] = Elt;
2916           NumHi++;
2917         }
2918       }
2919     }
2920     if (NumLo <= 2 && NumHi <= 2) {
2921       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2922                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2923                                    &Mask1[0], Mask1.size()));
2924       for (unsigned i = 0; i != NumElems; ++i) {
2925         if (Locs[i].first == -1)
2926           continue;
2927         else {
2928           unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
2929           Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
2930           Mask2[i] = DAG.getConstant(Idx, MaskEVT);
2931         }
2932       }
2933
2934       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
2935                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2936                                      &Mask2[0], Mask2.size()));
2937     }
2938
2939     // Break it into (shuffle shuffle_hi, shuffle_lo).
2940 Locs.clear(); 2941 SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2942 SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2943 SmallVector<SDOperand,8> *MaskPtr = &LoMask; 2944 unsigned MaskIdx = 0; 2945 unsigned LoIdx = 0; 2946 unsigned HiIdx = NumElems/2; 2947 for (unsigned i = 0; i != NumElems; ++i) { 2948 if (i == NumElems/2) { 2949 MaskPtr = &HiMask; 2950 MaskIdx = 1; 2951 LoIdx = 0; 2952 HiIdx = NumElems/2; 2953 } 2954 SDOperand Elt = PermMask.getOperand(i); 2955 if (Elt.getOpcode() == ISD::UNDEF) { 2956 Locs[i] = std::make_pair(-1, -1); 2957 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 2958 Locs[i] = std::make_pair(MaskIdx, LoIdx); 2959 (*MaskPtr)[LoIdx] = Elt; 2960 LoIdx++; 2961 } else { 2962 Locs[i] = std::make_pair(MaskIdx, HiIdx); 2963 (*MaskPtr)[HiIdx] = Elt; 2964 HiIdx++; 2965 } 2966 } 2967 2968 SDOperand LoShuffle = 2969 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2970 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2971 &LoMask[0], LoMask.size())); 2972 SDOperand HiShuffle = 2973 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2974 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2975 &HiMask[0], HiMask.size())); 2976 SmallVector<SDOperand, 8> MaskOps; 2977 for (unsigned i = 0; i != NumElems; ++i) { 2978 if (Locs[i].first == -1) { 2979 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 2980 } else { 2981 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 2982 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 2983 } 2984 } 2985 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 2986 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2987 &MaskOps[0], MaskOps.size())); 2988 } 2989 2990 return SDOperand(); 2991} 2992 2993SDOperand 2994X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 2995 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2996 return SDOperand(); 2997 2998 MVT::ValueType VT = Op.getValueType(); 2999 // TODO: handle v16i8. 3000 if (MVT::getSizeInBits(VT) == 16) { 3001 // Transform it so it match pextrw which produces a 32-bit result. 3002 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 3003 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 3004 Op.getOperand(0), Op.getOperand(1)); 3005 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 3006 DAG.getValueType(VT)); 3007 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 3008 } else if (MVT::getSizeInBits(VT) == 32) { 3009 SDOperand Vec = Op.getOperand(0); 3010 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3011 if (Idx == 0) 3012 return Op; 3013 // SHUFPS the element to the lowest double word, then movss. 
3014 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3015 SmallVector<SDOperand, 8> IdxVec; 3016 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); 3017 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3018 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3019 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3020 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3021 &IdxVec[0], IdxVec.size()); 3022 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3023 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3024 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3025 DAG.getConstant(0, getPointerTy())); 3026 } else if (MVT::getSizeInBits(VT) == 64) { 3027 SDOperand Vec = Op.getOperand(0); 3028 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3029 if (Idx == 0) 3030 return Op; 3031 3032 // UNPCKHPD the element to the lowest double word, then movsd. 3033 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 3034 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 3035 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3036 SmallVector<SDOperand, 8> IdxVec; 3037 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); 3038 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3039 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3040 &IdxVec[0], IdxVec.size()); 3041 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3042 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3043 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3044 DAG.getConstant(0, getPointerTy())); 3045 } 3046 3047 return SDOperand(); 3048} 3049 3050SDOperand 3051X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3052 // Transform it so it match pinsrw which expects a 16-bit value in a GR32 3053 // as its second argument. 3054 MVT::ValueType VT = Op.getValueType(); 3055 MVT::ValueType BaseVT = MVT::getVectorElementType(VT); 3056 SDOperand N0 = Op.getOperand(0); 3057 SDOperand N1 = Op.getOperand(1); 3058 SDOperand N2 = Op.getOperand(2); 3059 if (MVT::getSizeInBits(BaseVT) == 16) { 3060 if (N1.getValueType() != MVT::i32) 3061 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3062 if (N2.getValueType() != MVT::i32) 3063 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy()); 3064 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3065 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3066 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3067 if (Idx == 0) { 3068 // Use a movss. 3069 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3070 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3071 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 3072 SmallVector<SDOperand, 8> MaskVec; 3073 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3074 for (unsigned i = 1; i <= 3; ++i) 3075 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3076 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3077 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3078 &MaskVec[0], MaskVec.size())); 3079 } else { 3080 // Use two pinsrw instructions to insert a 32 bit value. 
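      // The vector is reinterpreted as v8i16 and the 32-bit value is written
      // as two 16-bit halves at word positions 2*Idx and 2*Idx+1. If the
      // value is floating point it is first moved into an integer register
      // (v4f32 -> v4i32 bitcast, then extract lane 0) so PINSRW can use it.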
3081 Idx <<= 1; 3082 if (MVT::isFloatingPoint(N1.getValueType())) { 3083 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3084 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3085 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3086 DAG.getConstant(0, getPointerTy())); 3087 } 3088 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3089 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3090 DAG.getConstant(Idx, getPointerTy())); 3091 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3092 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3093 DAG.getConstant(Idx+1, getPointerTy())); 3094 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3095 } 3096 } 3097 3098 return SDOperand(); 3099} 3100 3101SDOperand 3102X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3103 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3104 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3105} 3106 3107// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 3108// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is 3109// one of the above-mentioned nodes. It has to be wrapped because otherwise 3110// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 3111// be used to form an addressing mode. These wrapped nodes will be selected 3112// into MOV32ri. 3113SDOperand 3114X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 3115 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 3116 SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(), 3117 getPointerTy(), 3118 CP->getAlignment()); 3119 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3120 // With PIC, the address is actually $g + Offset. 3121 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3122 !Subtarget->isPICStyleRIPRel()) { 3123 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3124 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3125 Result); 3126 } 3127 3128 return Result; 3129} 3130 3131SDOperand 3132X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 3133 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 3134 SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy()); 3135 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3136 // With PIC, the address is actually $g + Offset. 3137 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3138 !Subtarget->isPICStyleRIPRel()) { 3139 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3140 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3141 Result); 3142 } 3143 3144 // For Darwin & Mingw32, external and weak symbols are indirect, so we want to 3145 // load the value at address GV, not the value of GV itself. This means that 3146 // the GlobalAddress must be in the base or index register of the address, not 3147 // the GV offset field.
Platform check is inside GVRequiresExtraLoad() call 3148 // The same applies for external symbols during PIC codegen 3149 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 3150 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 3151 3152 return Result; 3153} 3154 3155// Lower ISD::GlobalTLSAddress using the "general dynamic" model 3156static SDOperand 3157LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3158 const MVT::ValueType PtrVT) { 3159 SDOperand InFlag; 3160 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 3161 DAG.getNode(X86ISD::GlobalBaseReg, 3162 PtrVT), InFlag); 3163 InFlag = Chain.getValue(1); 3164 3165 // emit leal symbol@TLSGD(,%ebx,1), %eax 3166 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 3167 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3168 GA->getValueType(0), 3169 GA->getOffset()); 3170 SDOperand Ops[] = { Chain, TGA, InFlag }; 3171 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 3172 InFlag = Result.getValue(2); 3173 Chain = Result.getValue(1); 3174 3175 // call ___tls_get_addr. This function receives its argument in 3176 // the register EAX. 3177 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 3178 InFlag = Chain.getValue(1); 3179 3180 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3181 SDOperand Ops1[] = { Chain, 3182 DAG.getTargetExternalSymbol("___tls_get_addr", 3183 PtrVT), 3184 DAG.getRegister(X86::EAX, PtrVT), 3185 DAG.getRegister(X86::EBX, PtrVT), 3186 InFlag }; 3187 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 3188 InFlag = Chain.getValue(1); 3189 3190 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 3191} 3192 3193// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 3194// "local exec" model. 3195static SDOperand 3196LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3197 const MVT::ValueType PtrVT) { 3198 // Get the Thread Pointer 3199 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 3200 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 3201 // exec) 3202 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3203 GA->getValueType(0), 3204 GA->getOffset()); 3205 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 3206 3207 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 3208 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); 3209 3210 // The address of the thread local variable is the add of the thread 3211 // pointer with the offset of the variable. 
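  // On IA-32 ELF the thread pointer is normally read through the %gs segment,
  // so this is expected to end up as something like
  //   movl %gs:0, %eax ; addl x@ntpoff, %eax          (local exec)
  // with one extra load through x@indntpoff for the initial-exec case.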
3212 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); 3213} 3214 3215SDOperand 3216X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { 3217 // TODO: implement the "local dynamic" model 3218 // TODO: implement the "initial exec"model for pic executables 3219 assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() && 3220 "TLS not implemented for non-ELF and 64-bit targets"); 3221 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 3222 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 3223 // otherwise use the "Local Exec"TLS Model 3224 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 3225 return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy()); 3226 else 3227 return LowerToTLSExecModel(GA, DAG, getPointerTy()); 3228} 3229 3230SDOperand 3231X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 3232 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 3233 SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 3234 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3235 // With PIC, the address is actually $g + Offset. 3236 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3237 !Subtarget->isPICStyleRIPRel()) { 3238 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3239 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3240 Result); 3241 } 3242 3243 return Result; 3244} 3245 3246SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 3247 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 3248 SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); 3249 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3250 // With PIC, the address is actually $g + Offset. 3251 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3252 !Subtarget->isPICStyleRIPRel()) { 3253 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3254 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3255 Result); 3256 } 3257 3258 return Result; 3259} 3260 3261SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3262 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 3263 "Not an i64 shift!"); 3264 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 3265 SDOperand ShOpLo = Op.getOperand(0); 3266 SDOperand ShOpHi = Op.getOperand(1); 3267 SDOperand ShAmt = Op.getOperand(2); 3268 SDOperand Tmp1 = isSRA ? 3269 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 3270 DAG.getConstant(0, MVT::i32); 3271 3272 SDOperand Tmp2, Tmp3; 3273 if (Op.getOpcode() == ISD::SHL_PARTS) { 3274 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 3275 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 3276 } else { 3277 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 3278 Tmp3 = DAG.getNode(isSRA ? 
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3279 } 3280 3281 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3282 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3283 DAG.getConstant(32, MVT::i8)); 3284 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3285 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3286 3287 SDOperand Hi, Lo; 3288 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3289 3290 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3291 SmallVector<SDOperand, 4> Ops; 3292 if (Op.getOpcode() == ISD::SHL_PARTS) { 3293 Ops.push_back(Tmp2); 3294 Ops.push_back(Tmp3); 3295 Ops.push_back(CC); 3296 Ops.push_back(InFlag); 3297 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3298 InFlag = Hi.getValue(1); 3299 3300 Ops.clear(); 3301 Ops.push_back(Tmp3); 3302 Ops.push_back(Tmp1); 3303 Ops.push_back(CC); 3304 Ops.push_back(InFlag); 3305 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3306 } else { 3307 Ops.push_back(Tmp2); 3308 Ops.push_back(Tmp3); 3309 Ops.push_back(CC); 3310 Ops.push_back(InFlag); 3311 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3312 InFlag = Lo.getValue(1); 3313 3314 Ops.clear(); 3315 Ops.push_back(Tmp3); 3316 Ops.push_back(Tmp1); 3317 Ops.push_back(CC); 3318 Ops.push_back(InFlag); 3319 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3320 } 3321 3322 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3323 Ops.clear(); 3324 Ops.push_back(Lo); 3325 Ops.push_back(Hi); 3326 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3327} 3328 3329SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3330 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3331 Op.getOperand(0).getValueType() >= MVT::i16 && 3332 "Unknown SINT_TO_FP to lower!"); 3333 3334 SDOperand Result; 3335 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3336 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3337 MachineFunction &MF = DAG.getMachineFunction(); 3338 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3339 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3340 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3341 StackSlot, NULL, 0); 3342 3343 // These are really Legal; caller falls through into that case. 3344 if (SrcVT==MVT::i32 && Op.getValueType() != MVT::f80 && X86ScalarSSE) 3345 return Result; 3346 3347 // Build the FILD 3348 SDVTList Tys; 3349 bool useSSE = X86ScalarSSE && Op.getValueType() != MVT::f80; 3350 if (useSSE) 3351 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3352 else 3353 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3354 SmallVector<SDOperand, 8> Ops; 3355 Ops.push_back(Chain); 3356 Ops.push_back(StackSlot); 3357 Ops.push_back(DAG.getValueType(SrcVT)); 3358 Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3359 Tys, &Ops[0], Ops.size()); 3360 3361 if (useSSE) { 3362 Chain = Result.getValue(1); 3363 SDOperand InFlag = Result.getValue(2); 3364 3365 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3366 // shouldn't be necessary except that RFP cannot be live across 3367 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
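    // Round-trip through memory: store the x87 result of the FILD to a fresh
    // stack slot and immediately reload it, so the final value ends up in an
    // SSE register instead of on the x87 stack.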
3368 MachineFunction &MF = DAG.getMachineFunction(); 3369 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3370 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3371 Tys = DAG.getVTList(MVT::Other); 3372 SmallVector<SDOperand, 8> Ops; 3373 Ops.push_back(Chain); 3374 Ops.push_back(Result); 3375 Ops.push_back(StackSlot); 3376 Ops.push_back(DAG.getValueType(Op.getValueType())); 3377 Ops.push_back(InFlag); 3378 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3379 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3380 } 3381 3382 return Result; 3383} 3384 3385SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3386 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3387 "Unknown FP_TO_SINT to lower!"); 3388 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3389 // stack slot. 3390 SDOperand Result; 3391 MachineFunction &MF = DAG.getMachineFunction(); 3392 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3393 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3394 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3395 3396 // These are really Legal. 3397 if (Op.getValueType() == MVT::i32 && X86ScalarSSE && 3398 Op.getOperand(0).getValueType() != MVT::f80) 3399 return Result; 3400 3401 unsigned Opc; 3402 switch (Op.getValueType()) { 3403 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3404 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3405 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3406 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3407 } 3408 3409 SDOperand Chain = DAG.getEntryNode(); 3410 SDOperand Value = Op.getOperand(0); 3411 if (X86ScalarSSE && Op.getOperand(0).getValueType() != MVT::f80) { 3412 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3413 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3414 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3415 SDOperand Ops[] = { 3416 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3417 }; 3418 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3419 Chain = Value.getValue(1); 3420 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3421 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3422 } 3423 3424 // Build the FP_TO_INT*_IN_MEM 3425 SDOperand Ops[] = { Chain, Value, StackSlot }; 3426 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3427 3428 // Load the result. 
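  // The FP_TO_INT*_IN_MEM node is presumably expanded later into an x87
  // sequence that switches the control word to truncating rounding, does a
  // FIST to the stack slot, and restores the control word; here it is enough
  // to load the integer result back out of that slot.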
3429 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3430} 3431 3432SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3433 MVT::ValueType VT = Op.getValueType(); 3434 MVT::ValueType EltVT = VT; 3435 if (MVT::isVector(VT)) 3436 EltVT = MVT::getVectorElementType(VT); 3437 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3438 std::vector<Constant*> CV; 3439 if (EltVT == MVT::f64) { 3440 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63)))); 3441 CV.push_back(C); 3442 CV.push_back(C); 3443 } else { 3444 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31)))); 3445 CV.push_back(C); 3446 CV.push_back(C); 3447 CV.push_back(C); 3448 CV.push_back(C); 3449 } 3450 Constant *C = ConstantVector::get(CV); 3451 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3452 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3453 false, 16); 3454 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3455} 3456 3457SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3458 MVT::ValueType VT = Op.getValueType(); 3459 MVT::ValueType EltVT = VT; 3460 unsigned EltNum = 1; 3461 if (MVT::isVector(VT)) { 3462 EltVT = MVT::getVectorElementType(VT); 3463 EltNum = MVT::getVectorNumElements(VT); 3464 } 3465 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3466 std::vector<Constant*> CV; 3467 if (EltVT == MVT::f64) { 3468 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63))); 3469 CV.push_back(C); 3470 CV.push_back(C); 3471 } else { 3472 Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31))); 3473 CV.push_back(C); 3474 CV.push_back(C); 3475 CV.push_back(C); 3476 CV.push_back(C); 3477 } 3478 Constant *C = ConstantVector::get(CV); 3479 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3480 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3481 false, 16); 3482 if (MVT::isVector(VT)) { 3483 return DAG.getNode(ISD::BIT_CONVERT, VT, 3484 DAG.getNode(ISD::XOR, MVT::v2i64, 3485 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 3486 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 3487 } else { 3488 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3489 } 3490} 3491 3492SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3493 SDOperand Op0 = Op.getOperand(0); 3494 SDOperand Op1 = Op.getOperand(1); 3495 MVT::ValueType VT = Op.getValueType(); 3496 MVT::ValueType SrcVT = Op1.getValueType(); 3497 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3498 3499 // If second operand is smaller, extend it first. 3500 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3501 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3502 SrcVT = VT; 3503 SrcTy = MVT::getTypeForValueType(SrcVT); 3504 } 3505 3506 // First get the sign bit of second operand. 
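  // The whole FCOPYSIGN lowering is bitwise:
  //   result = (Op0 & ~sign_mask) | (Op1 & sign_mask)
  // with both masks materialized as constant-pool vectors and applied through
  // X86ISD::FAND / X86ISD::FOR so the values stay in SSE registers.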
3507 std::vector<Constant*> CV; 3508 if (SrcVT == MVT::f64) { 3509 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63)))); 3510 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 3511 } else { 3512 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31)))); 3513 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3514 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3515 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3516 } 3517 Constant *C = ConstantVector::get(CV); 3518 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3519 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 3520 false, 16); 3521 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3522 3523 // Shift sign bit right or left if the two operands have different types. 3524 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3525 // Op0 is MVT::f32, Op1 is MVT::f64. 3526 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3527 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3528 DAG.getConstant(32, MVT::i32)); 3529 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3530 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3531 DAG.getConstant(0, getPointerTy())); 3532 } 3533 3534 // Clear first operand sign bit. 3535 CV.clear(); 3536 if (VT == MVT::f64) { 3537 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63))))); 3538 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0)))); 3539 } else { 3540 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31))))); 3541 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3542 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3543 CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0)))); 3544 } 3545 C = ConstantVector::get(CV); 3546 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3547 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3548 false, 16); 3549 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3550 3551 // Or the value with the sign bit. 
3552 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 3553} 3554 3555SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, 3556 SDOperand Chain) { 3557 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3558 SDOperand Cond; 3559 SDOperand Op0 = Op.getOperand(0); 3560 SDOperand Op1 = Op.getOperand(1); 3561 SDOperand CC = Op.getOperand(2); 3562 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3563 const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3564 const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 3565 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3566 unsigned X86CC; 3567 3568 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 3569 Op0, Op1, DAG)) { 3570 SDOperand Ops1[] = { Chain, Op0, Op1 }; 3571 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1); 3572 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 3573 return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3574 } 3575 3576 assert(isFP && "Illegal integer SetCC!"); 3577 3578 SDOperand COps[] = { Chain, Op0, Op1 }; 3579 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1); 3580 3581 switch (SetCCOpcode) { 3582 default: assert(false && "Illegal floating point SetCC!"); 3583 case ISD::SETOEQ: { // !PF & ZF 3584 SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond }; 3585 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3586 SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8), 3587 Tmp1.getValue(1) }; 3588 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3589 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3590 } 3591 case ISD::SETUNE: { // PF | !ZF 3592 SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond }; 3593 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3594 SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8), 3595 Tmp1.getValue(1) }; 3596 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3597 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3598 } 3599 } 3600} 3601 3602SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3603 bool addTest = true; 3604 SDOperand Chain = DAG.getEntryNode(); 3605 SDOperand Cond = Op.getOperand(0); 3606 SDOperand CC; 3607 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3608 3609 if (Cond.getOpcode() == ISD::SETCC) 3610 Cond = LowerSETCC(Cond, DAG, Chain); 3611 3612 if (Cond.getOpcode() == X86ISD::SETCC) { 3613 CC = Cond.getOperand(0); 3614 3615 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3616 // (since flag operand cannot be shared). Use it as the condition setting 3617 // operand in place of the X86ISD::SETCC. 3618 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3619 // to use a test instead of duplicating the X86ISD::CMP (for register 3620 // pressure reason)? 
3621 SDOperand Cmp = Cond.getOperand(1); 3622 unsigned Opc = Cmp.getOpcode(); 3623 bool IllegalFPCMov = !X86ScalarSSE && 3624 MVT::isFloatingPoint(Op.getValueType()) && 3625 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3626 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) && 3627 !IllegalFPCMov) { 3628 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3629 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3630 addTest = false; 3631 } 3632 } 3633 3634 if (addTest) { 3635 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3636 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3637 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3638 } 3639 3640 VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag); 3641 SmallVector<SDOperand, 4> Ops; 3642 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3643 // condition is true. 3644 Ops.push_back(Op.getOperand(2)); 3645 Ops.push_back(Op.getOperand(1)); 3646 Ops.push_back(CC); 3647 Ops.push_back(Cond.getValue(1)); 3648 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3649} 3650 3651SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3652 bool addTest = true; 3653 SDOperand Chain = Op.getOperand(0); 3654 SDOperand Cond = Op.getOperand(1); 3655 SDOperand Dest = Op.getOperand(2); 3656 SDOperand CC; 3657 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3658 3659 if (Cond.getOpcode() == ISD::SETCC) 3660 Cond = LowerSETCC(Cond, DAG, Chain); 3661 3662 if (Cond.getOpcode() == X86ISD::SETCC) { 3663 CC = Cond.getOperand(0); 3664 3665 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3666 // (since flag operand cannot be shared). Use it as the condition setting 3667 // operand in place of the X86ISD::SETCC. 3668 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3669 // to use a test instead of duplicating the X86ISD::CMP (for register 3670 // pressure reason)? 3671 SDOperand Cmp = Cond.getOperand(1); 3672 unsigned Opc = Cmp.getOpcode(); 3673 if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) { 3674 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3675 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3676 addTest = false; 3677 } 3678 } 3679 3680 if (addTest) { 3681 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3682 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3683 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3684 } 3685 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3686 Cond, Op.getOperand(2), CC, Cond.getValue(1)); 3687} 3688 3689SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 3690 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3691 3692 if (Subtarget->is64Bit()) 3693 return LowerX86_64CCCCallTo(Op, DAG, CallingConv); 3694 else 3695 switch (CallingConv) { 3696 default: 3697 assert(0 && "Unsupported calling convention"); 3698 case CallingConv::Fast: 3699 // TODO: Implement fastcc 3700 // Falls through 3701 case CallingConv::C: 3702 case CallingConv::X86_StdCall: 3703 return LowerCCCCallTo(Op, DAG, CallingConv); 3704 case CallingConv::X86_FastCall: 3705 return LowerFastCCCallTo(Op, DAG, CallingConv); 3706 } 3707} 3708 3709 3710// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 3711// Calls to _alloca is needed to probe the stack when allocating more than 4k 3712// bytes in one go. 
Touching the stack at 4K increments is necessary to ensure 3713// that the guard pages used by the OS virtual memory manager are allocated in 3714// correct sequence. 3715SDOperand 3716X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, 3717 SelectionDAG &DAG) { 3718 assert(Subtarget->isTargetCygMing() && 3719 "This should be used only on Cygwin/Mingw targets"); 3720 3721 // Get the inputs. 3722 SDOperand Chain = Op.getOperand(0); 3723 SDOperand Size = Op.getOperand(1); 3724 // FIXME: Ensure alignment here 3725 3726 SDOperand Flag; 3727 3728 MVT::ValueType IntPtr = getPointerTy(); 3729 MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32); 3730 3731 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 3732 Flag = Chain.getValue(1); 3733 3734 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3735 SDOperand Ops[] = { Chain, 3736 DAG.getTargetExternalSymbol("_alloca", IntPtr), 3737 DAG.getRegister(X86::EAX, IntPtr), 3738 Flag }; 3739 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4); 3740 Flag = Chain.getValue(1); 3741 3742 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 3743 3744 std::vector<MVT::ValueType> Tys; 3745 Tys.push_back(SPTy); 3746 Tys.push_back(MVT::Other); 3747 SDOperand Ops1[2] = { Chain.getValue(0), Chain }; 3748 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); 3749} 3750 3751SDOperand 3752X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3753 MachineFunction &MF = DAG.getMachineFunction(); 3754 const Function* Fn = MF.getFunction(); 3755 if (Fn->hasExternalLinkage() && 3756 Subtarget->isTargetCygMing() && 3757 Fn->getName() == "main") 3758 MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true); 3759 3760 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3761 if (Subtarget->is64Bit()) 3762 return LowerX86_64CCCArguments(Op, DAG); 3763 else 3764 switch(CC) { 3765 default: 3766 assert(0 && "Unsupported calling convention"); 3767 case CallingConv::Fast: 3768 // TODO: implement fastcc. 3769 3770 // Falls through 3771 case CallingConv::C: 3772 return LowerCCCArguments(Op, DAG); 3773 case CallingConv::X86_StdCall: 3774 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall); 3775 return LowerCCCArguments(Op, DAG, true); 3776 case CallingConv::X86_FastCall: 3777 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall); 3778 return LowerFastCCArguments(Op, DAG); 3779 } 3780} 3781 3782SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3783 SDOperand InFlag(0, 0); 3784 SDOperand Chain = Op.getOperand(0); 3785 unsigned Align = 3786 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3787 if (Align == 0) Align = 1; 3788 3789 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3790 // If not DWORD aligned or size is more than the threshold, call memset. 3791 // The libc version is likely to be faster for these cases. It can use the 3792 // address value and run time information about the CPU. 3793 if ((Align & 3) != 0 || 3794 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 3795 MVT::ValueType IntPtr = getPointerTy(); 3796 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3797 TargetLowering::ArgListTy Args; 3798 TargetLowering::ArgListEntry Entry; 3799 Entry.Node = Op.getOperand(1); 3800 Entry.Ty = IntPtrTy; 3801 Args.push_back(Entry); 3802 // Extend the unsigned i8 argument to be an int value for the call. 
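    // The C prototype is memset(void*, int, size_t), so the i8 fill value
    // must be widened before it can be passed as the second argument.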
3803 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3804 Entry.Ty = IntPtrTy; 3805 Args.push_back(Entry); 3806 Entry.Node = Op.getOperand(3); 3807 Args.push_back(Entry); 3808 std::pair<SDOperand,SDOperand> CallResult = 3809 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3810 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3811 return CallResult.second; 3812 } 3813 3814 MVT::ValueType AVT; 3815 SDOperand Count; 3816 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3817 unsigned BytesLeft = 0; 3818 bool TwoRepStos = false; 3819 if (ValC) { 3820 unsigned ValReg; 3821 uint64_t Val = ValC->getValue() & 255; 3822 3823 // If the value is a constant, then we can potentially use larger sets. 3824 switch (Align & 3) { 3825 case 2: // WORD aligned 3826 AVT = MVT::i16; 3827 ValReg = X86::AX; 3828 Val = (Val << 8) | Val; 3829 break; 3830 case 0: // DWORD aligned 3831 AVT = MVT::i32; 3832 ValReg = X86::EAX; 3833 Val = (Val << 8) | Val; 3834 Val = (Val << 16) | Val; 3835 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3836 AVT = MVT::i64; 3837 ValReg = X86::RAX; 3838 Val = (Val << 32) | Val; 3839 } 3840 break; 3841 default: // Byte aligned 3842 AVT = MVT::i8; 3843 ValReg = X86::AL; 3844 Count = Op.getOperand(3); 3845 break; 3846 } 3847 3848 if (AVT > MVT::i8) { 3849 if (I) { 3850 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3851 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3852 BytesLeft = I->getValue() % UBytes; 3853 } else { 3854 assert(AVT >= MVT::i32 && 3855 "Do not use rep;stos if not at least DWORD aligned"); 3856 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3857 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3858 TwoRepStos = true; 3859 } 3860 } 3861 3862 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3863 InFlag); 3864 InFlag = Chain.getValue(1); 3865 } else { 3866 AVT = MVT::i8; 3867 Count = Op.getOperand(3); 3868 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3869 InFlag = Chain.getValue(1); 3870 } 3871 3872 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3873 Count, InFlag); 3874 InFlag = Chain.getValue(1); 3875 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3876 Op.getOperand(1), InFlag); 3877 InFlag = Chain.getValue(1); 3878 3879 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3880 SmallVector<SDOperand, 8> Ops; 3881 Ops.push_back(Chain); 3882 Ops.push_back(DAG.getValueType(AVT)); 3883 Ops.push_back(InFlag); 3884 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3885 3886 if (TwoRepStos) { 3887 InFlag = Chain.getValue(1); 3888 Count = Op.getOperand(3); 3889 MVT::ValueType CVT = Count.getValueType(); 3890 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3891 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3892 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3893 Left, InFlag); 3894 InFlag = Chain.getValue(1); 3895 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3896 Ops.clear(); 3897 Ops.push_back(Chain); 3898 Ops.push_back(DAG.getValueType(MVT::i8)); 3899 Ops.push_back(InFlag); 3900 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3901 } else if (BytesLeft) { 3902 // Issue stores for the last 1 - 7 bytes. 
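    // rep;stos has already written Count full elements; the remaining
    // BytesLeft bytes (the byte count modulo the element size) are finished
    // off with at most one i32, one i16 and one i8 store of the replicated
    // fill value.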
3903 SDOperand Value; 3904 unsigned Val = ValC->getValue() & 255; 3905 unsigned Offset = I->getValue() - BytesLeft; 3906 SDOperand DstAddr = Op.getOperand(1); 3907 MVT::ValueType AddrVT = DstAddr.getValueType(); 3908 if (BytesLeft >= 4) { 3909 Val = (Val << 8) | Val; 3910 Val = (Val << 16) | Val; 3911 Value = DAG.getConstant(Val, MVT::i32); 3912 Chain = DAG.getStore(Chain, Value, 3913 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3914 DAG.getConstant(Offset, AddrVT)), 3915 NULL, 0); 3916 BytesLeft -= 4; 3917 Offset += 4; 3918 } 3919 if (BytesLeft >= 2) { 3920 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 3921 Chain = DAG.getStore(Chain, Value, 3922 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3923 DAG.getConstant(Offset, AddrVT)), 3924 NULL, 0); 3925 BytesLeft -= 2; 3926 Offset += 2; 3927 } 3928 if (BytesLeft == 1) { 3929 Value = DAG.getConstant(Val, MVT::i8); 3930 Chain = DAG.getStore(Chain, Value, 3931 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3932 DAG.getConstant(Offset, AddrVT)), 3933 NULL, 0); 3934 } 3935 } 3936 3937 return Chain; 3938} 3939 3940SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 3941 SDOperand Chain = Op.getOperand(0); 3942 unsigned Align = 3943 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3944 if (Align == 0) Align = 1; 3945 3946 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3947 // If not DWORD aligned or size is more than the threshold, call memcpy. 3948 // The libc version is likely to be faster for these cases. It can use the 3949 // address value and run time information about the CPU. 3950 // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster 3951 if ((Align & 3) != 0 || 3952 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 3953 MVT::ValueType IntPtr = getPointerTy(); 3954 TargetLowering::ArgListTy Args; 3955 TargetLowering::ArgListEntry Entry; 3956 Entry.Ty = getTargetData()->getIntPtrType(); 3957 Entry.Node = Op.getOperand(1); Args.push_back(Entry); 3958 Entry.Node = Op.getOperand(2); Args.push_back(Entry); 3959 Entry.Node = Op.getOperand(3); Args.push_back(Entry); 3960 std::pair<SDOperand,SDOperand> CallResult = 3961 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3962 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 3963 return CallResult.second; 3964 } 3965 3966 MVT::ValueType AVT; 3967 SDOperand Count; 3968 unsigned BytesLeft = 0; 3969 bool TwoRepMovs = false; 3970 switch (Align & 3) { 3971 case 2: // WORD aligned 3972 AVT = MVT::i16; 3973 break; 3974 case 0: // DWORD aligned 3975 AVT = MVT::i32; 3976 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 3977 AVT = MVT::i64; 3978 break; 3979 default: // Byte aligned 3980 AVT = MVT::i8; 3981 Count = Op.getOperand(3); 3982 break; 3983 } 3984 3985 if (AVT > MVT::i8) { 3986 if (I) { 3987 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3988 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3989 BytesLeft = I->getValue() % UBytes; 3990 } else { 3991 assert(AVT >= MVT::i32 && 3992 "Do not use rep;movs if not at least DWORD aligned"); 3993 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3994 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3995 TwoRepMovs = true; 3996 } 3997 } 3998 3999 SDOperand InFlag(0, 0); 4000 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 4001 Count, InFlag); 4002 InFlag = Chain.getValue(1); 4003 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RDI : X86::EDI, 4004 Op.getOperand(1), InFlag); 4005 InFlag = Chain.getValue(1); 4006 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 4007 Op.getOperand(2), InFlag); 4008 InFlag = Chain.getValue(1); 4009 4010 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4011 SmallVector<SDOperand, 8> Ops; 4012 Ops.push_back(Chain); 4013 Ops.push_back(DAG.getValueType(AVT)); 4014 Ops.push_back(InFlag); 4015 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4016 4017 if (TwoRepMovs) { 4018 InFlag = Chain.getValue(1); 4019 Count = Op.getOperand(3); 4020 MVT::ValueType CVT = Count.getValueType(); 4021 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 4022 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 4023 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 4024 Left, InFlag); 4025 InFlag = Chain.getValue(1); 4026 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4027 Ops.clear(); 4028 Ops.push_back(Chain); 4029 Ops.push_back(DAG.getValueType(MVT::i8)); 4030 Ops.push_back(InFlag); 4031 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 4032 } else if (BytesLeft) { 4033 // Issue loads and stores for the last 1 - 7 bytes. 4034 unsigned Offset = I->getValue() - BytesLeft; 4035 SDOperand DstAddr = Op.getOperand(1); 4036 MVT::ValueType DstVT = DstAddr.getValueType(); 4037 SDOperand SrcAddr = Op.getOperand(2); 4038 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4039 SDOperand Value; 4040 if (BytesLeft >= 4) { 4041 Value = DAG.getLoad(MVT::i32, Chain, 4042 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4043 DAG.getConstant(Offset, SrcVT)), 4044 NULL, 0); 4045 Chain = Value.getValue(1); 4046 Chain = DAG.getStore(Chain, Value, 4047 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4048 DAG.getConstant(Offset, DstVT)), 4049 NULL, 0); 4050 BytesLeft -= 4; 4051 Offset += 4; 4052 } 4053 if (BytesLeft >= 2) { 4054 Value = DAG.getLoad(MVT::i16, Chain, 4055 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4056 DAG.getConstant(Offset, SrcVT)), 4057 NULL, 0); 4058 Chain = Value.getValue(1); 4059 Chain = DAG.getStore(Chain, Value, 4060 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4061 DAG.getConstant(Offset, DstVT)), 4062 NULL, 0); 4063 BytesLeft -= 2; 4064 Offset += 2; 4065 } 4066 4067 if (BytesLeft == 1) { 4068 Value = DAG.getLoad(MVT::i8, Chain, 4069 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4070 DAG.getConstant(Offset, SrcVT)), 4071 NULL, 0); 4072 Chain = Value.getValue(1); 4073 Chain = DAG.getStore(Chain, Value, 4074 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4075 DAG.getConstant(Offset, DstVT)), 4076 NULL, 0); 4077 } 4078 } 4079 4080 return Chain; 4081} 4082 4083SDOperand 4084X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4085 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4086 SDOperand TheOp = Op.getOperand(0); 4087 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 4088 if (Subtarget->is64Bit()) { 4089 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4090 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 4091 MVT::i64, Copy1.getValue(2)); 4092 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 4093 DAG.getConstant(32, MVT::i8)); 4094 SDOperand Ops[] = { 4095 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 4096 }; 4097 4098 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4099 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 4100 } 4101 4102 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4103 SDOperand Copy2 = 
DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 4104 MVT::i32, Copy1.getValue(2)); 4105 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 4106 Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 4107 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 4108} 4109 4110SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4111 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4112 4113 if (!Subtarget->is64Bit()) { 4114 // vastart just stores the address of the VarArgsFrameIndex slot into the 4115 // memory location argument. 4116 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4117 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4118 SV->getOffset()); 4119 } 4120 4121 // __va_list_tag: 4122 // gp_offset (0 - 6 * 8) 4123 // fp_offset (48 - 48 + 8 * 16) 4124 // overflow_arg_area (point to parameters coming in memory). 4125 // reg_save_area 4126 SmallVector<SDOperand, 8> MemOps; 4127 SDOperand FIN = Op.getOperand(1); 4128 // Store gp_offset 4129 SDOperand Store = DAG.getStore(Op.getOperand(0), 4130 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4131 FIN, SV->getValue(), SV->getOffset()); 4132 MemOps.push_back(Store); 4133 4134 // Store fp_offset 4135 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4136 DAG.getConstant(4, getPointerTy())); 4137 Store = DAG.getStore(Op.getOperand(0), 4138 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4139 FIN, SV->getValue(), SV->getOffset()); 4140 MemOps.push_back(Store); 4141 4142 // Store ptr to overflow_arg_area 4143 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4144 DAG.getConstant(4, getPointerTy())); 4145 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4146 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4147 SV->getOffset()); 4148 MemOps.push_back(Store); 4149 4150 // Store ptr to reg_save_area. 4151 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4152 DAG.getConstant(8, getPointerTy())); 4153 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4154 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4155 SV->getOffset()); 4156 MemOps.push_back(Store); 4157 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4158} 4159 4160SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4161 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 
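  // That structure is 24 bytes, so after loading the source va_list pointer
  // the copy is done as three i64 load/store pairs, advancing both pointers
  // by 8 bytes between iterations.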
4162 SDOperand Chain = Op.getOperand(0); 4163 SDOperand DstPtr = Op.getOperand(1); 4164 SDOperand SrcPtr = Op.getOperand(2); 4165 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4166 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4167 4168 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4169 SrcSV->getValue(), SrcSV->getOffset()); 4170 Chain = SrcPtr.getValue(1); 4171 for (unsigned i = 0; i < 3; ++i) { 4172 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4173 SrcSV->getValue(), SrcSV->getOffset()); 4174 Chain = Val.getValue(1); 4175 Chain = DAG.getStore(Chain, Val, DstPtr, 4176 DstSV->getValue(), DstSV->getOffset()); 4177 if (i == 2) 4178 break; 4179 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4180 DAG.getConstant(8, getPointerTy())); 4181 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4182 DAG.getConstant(8, getPointerTy())); 4183 } 4184 return Chain; 4185} 4186 4187SDOperand 4188X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4189 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4190 switch (IntNo) { 4191 default: return SDOperand(); // Don't custom lower most intrinsics. 4192 // Comparison intrinsics. 4193 case Intrinsic::x86_sse_comieq_ss: 4194 case Intrinsic::x86_sse_comilt_ss: 4195 case Intrinsic::x86_sse_comile_ss: 4196 case Intrinsic::x86_sse_comigt_ss: 4197 case Intrinsic::x86_sse_comige_ss: 4198 case Intrinsic::x86_sse_comineq_ss: 4199 case Intrinsic::x86_sse_ucomieq_ss: 4200 case Intrinsic::x86_sse_ucomilt_ss: 4201 case Intrinsic::x86_sse_ucomile_ss: 4202 case Intrinsic::x86_sse_ucomigt_ss: 4203 case Intrinsic::x86_sse_ucomige_ss: 4204 case Intrinsic::x86_sse_ucomineq_ss: 4205 case Intrinsic::x86_sse2_comieq_sd: 4206 case Intrinsic::x86_sse2_comilt_sd: 4207 case Intrinsic::x86_sse2_comile_sd: 4208 case Intrinsic::x86_sse2_comigt_sd: 4209 case Intrinsic::x86_sse2_comige_sd: 4210 case Intrinsic::x86_sse2_comineq_sd: 4211 case Intrinsic::x86_sse2_ucomieq_sd: 4212 case Intrinsic::x86_sse2_ucomilt_sd: 4213 case Intrinsic::x86_sse2_ucomile_sd: 4214 case Intrinsic::x86_sse2_ucomigt_sd: 4215 case Intrinsic::x86_sse2_ucomige_sd: 4216 case Intrinsic::x86_sse2_ucomineq_sd: { 4217 unsigned Opc = 0; 4218 ISD::CondCode CC = ISD::SETCC_INVALID; 4219 switch (IntNo) { 4220 default: break; 4221 case Intrinsic::x86_sse_comieq_ss: 4222 case Intrinsic::x86_sse2_comieq_sd: 4223 Opc = X86ISD::COMI; 4224 CC = ISD::SETEQ; 4225 break; 4226 case Intrinsic::x86_sse_comilt_ss: 4227 case Intrinsic::x86_sse2_comilt_sd: 4228 Opc = X86ISD::COMI; 4229 CC = ISD::SETLT; 4230 break; 4231 case Intrinsic::x86_sse_comile_ss: 4232 case Intrinsic::x86_sse2_comile_sd: 4233 Opc = X86ISD::COMI; 4234 CC = ISD::SETLE; 4235 break; 4236 case Intrinsic::x86_sse_comigt_ss: 4237 case Intrinsic::x86_sse2_comigt_sd: 4238 Opc = X86ISD::COMI; 4239 CC = ISD::SETGT; 4240 break; 4241 case Intrinsic::x86_sse_comige_ss: 4242 case Intrinsic::x86_sse2_comige_sd: 4243 Opc = X86ISD::COMI; 4244 CC = ISD::SETGE; 4245 break; 4246 case Intrinsic::x86_sse_comineq_ss: 4247 case Intrinsic::x86_sse2_comineq_sd: 4248 Opc = X86ISD::COMI; 4249 CC = ISD::SETNE; 4250 break; 4251 case Intrinsic::x86_sse_ucomieq_ss: 4252 case Intrinsic::x86_sse2_ucomieq_sd: 4253 Opc = X86ISD::UCOMI; 4254 CC = ISD::SETEQ; 4255 break; 4256 case Intrinsic::x86_sse_ucomilt_ss: 4257 case Intrinsic::x86_sse2_ucomilt_sd: 4258 Opc = X86ISD::UCOMI; 4259 CC = ISD::SETLT; 4260 break; 4261 case Intrinsic::x86_sse_ucomile_ss: 4262 case Intrinsic::x86_sse2_ucomile_sd: 4263 Opc = 
X86ISD::UCOMI; 4264 CC = ISD::SETLE; 4265 break; 4266 case Intrinsic::x86_sse_ucomigt_ss: 4267 case Intrinsic::x86_sse2_ucomigt_sd: 4268 Opc = X86ISD::UCOMI; 4269 CC = ISD::SETGT; 4270 break; 4271 case Intrinsic::x86_sse_ucomige_ss: 4272 case Intrinsic::x86_sse2_ucomige_sd: 4273 Opc = X86ISD::UCOMI; 4274 CC = ISD::SETGE; 4275 break; 4276 case Intrinsic::x86_sse_ucomineq_ss: 4277 case Intrinsic::x86_sse2_ucomineq_sd: 4278 Opc = X86ISD::UCOMI; 4279 CC = ISD::SETNE; 4280 break; 4281 } 4282 4283 unsigned X86CC; 4284 SDOperand LHS = Op.getOperand(1); 4285 SDOperand RHS = Op.getOperand(2); 4286 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4287 4288 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4289 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4290 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4291 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4292 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4293 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4294 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4295 } 4296 } 4297} 4298 4299SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4300 // Depths > 0 not supported yet! 4301 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4302 return SDOperand(); 4303 4304 // Just load the return address 4305 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4306 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4307} 4308 4309SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4310 // Depths > 0 not supported yet! 4311 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4312 return SDOperand(); 4313 4314 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4315 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4316 DAG.getConstant(4, getPointerTy())); 4317} 4318 4319SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4320 SelectionDAG &DAG) { 4321 // Is not yet supported on x86-64 4322 if (Subtarget->is64Bit()) 4323 return SDOperand(); 4324 4325 return DAG.getConstant(8, getPointerTy()); 4326} 4327 4328SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4329{ 4330 assert(!Subtarget->is64Bit() && 4331 "Lowering of eh_return builtin is not supported yet on x86-64"); 4332 4333 MachineFunction &MF = DAG.getMachineFunction(); 4334 SDOperand Chain = Op.getOperand(0); 4335 SDOperand Offset = Op.getOperand(1); 4336 SDOperand Handler = Op.getOperand(2); 4337 4338 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4339 getPointerTy()); 4340 4341 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4342 DAG.getConstant(-4UL, getPointerTy())); 4343 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4344 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4345 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4346 MF.addLiveOut(X86::ECX); 4347 4348 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4349 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4350} 4351 4352SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4353 SelectionDAG &DAG) { 4354 SDOperand Root = Op.getOperand(0); 4355 SDOperand Trmp = Op.getOperand(1); // trampoline 4356 SDOperand FPtr = Op.getOperand(2); // nested function 4357 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4358 4359 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4360 4361 if (Subtarget->is64Bit()) { 4362 return 
SDOperand(); // not yet supported 4363 } else { 4364 Function *Func = (Function *) 4365 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4366 unsigned CC = Func->getCallingConv(); 4367 unsigned NestReg; 4368 4369 switch (CC) { 4370 default: 4371 assert(0 && "Unsupported calling convention"); 4372 case CallingConv::C: 4373 case CallingConv::Fast: 4374 case CallingConv::X86_StdCall: { 4375 // Pass 'nest' parameter in ECX. 4376 // Must be kept in sync with X86CallingConv.td 4377 NestReg = X86::ECX; 4378 4379 // Check that ECX wasn't needed by an 'inreg' parameter. 4380 const FunctionType *FTy = Func->getFunctionType(); 4381 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4382 4383 if (Attrs && !Func->isVarArg()) { 4384 unsigned InRegCount = 0; 4385 unsigned Idx = 1; 4386 4387 for (FunctionType::param_iterator I = FTy->param_begin(), 4388 E = FTy->param_end(); I != E; ++I, ++Idx) 4389 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4390 // FIXME: should only count parameters that are lowered to integers. 4391 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 4392 4393 if (InRegCount > 2) { 4394 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 4395 abort(); 4396 } 4397 } 4398 break; 4399 } 4400 case CallingConv::X86_FastCall: 4401 // Pass 'nest' parameter in EAX. 4402 // Must be kept in sync with X86CallingConv.td 4403 NestReg = X86::EAX; 4404 break; 4405 } 4406 4407 const X86InstrInfo *TII = 4408 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 4409 4410 SDOperand OutChains[4]; 4411 SDOperand Addr, Disp; 4412 4413 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); 4414 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); 4415 4416 unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); 4417 unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg); 4418 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), 4419 Trmp, TrmpSV->getValue(), TrmpSV->getOffset()); 4420 4421 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); 4422 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(), 4423 TrmpSV->getOffset() + 1, false, 1); 4424 4425 unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); 4426 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); 4427 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, 4428 TrmpSV->getValue() + 5, TrmpSV->getOffset()); 4429 4430 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); 4431 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(), 4432 TrmpSV->getOffset() + 6, false, 1); 4433 4434 SDOperand Ops[] = 4435 { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) }; 4436 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2); 4437 } 4438} 4439 4440/// LowerOperation - Provide custom lowering hooks for some operations. 
4441/// 4442SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4443 switch (Op.getOpcode()) { 4444 default: assert(0 && "Should not custom lower this!"); 4445 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4446 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4447 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4448 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4449 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4450 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4451 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4452 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4453 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4454 case ISD::SHL_PARTS: 4455 case ISD::SRA_PARTS: 4456 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4457 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4458 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4459 case ISD::FABS: return LowerFABS(Op, DAG); 4460 case ISD::FNEG: return LowerFNEG(Op, DAG); 4461 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4462 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4463 case ISD::SELECT: return LowerSELECT(Op, DAG); 4464 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4465 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4466 case ISD::CALL: return LowerCALL(Op, DAG); 4467 case ISD::RET: return LowerRET(Op, DAG); 4468 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4469 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4470 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4471 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4472 case ISD::VASTART: return LowerVASTART(Op, DAG); 4473 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4474 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4475 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4476 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4477 case ISD::FRAME_TO_ARGS_OFFSET: 4478 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 4479 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4480 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 4481 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 4482 } 4483 return SDOperand(); 4484} 4485 4486const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4487 switch (Opcode) { 4488 default: return NULL; 4489 case X86ISD::SHLD: return "X86ISD::SHLD"; 4490 case X86ISD::SHRD: return "X86ISD::SHRD"; 4491 case X86ISD::FAND: return "X86ISD::FAND"; 4492 case X86ISD::FOR: return "X86ISD::FOR"; 4493 case X86ISD::FXOR: return "X86ISD::FXOR"; 4494 case X86ISD::FSRL: return "X86ISD::FSRL"; 4495 case X86ISD::FILD: return "X86ISD::FILD"; 4496 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4497 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4498 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4499 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4500 case X86ISD::FLD: return "X86ISD::FLD"; 4501 case X86ISD::FST: return "X86ISD::FST"; 4502 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4503 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4504 case X86ISD::CALL: return "X86ISD::CALL"; 4505 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4506 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4507 case X86ISD::CMP: return "X86ISD::CMP"; 4508 case 
X86ISD::COMI: return "X86ISD::COMI"; 4509 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4510 case X86ISD::SETCC: return "X86ISD::SETCC"; 4511 case X86ISD::CMOV: return "X86ISD::CMOV"; 4512 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4513 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4514 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4515 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4516 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4517 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4518 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4519 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4520 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4521 case X86ISD::FMAX: return "X86ISD::FMAX"; 4522 case X86ISD::FMIN: return "X86ISD::FMIN"; 4523 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 4524 case X86ISD::FRCP: return "X86ISD::FRCP"; 4525 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 4526 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 4527 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 4528 } 4529} 4530 4531// isLegalAddressingMode - Return true if the addressing mode represented 4532// by AM is legal for this target, for a load/store of the specified type. 4533bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 4534 const Type *Ty) const { 4535 // X86 supports extremely general addressing modes. 4536 4537 // X86 allows a sign-extended 32-bit immediate field as a displacement. 4538 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 4539 return false; 4540 4541 if (AM.BaseGV) { 4542 // We can only fold this if we don't need an extra load. 4543 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 4544 return false; 4545 4546 // X86-64 only supports addr of globals in small code model. 4547 if (Subtarget->is64Bit()) { 4548 if (getTargetMachine().getCodeModel() != CodeModel::Small) 4549 return false; 4550 // If lower 4G is not available, then we must use rip-relative addressing. 4551 if (AM.BaseOffs || AM.Scale > 1) 4552 return false; 4553 } 4554 } 4555 4556 switch (AM.Scale) { 4557 case 0: 4558 case 1: 4559 case 2: 4560 case 4: 4561 case 8: 4562 // These scales always work. 4563 break; 4564 case 3: 4565 case 5: 4566 case 9: 4567 // These scales are formed with basereg+scalereg. Only accept if there is 4568 // no basereg yet. 4569 if (AM.HasBaseReg) 4570 return false; 4571 break; 4572 default: // Other stuff never works. 4573 return false; 4574 } 4575 4576 return true; 4577} 4578 4579 4580/// isShuffleMaskLegal - Targets can use this to indicate that they only 4581/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4582/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4583/// are assumed to be legal. 4584bool 4585X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4586 // Only do shuffles on 128-bit vector types for now. 
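  // 64-bit (MMX) vectors are rejected below.  For 128-bit vectors, masks with
  // at most 4 elements are always accepted; larger masks are accepted only if
  // they match one of the patterns the shuffle lowering handles directly
  // (identity, splat, pshufhw/pshuflw, or the unpckl/unpckh forms).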
4587 if (MVT::getSizeInBits(VT) == 64) return false; 4588 return (Mask.Val->getNumOperands() <= 4 || 4589 isIdentityMask(Mask.Val) || 4590 isIdentityMask(Mask.Val, true) || 4591 isSplatMask(Mask.Val) || 4592 isPSHUFHW_PSHUFLWMask(Mask.Val) || 4593 X86::isUNPCKLMask(Mask.Val) || 4594 X86::isUNPCKHMask(Mask.Val) || 4595 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 4596 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 4597} 4598 4599bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 4600 MVT::ValueType EVT, 4601 SelectionDAG &DAG) const { 4602 unsigned NumElts = BVOps.size(); 4603 // Only do shuffles on 128-bit vector types for now. 4604 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 4605 if (NumElts == 2) return true; 4606 if (NumElts == 4) { 4607 return (isMOVLMask(&BVOps[0], 4) || 4608 isCommutedMOVL(&BVOps[0], 4, true) || 4609 isSHUFPMask(&BVOps[0], 4) || 4610 isCommutedSHUFP(&BVOps[0], 4)); 4611 } 4612 return false; 4613} 4614 4615//===----------------------------------------------------------------------===// 4616// X86 Scheduler Hooks 4617//===----------------------------------------------------------------------===// 4618 4619MachineBasicBlock * 4620X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 4621 MachineBasicBlock *BB) { 4622 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4623 switch (MI->getOpcode()) { 4624 default: assert(false && "Unexpected instr type to insert"); 4625 case X86::CMOV_FR32: 4626 case X86::CMOV_FR64: 4627 case X86::CMOV_V4F32: 4628 case X86::CMOV_V2F64: 4629 case X86::CMOV_V2I64: { 4630 // To "insert" a SELECT_CC instruction, we actually have to insert the 4631 // diamond control-flow pattern. The incoming instruction knows the 4632 // destination vreg to set, the condition code register to branch on, the 4633 // true/false values to select between, and a branch opcode to use. 4634 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4635 ilist<MachineBasicBlock>::iterator It = BB; 4636 ++It; 4637 4638 // thisMBB: 4639 // ... 4640 // TrueVal = ... 4641 // cmpTY ccX, r1, r2 4642 // bCC copy1MBB 4643 // fallthrough --> copy0MBB 4644 MachineBasicBlock *thisMBB = BB; 4645 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 4646 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 4647 unsigned Opc = 4648 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 4649 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 4650 MachineFunction *F = BB->getParent(); 4651 F->getBasicBlockList().insert(It, copy0MBB); 4652 F->getBasicBlockList().insert(It, sinkMBB); 4653 // Update machine-CFG edges by first adding all successors of the current 4654 // block to the new block which will contain the Phi node for the select. 4655 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 4656 e = BB->succ_end(); i != e; ++i) 4657 sinkMBB->addSuccessor(*i); 4658 // Next, remove all successors of the current block, and add the true 4659 // and fallthrough blocks as its successors. 4660 while(!BB->succ_empty()) 4661 BB->removeSuccessor(BB->succ_begin()); 4662 BB->addSuccessor(copy0MBB); 4663 BB->addSuccessor(sinkMBB); 4664 4665 // copy0MBB: 4666 // %FalseValue = ... 4667 // # fallthrough to sinkMBB 4668 BB = copy0MBB; 4669 4670 // Update machine-CFG edges 4671 BB->addSuccessor(sinkMBB); 4672 4673 // sinkMBB: 4674 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4675 // ... 
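    // Build the PHI that defines the pseudo's destination register: operand 1
    // of the CMOV pseudo is the value arriving from copy0MBB (the fall-through
    // path), operand 2 the value arriving from thisMBB (the branch-taken path).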
4676 BB = sinkMBB; 4677 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 4678 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4679 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4680 4681 delete MI; // The pseudo instruction is gone now. 4682 return BB; 4683 } 4684 4685 case X86::FP32_TO_INT16_IN_MEM: 4686 case X86::FP32_TO_INT32_IN_MEM: 4687 case X86::FP32_TO_INT64_IN_MEM: 4688 case X86::FP64_TO_INT16_IN_MEM: 4689 case X86::FP64_TO_INT32_IN_MEM: 4690 case X86::FP64_TO_INT64_IN_MEM: 4691 case X86::FP80_TO_INT16_IN_MEM: 4692 case X86::FP80_TO_INT32_IN_MEM: 4693 case X86::FP80_TO_INT64_IN_MEM: { 4694 // Change the floating point control register to use "round towards zero" 4695 // mode when truncating to an integer value. 4696 MachineFunction *F = BB->getParent(); 4697 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 4698 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 4699 4700 // Load the old value of the high byte of the control word... 4701 unsigned OldCW = 4702 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 4703 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 4704 4705 // Set the high part to be round to zero... 4706 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 4707 .addImm(0xC7F); 4708 4709 // Reload the modified control word now... 4710 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4711 4712 // Restore the memory image of control word to original value 4713 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 4714 .addReg(OldCW); 4715 4716 // Get the X86 opcode to use. 4717 unsigned Opc; 4718 switch (MI->getOpcode()) { 4719 default: assert(0 && "illegal opcode!"); 4720 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; 4721 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; 4722 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; 4723 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; 4724 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; 4725 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; 4726 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; 4727 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; 4728 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; 4729 } 4730 4731 X86AddressMode AM; 4732 MachineOperand &Op = MI->getOperand(0); 4733 if (Op.isRegister()) { 4734 AM.BaseType = X86AddressMode::RegBase; 4735 AM.Base.Reg = Op.getReg(); 4736 } else { 4737 AM.BaseType = X86AddressMode::FrameIndexBase; 4738 AM.Base.FrameIndex = Op.getFrameIndex(); 4739 } 4740 Op = MI->getOperand(1); 4741 if (Op.isImmediate()) 4742 AM.Scale = Op.getImm(); 4743 Op = MI->getOperand(2); 4744 if (Op.isImmediate()) 4745 AM.IndexReg = Op.getImm(); 4746 Op = MI->getOperand(3); 4747 if (Op.isGlobalAddress()) { 4748 AM.GV = Op.getGlobal(); 4749 } else { 4750 AM.Disp = Op.getImm(); 4751 } 4752 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 4753 .addReg(MI->getOperand(4).getReg()); 4754 4755 // Reload the original control word now. 4756 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4757 4758 delete MI; // The pseudo instruction is gone now. 
4759 return BB; 4760 } 4761 } 4762} 4763 4764//===----------------------------------------------------------------------===// 4765// X86 Optimization Hooks 4766//===----------------------------------------------------------------------===// 4767 4768void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 4769 uint64_t Mask, 4770 uint64_t &KnownZero, 4771 uint64_t &KnownOne, 4772 const SelectionDAG &DAG, 4773 unsigned Depth) const { 4774 unsigned Opc = Op.getOpcode(); 4775 assert((Opc >= ISD::BUILTIN_OP_END || 4776 Opc == ISD::INTRINSIC_WO_CHAIN || 4777 Opc == ISD::INTRINSIC_W_CHAIN || 4778 Opc == ISD::INTRINSIC_VOID) && 4779 "Should use MaskedValueIsZero if you don't know whether Op" 4780 " is a target node!"); 4781 4782 KnownZero = KnownOne = 0; // Don't know anything. 4783 switch (Opc) { 4784 default: break; 4785 case X86ISD::SETCC: 4786 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 4787 break; 4788 } 4789} 4790 4791/// getShuffleScalarElt - Returns the scalar element that will make up the ith 4792/// element of the result of the vector shuffle. 4793static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 4794 MVT::ValueType VT = N->getValueType(0); 4795 SDOperand PermMask = N->getOperand(2); 4796 unsigned NumElems = PermMask.getNumOperands(); 4797 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 4798 i %= NumElems; 4799 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 4800 return (i == 0) 4801 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 4802 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 4803 SDOperand Idx = PermMask.getOperand(i); 4804 if (Idx.getOpcode() == ISD::UNDEF) 4805 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 4806 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 4807 } 4808 return SDOperand(); 4809} 4810 4811/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 4812/// node is a GlobalAddress + an offset. 4813static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 4814 unsigned Opc = N->getOpcode(); 4815 if (Opc == X86ISD::Wrapper) { 4816 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 4817 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 4818 return true; 4819 } 4820 } else if (Opc == ISD::ADD) { 4821 SDOperand N1 = N->getOperand(0); 4822 SDOperand N2 = N->getOperand(1); 4823 if (isGAPlusOffset(N1.Val, GA, Offset)) { 4824 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 4825 if (V) { 4826 Offset += V->getSignExtended(); 4827 return true; 4828 } 4829 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 4830 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 4831 if (V) { 4832 Offset += V->getSignExtended(); 4833 return true; 4834 } 4835 } 4836 } 4837 return false; 4838} 4839 4840/// isConsecutiveLoad - Returns true if N is loading from an address of Base 4841/// + Dist * Size. 
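/// Both loads must hang off the same chain operand.  Frame-index addresses are
/// compared using their MachineFrameInfo object sizes and offsets; other
/// addresses are compared as (GlobalValue + constant offset) pairs via
/// isGAPlusOffset.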
4842static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 4843 MachineFrameInfo *MFI) { 4844 if (N->getOperand(0).Val != Base->getOperand(0).Val) 4845 return false; 4846 4847 SDOperand Loc = N->getOperand(1); 4848 SDOperand BaseLoc = Base->getOperand(1); 4849 if (Loc.getOpcode() == ISD::FrameIndex) { 4850 if (BaseLoc.getOpcode() != ISD::FrameIndex) 4851 return false; 4852 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 4853 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 4854 int FS = MFI->getObjectSize(FI); 4855 int BFS = MFI->getObjectSize(BFI); 4856 if (FS != BFS || FS != Size) return false; 4857 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 4858 } else { 4859 GlobalValue *GV1 = NULL; 4860 GlobalValue *GV2 = NULL; 4861 int64_t Offset1 = 0; 4862 int64_t Offset2 = 0; 4863 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 4864 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 4865 if (isGA1 && isGA2 && GV1 == GV2) 4866 return Offset1 == (Offset2 + Dist*Size); 4867 } 4868 4869 return false; 4870} 4871 4872static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 4873 const X86Subtarget *Subtarget) { 4874 GlobalValue *GV; 4875 int64_t Offset; 4876 if (isGAPlusOffset(Base, GV, Offset)) 4877 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 4878 else { 4879 assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); 4880 int BFI = cast<FrameIndexSDNode>(Base)->getIndex(); 4881 if (BFI < 0) 4882 // Fixed objects do not specify alignment, however the offsets are known. 4883 return ((Subtarget->getStackAlignment() % 16) == 0 && 4884 (MFI->getObjectOffset(BFI) % 16) == 0); 4885 else 4886 return MFI->getObjectAlignment(BFI) >= 16; 4887 } 4888 return false; 4889} 4890 4891 4892/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 4893/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 4894/// if the load addresses are consecutive, non-overlapping, and in the right 4895/// order. 4896static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 4897 const X86Subtarget *Subtarget) { 4898 MachineFunction &MF = DAG.getMachineFunction(); 4899 MachineFrameInfo *MFI = MF.getFrameInfo(); 4900 MVT::ValueType VT = N->getValueType(0); 4901 MVT::ValueType EVT = MVT::getVectorElementType(VT); 4902 SDOperand PermMask = N->getOperand(2); 4903 int NumElems = (int)PermMask.getNumOperands(); 4904 SDNode *Base = NULL; 4905 for (int i = 0; i < NumElems; ++i) { 4906 SDOperand Idx = PermMask.getOperand(i); 4907 if (Idx.getOpcode() == ISD::UNDEF) { 4908 if (!Base) return SDOperand(); 4909 } else { 4910 SDOperand Arg = 4911 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 4912 if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val)) 4913 return SDOperand(); 4914 if (!Base) 4915 Base = Arg.Val; 4916 else if (!isConsecutiveLoad(Arg.Val, Base, 4917 i, MVT::getSizeInBits(EVT)/8,MFI)) 4918 return SDOperand(); 4919 } 4920 } 4921 4922 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 4923 LoadSDNode *LD = cast<LoadSDNode>(Base); 4924 if (isAlign16) { 4925 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 4926 LD->getSrcValueOffset(), LD->isVolatile()); 4927 } else { 4928 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 4929 LD->getSrcValueOffset(), LD->isVolatile(), 4930 LD->getAlignment()); 4931 } 4932} 4933 4934/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 
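/// With SSE2 available and an f32/f64 select whose condition is a SETCC on the
/// same two values being selected, this forms X86ISD::FMIN/FMAX nodes, e.g.
///   (select (setolt f32 %x, %y), %x, %y)  ->  (X86ISD::FMIN %x, %y)
/// Several of the condition codes below are only converted when
/// -enable-unsafe-fp-math is given (the !UnsafeFPMath checks), as MINSS/MAXSS
/// do not match the select exactly for NaN and signed-zero inputs.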
4935static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 4936 const X86Subtarget *Subtarget) { 4937 SDOperand Cond = N->getOperand(0); 4938 4939 // If we have SSE[12] support, try to form min/max nodes. 4940 if (Subtarget->hasSSE2() && 4941 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 4942 if (Cond.getOpcode() == ISD::SETCC) { 4943 // Get the LHS/RHS of the select. 4944 SDOperand LHS = N->getOperand(1); 4945 SDOperand RHS = N->getOperand(2); 4946 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 4947 4948 unsigned Opcode = 0; 4949 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 4950 switch (CC) { 4951 default: break; 4952 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 4953 case ISD::SETULE: 4954 case ISD::SETLE: 4955 if (!UnsafeFPMath) break; 4956 // FALL THROUGH. 4957 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 4958 case ISD::SETLT: 4959 Opcode = X86ISD::FMIN; 4960 break; 4961 4962 case ISD::SETOGT: // (X > Y) ? X : Y -> max 4963 case ISD::SETUGT: 4964 case ISD::SETGT: 4965 if (!UnsafeFPMath) break; 4966 // FALL THROUGH. 4967 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 4968 case ISD::SETGE: 4969 Opcode = X86ISD::FMAX; 4970 break; 4971 } 4972 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 4973 switch (CC) { 4974 default: break; 4975 case ISD::SETOGT: // (X > Y) ? Y : X -> min 4976 case ISD::SETUGT: 4977 case ISD::SETGT: 4978 if (!UnsafeFPMath) break; 4979 // FALL THROUGH. 4980 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 4981 case ISD::SETGE: 4982 Opcode = X86ISD::FMIN; 4983 break; 4984 4985 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 4986 case ISD::SETULE: 4987 case ISD::SETLE: 4988 if (!UnsafeFPMath) break; 4989 // FALL THROUGH. 4990 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 4991 case ISD::SETLT: 4992 Opcode = X86ISD::FMAX; 4993 break; 4994 } 4995 } 4996 4997 if (Opcode) 4998 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 4999 } 5000 5001 } 5002 5003 return SDOperand(); 5004} 5005 5006 5007SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 5008 DAGCombinerInfo &DCI) const { 5009 SelectionDAG &DAG = DCI.DAG; 5010 switch (N->getOpcode()) { 5011 default: break; 5012 case ISD::VECTOR_SHUFFLE: 5013 return PerformShuffleCombine(N, DAG, Subtarget); 5014 case ISD::SELECT: 5015 return PerformSELECTCombine(N, DAG, Subtarget); 5016 } 5017 5018 return SDOperand(); 5019} 5020 5021//===----------------------------------------------------------------------===// 5022// X86 Inline Assembly Support 5023//===----------------------------------------------------------------------===// 5024 5025/// getConstraintType - Given a constraint letter, return the type of 5026/// constraint it is for this target. 5027X86TargetLowering::ConstraintType 5028X86TargetLowering::getConstraintType(const std::string &Constraint) const { 5029 if (Constraint.size() == 1) { 5030 switch (Constraint[0]) { 5031 case 'A': 5032 case 'r': 5033 case 'R': 5034 case 'l': 5035 case 'q': 5036 case 'Q': 5037 case 'x': 5038 case 'Y': 5039 return C_RegisterClass; 5040 default: 5041 break; 5042 } 5043 } 5044 return TargetLowering::getConstraintType(Constraint); 5045} 5046 5047/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5048/// vector. If it is invalid, don't add anything to Ops. 
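/// Handled here: 'I' (an immediate in [0,31], e.g. a shift count), 'N' (an
/// immediate in [0,255], e.g. an in/out port), and 'i' (any literal immediate,
/// or a global address plus optional constant offset when no extra load is
/// required to materialize it).  Everything else is deferred to the
/// target-independent TargetLowering implementation.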
5049void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op, 5050 char Constraint, 5051 std::vector<SDOperand>&Ops, 5052 SelectionDAG &DAG) { 5053 SDOperand Result(0, 0); 5054 5055 switch (Constraint) { 5056 default: break; 5057 case 'I': 5058 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5059 if (C->getValue() <= 31) { 5060 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5061 break; 5062 } 5063 } 5064 return; 5065 case 'N': 5066 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5067 if (C->getValue() <= 255) { 5068 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5069 break; 5070 } 5071 } 5072 return; 5073 case 'i': { 5074 // Literal immediates are always ok. 5075 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { 5076 Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType()); 5077 break; 5078 } 5079 5080 // If we are in non-pic codegen mode, we allow the address of a global (with 5081 // an optional displacement) to be used with 'i'. 5082 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); 5083 int64_t Offset = 0; 5084 5085 // Match either (GA) or (GA+C) 5086 if (GA) { 5087 Offset = GA->getOffset(); 5088 } else if (Op.getOpcode() == ISD::ADD) { 5089 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5090 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5091 if (C && GA) { 5092 Offset = GA->getOffset()+C->getValue(); 5093 } else { 5094 C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5095 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5096 if (C && GA) 5097 Offset = GA->getOffset()+C->getValue(); 5098 else 5099 C = 0, GA = 0; 5100 } 5101 } 5102 5103 if (GA) { 5104 // If addressing this global requires a load (e.g. in PIC mode), we can't 5105 // match. 5106 if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(), 5107 false)) 5108 return; 5109 5110 Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), 5111 Offset); 5112 Result = Op; 5113 break; 5114 } 5115 5116 // Otherwise, not valid for this mode. 5117 return; 5118 } 5119 } 5120 5121 if (Result.Val) { 5122 Ops.push_back(Result); 5123 return; 5124 } 5125 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5126} 5127 5128std::vector<unsigned> X86TargetLowering:: 5129getRegClassForInlineAsmConstraint(const std::string &Constraint, 5130 MVT::ValueType VT) const { 5131 if (Constraint.size() == 1) { 5132 // FIXME: not handling fp-stack yet! 5133 switch (Constraint[0]) { // GCC X86 Constraint Letters 5134 default: break; // Unknown constraint letter 5135 case 'A': // EAX/EDX 5136 if (VT == MVT::i32 || VT == MVT::i64) 5137 return make_vector<unsigned>(X86::EAX, X86::EDX, 0); 5138 break; 5139 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 5140 case 'Q': // Q_REGS 5141 if (VT == MVT::i32) 5142 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); 5143 else if (VT == MVT::i16) 5144 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); 5145 else if (VT == MVT::i8) 5146 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); 5147 break; 5148 } 5149 } 5150 5151 return std::vector<unsigned>(); 5152} 5153 5154std::pair<unsigned, const TargetRegisterClass*> 5155X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5156 MVT::ValueType VT) const { 5157 // First, see if this is a constraint that directly corresponds to an LLVM 5158 // register class. 
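  // Single-letter constraints handled here: 'r'/'R'/'l' pick the
  // GR8/GR16/GR32/GR64 class matching VT, 'y' picks VR64 when MMX is
  // available, and 'x'/'Y' pick the scalar FR32/FR64 or vector VR128 classes
  // when SSE1/SSE2 is available.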
5159 if (Constraint.size() == 1) { 5160 // GCC Constraint Letters 5161 switch (Constraint[0]) { 5162 default: break; 5163 case 'r': // GENERAL_REGS 5164 case 'R': // LEGACY_REGS 5165 case 'l': // INDEX_REGS 5166 if (VT == MVT::i64 && Subtarget->is64Bit()) 5167 return std::make_pair(0U, X86::GR64RegisterClass); 5168 if (VT == MVT::i32) 5169 return std::make_pair(0U, X86::GR32RegisterClass); 5170 else if (VT == MVT::i16) 5171 return std::make_pair(0U, X86::GR16RegisterClass); 5172 else if (VT == MVT::i8) 5173 return std::make_pair(0U, X86::GR8RegisterClass); 5174 break; 5175 case 'y': // MMX_REGS if MMX allowed. 5176 if (!Subtarget->hasMMX()) break; 5177 return std::make_pair(0U, X86::VR64RegisterClass); 5178 break; 5179 case 'Y': // SSE_REGS if SSE2 allowed 5180 if (!Subtarget->hasSSE2()) break; 5181 // FALL THROUGH. 5182 case 'x': // SSE_REGS if SSE1 allowed 5183 if (!Subtarget->hasSSE1()) break; 5184 5185 switch (VT) { 5186 default: break; 5187 // Scalar SSE types. 5188 case MVT::f32: 5189 case MVT::i32: 5190 return std::make_pair(0U, X86::FR32RegisterClass); 5191 case MVT::f64: 5192 case MVT::i64: 5193 return std::make_pair(0U, X86::FR64RegisterClass); 5194 // Vector types. 5195 case MVT::v16i8: 5196 case MVT::v8i16: 5197 case MVT::v4i32: 5198 case MVT::v2i64: 5199 case MVT::v4f32: 5200 case MVT::v2f64: 5201 return std::make_pair(0U, X86::VR128RegisterClass); 5202 } 5203 break; 5204 } 5205 } 5206 5207 // Use the default implementation in TargetLowering to convert the register 5208 // constraint into a member of a register class. 5209 std::pair<unsigned, const TargetRegisterClass*> Res; 5210 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5211 5212 // Not found as a standard register? 5213 if (Res.second == 0) { 5214 // GCC calls "st(0)" just plain "st". 5215 if (StringsEqualNoCase("{st}", Constraint)) { 5216 Res.first = X86::ST0; 5217 Res.second = X86::RSTRegisterClass; 5218 } 5219 5220 return Res; 5221 } 5222 5223 // Otherwise, check to see if this is a register class of the wrong value 5224 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to 5225 // turn into {ax},{dx}. 5226 if (Res.second->hasType(VT)) 5227 return Res; // Correct type already, nothing to do. 5228 5229 // All of the single-register GCC register classes map their values onto 5230 // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we 5231 // really want an 8-bit or 32-bit register, map to the appropriate register 5232 // class and return the appropriate register. 
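  // For example, "{ax}" with VT==MVT::i8 is rewritten to AL in GR8, with
  // MVT::i32 to EAX in GR32, and with MVT::i64 to RAX in GR64.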
5233 if (Res.second != X86::GR16RegisterClass) 5234 return Res; 5235 5236 if (VT == MVT::i8) { 5237 unsigned DestReg = 0; 5238 switch (Res.first) { 5239 default: break; 5240 case X86::AX: DestReg = X86::AL; break; 5241 case X86::DX: DestReg = X86::DL; break; 5242 case X86::CX: DestReg = X86::CL; break; 5243 case X86::BX: DestReg = X86::BL; break; 5244 } 5245 if (DestReg) { 5246 Res.first = DestReg; 5247 Res.second = X86::GR8RegisterClass; 5248 } 5249 } else if (VT == MVT::i32) { 5250 unsigned DestReg = 0; 5251 switch (Res.first) { 5252 default: break; 5253 case X86::AX: DestReg = X86::EAX; break; 5254 case X86::DX: DestReg = X86::EDX; break; 5255 case X86::CX: DestReg = X86::ECX; break; 5256 case X86::BX: DestReg = X86::EBX; break; 5257 case X86::SI: DestReg = X86::ESI; break; 5258 case X86::DI: DestReg = X86::EDI; break; 5259 case X86::BP: DestReg = X86::EBP; break; 5260 case X86::SP: DestReg = X86::ESP; break; 5261 } 5262 if (DestReg) { 5263 Res.first = DestReg; 5264 Res.second = X86::GR32RegisterClass; 5265 } 5266 } else if (VT == MVT::i64) { 5267 unsigned DestReg = 0; 5268 switch (Res.first) { 5269 default: break; 5270 case X86::AX: DestReg = X86::RAX; break; 5271 case X86::DX: DestReg = X86::RDX; break; 5272 case X86::CX: DestReg = X86::RCX; break; 5273 case X86::BX: DestReg = X86::RBX; break; 5274 case X86::SI: DestReg = X86::RSI; break; 5275 case X86::DI: DestReg = X86::RDI; break; 5276 case X86::BP: DestReg = X86::RBP; break; 5277 case X86::SP: DestReg = X86::RSP; break; 5278 } 5279 if (DestReg) { 5280 Res.first = DestReg; 5281 Res.second = X86::GR64RegisterClass; 5282 } 5283 } 5284 5285 return Res; 5286} 5287