X86ISelLowering.cpp revision f04afdbb48568ef09f11fd10ac03426101f2dbf8
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that X86 uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86.h" 16#include "X86InstrBuilder.h" 17#include "X86ISelLowering.h" 18#include "X86MachineFunctionInfo.h" 19#include "X86TargetMachine.h" 20#include "llvm/CallingConv.h" 21#include "llvm/Constants.h" 22#include "llvm/DerivedTypes.h" 23#include "llvm/GlobalVariable.h" 24#include "llvm/Function.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/ADT/VectorExtras.h" 27#include "llvm/Analysis/ScalarEvolutionExpressions.h" 28#include "llvm/CodeGen/CallingConvLower.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineFunction.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/SelectionDAG.h" 33#include "llvm/CodeGen/SSARegMap.h" 34#include "llvm/Support/MathExtras.h" 35#include "llvm/Target/TargetOptions.h" 36#include "llvm/ADT/StringExtras.h" 37#include "llvm/ParameterAttributes.h" 38using namespace llvm; 39 40X86TargetLowering::X86TargetLowering(TargetMachine &TM) 41 : TargetLowering(TM) { 42 Subtarget = &TM.getSubtarget<X86Subtarget>(); 43 X86ScalarSSE = Subtarget->hasSSE2(); 44 X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; 45 46 RegInfo = TM.getRegisterInfo(); 47 48 // Set up the TargetLowering object. 49 50 // X86 is weird, it always uses i8 for shift amounts and setcc results. 51 setShiftAmountType(MVT::i8); 52 setSetCCResultType(MVT::i8); 53 setSetCCResultContents(ZeroOrOneSetCCResult); 54 setSchedulingPreference(SchedulingForRegPressure); 55 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 56 setStackPointerRegisterToSaveRestore(X86StackPtr); 57 58 if (Subtarget->isTargetDarwin()) { 59 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 60 setUseUnderscoreSetJmp(false); 61 setUseUnderscoreLongJmp(false); 62 } else if (Subtarget->isTargetMingw()) { 63 // MS runtime is weird: it exports _setjmp, but longjmp! 64 setUseUnderscoreSetJmp(true); 65 setUseUnderscoreLongJmp(false); 66 } else { 67 setUseUnderscoreSetJmp(true); 68 setUseUnderscoreLongJmp(true); 69 } 70 71 // Set up the register classes. 72 addRegisterClass(MVT::i8, X86::GR8RegisterClass); 73 addRegisterClass(MVT::i16, X86::GR16RegisterClass); 74 addRegisterClass(MVT::i32, X86::GR32RegisterClass); 75 if (Subtarget->is64Bit()) 76 addRegisterClass(MVT::i64, X86::GR64RegisterClass); 77 78 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand); 79 80 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 81 // operation. 82 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 83 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 84 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 85 86 if (Subtarget->is64Bit()) { 87 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); 88 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 89 } else { 90 if (X86ScalarSSE) 91 // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP. 92 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 93 else 94 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 95 } 96 97 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 98 // this operation. 99 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 100 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 101 // SSE has no i16 to fp conversion, only i32 102 if (X86ScalarSSE) 103 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 104 else { 105 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 106 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 107 } 108 109 if (!Subtarget->is64Bit()) { 110 // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode. 111 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 112 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 113 } 114 115 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 116 // this operation. 117 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 118 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 119 120 if (X86ScalarSSE) { 121 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 122 } else { 123 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 124 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 125 } 126 127 // Handle FP_TO_UINT by promoting the destination to a larger signed 128 // conversion. 129 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 130 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 131 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 132 133 if (Subtarget->is64Bit()) { 134 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); 135 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 136 } else { 137 if (X86ScalarSSE && !Subtarget->hasSSE3()) 138 // Expand FP_TO_UINT into a select. 139 // FIXME: We would like to use a Custom expander here eventually to do 140 // the optimal thing for SSE vs. the default expansion in the legalizer. 141 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 142 else 143 // With SSE3 we can use fisttpll to convert to a signed i64. 144 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 145 } 146 147 // TODO: when we have SSE, these could be more efficient, by using movd/movq. 148 if (!X86ScalarSSE) { 149 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 150 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 151 } 152 153 setOperationAction(ISD::BR_JT , MVT::Other, Expand); 154 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 155 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 156 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 157 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 158 if (Subtarget->is64Bit()) 159 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); 160 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); 161 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); 162 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 163 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 164 setOperationAction(ISD::FREM , MVT::f64 , Expand); 165 166 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 167 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 168 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 169 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 170 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 171 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 172 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 173 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 174 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 175 if (Subtarget->is64Bit()) { 176 setOperationAction(ISD::CTPOP , MVT::i64 , Expand); 177 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 178 setOperationAction(ISD::CTLZ , MVT::i64 , Expand); 179 } 180 181 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 182 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 183 184 // These should be promoted to a larger select which is supported. 185 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 186 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 187 // X86 wants to expand cmov itself. 188 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 189 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 190 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 191 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 192 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 193 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 194 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 195 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 196 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 197 if (Subtarget->is64Bit()) { 198 setOperationAction(ISD::SELECT , MVT::i64 , Custom); 199 setOperationAction(ISD::SETCC , MVT::i64 , Custom); 200 } 201 // X86 ret instruction may pop stack. 202 setOperationAction(ISD::RET , MVT::Other, Custom); 203 if (!Subtarget->is64Bit()) 204 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); 205 206 // Darwin ABI issue. 207 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 208 setOperationAction(ISD::JumpTable , MVT::i32 , Custom); 209 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 210 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); 211 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 212 if (Subtarget->is64Bit()) { 213 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); 214 setOperationAction(ISD::JumpTable , MVT::i64 , Custom); 215 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); 216 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom); 217 } 218 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 219 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 220 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 221 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 222 // X86 wants to expand memset / memcpy itself. 223 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 224 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 225 226 // We don't have line number support yet. 227 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 228 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 229 // FIXME - use subtarget debug flags 230 if (!Subtarget->isTargetDarwin() && 231 !Subtarget->isTargetELF() && 232 !Subtarget->isTargetCygMing()) 233 setOperationAction(ISD::LABEL, MVT::Other, Expand); 234 235 setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); 236 setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); 237 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 238 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 239 if (Subtarget->is64Bit()) { 240 // FIXME: Verify 241 setExceptionPointerRegister(X86::RAX); 242 setExceptionSelectorRegister(X86::RDX); 243 } else { 244 setExceptionPointerRegister(X86::EAX); 245 setExceptionSelectorRegister(X86::EDX); 246 } 247 248 setOperationAction(ISD::ADJUST_TRAMP, MVT::i32, Expand); 249 setOperationAction(ISD::ADJUST_TRAMP, MVT::i64, Expand); 250 setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); 251 252 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 253 setOperationAction(ISD::VASTART , MVT::Other, Custom); 254 setOperationAction(ISD::VAARG , MVT::Other, Expand); 255 setOperationAction(ISD::VAEND , MVT::Other, Expand); 256 if (Subtarget->is64Bit()) 257 setOperationAction(ISD::VACOPY , MVT::Other, Custom); 258 else 259 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 260 261 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 262 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 263 if (Subtarget->is64Bit()) 264 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); 265 if (Subtarget->isTargetCygMing()) 266 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); 267 else 268 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 269 270 if (X86ScalarSSE) { 271 // Set up the FP register classes. 272 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 273 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 274 275 // Use ANDPD to simulate FABS. 276 setOperationAction(ISD::FABS , MVT::f64, Custom); 277 setOperationAction(ISD::FABS , MVT::f32, Custom); 278 279 // Use XORP to simulate FNEG. 280 setOperationAction(ISD::FNEG , MVT::f64, Custom); 281 setOperationAction(ISD::FNEG , MVT::f32, Custom); 282 283 // Use ANDPD and ORPD to simulate FCOPYSIGN. 284 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 285 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 286 287 // We don't support sin/cos/fmod 288 setOperationAction(ISD::FSIN , MVT::f64, Expand); 289 setOperationAction(ISD::FCOS , MVT::f64, Expand); 290 setOperationAction(ISD::FREM , MVT::f64, Expand); 291 setOperationAction(ISD::FSIN , MVT::f32, Expand); 292 setOperationAction(ISD::FCOS , MVT::f32, Expand); 293 setOperationAction(ISD::FREM , MVT::f32, Expand); 294 295 // Expand FP immediates into loads from the stack, except for the special 296 // cases we handle. 297 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 298 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 299 addLegalFPImmediate(APFloat(+0.0)); // xorps / xorpd 300 301 // Conversions to long double (in X87) go through memory. 302 setConvertAction(MVT::f32, MVT::f80, Expand); 303 setConvertAction(MVT::f64, MVT::f80, Expand); 304 305 // Conversions from long double (in X87) go through memory. 306 setConvertAction(MVT::f80, MVT::f32, Expand); 307 setConvertAction(MVT::f80, MVT::f64, Expand); 308 } else { 309 // Set up the FP register classes. 310 addRegisterClass(MVT::f64, X86::RFP64RegisterClass); 311 addRegisterClass(MVT::f32, X86::RFP32RegisterClass); 312 313 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 314 setOperationAction(ISD::UNDEF, MVT::f32, Expand); 315 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 316 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 317 318 // Floating truncations need to go through memory. 319 setConvertAction(MVT::f80, MVT::f32, Expand); 320 setConvertAction(MVT::f64, MVT::f32, Expand); 321 setConvertAction(MVT::f80, MVT::f64, Expand); 322 323 if (!UnsafeFPMath) { 324 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 325 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 326 } 327 328 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 329 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 330 addLegalFPImmediate(APFloat(+0.0)); // FLD0 331 addLegalFPImmediate(APFloat(+1.0)); // FLD1 332 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS 333 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS 334 } 335 336 // Long double always uses X87. 337 addRegisterClass(MVT::f80, X86::RFP80RegisterClass); 338 339 // First set operation action for all vector types to expand. Then we 340 // will selectively turn on ones that can be effectively codegen'd. 341 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 342 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 343 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 344 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 345 setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand); 346 setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand); 347 setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand); 348 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 349 setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand); 350 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand); 351 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand); 352 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand); 353 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand); 354 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand); 355 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 356 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 357 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 358 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 359 setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand); 360 setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand); 361 setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand); 362 setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand); 363 setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand); 364 setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand); 365 setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand); 366 } 367 368 if (Subtarget->hasMMX()) { 369 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 370 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 371 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 372 addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); 373 374 // FIXME: add MMX packed arithmetics 375 376 setOperationAction(ISD::ADD, MVT::v8i8, Legal); 377 setOperationAction(ISD::ADD, MVT::v4i16, Legal); 378 setOperationAction(ISD::ADD, MVT::v2i32, Legal); 379 setOperationAction(ISD::ADD, MVT::v1i64, Legal); 380 381 setOperationAction(ISD::SUB, MVT::v8i8, Legal); 382 setOperationAction(ISD::SUB, MVT::v4i16, Legal); 383 setOperationAction(ISD::SUB, MVT::v2i32, Legal); 384 385 setOperationAction(ISD::MULHS, MVT::v4i16, Legal); 386 setOperationAction(ISD::MUL, MVT::v4i16, Legal); 387 388 setOperationAction(ISD::AND, MVT::v8i8, Promote); 389 AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); 390 setOperationAction(ISD::AND, MVT::v4i16, Promote); 391 AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); 392 setOperationAction(ISD::AND, MVT::v2i32, Promote); 393 AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); 394 setOperationAction(ISD::AND, MVT::v1i64, Legal); 395 396 setOperationAction(ISD::OR, MVT::v8i8, Promote); 397 AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); 398 setOperationAction(ISD::OR, MVT::v4i16, Promote); 399 AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); 400 setOperationAction(ISD::OR, MVT::v2i32, Promote); 401 AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); 402 setOperationAction(ISD::OR, MVT::v1i64, Legal); 403 404 setOperationAction(ISD::XOR, MVT::v8i8, Promote); 405 AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); 406 setOperationAction(ISD::XOR, MVT::v4i16, Promote); 407 AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); 408 setOperationAction(ISD::XOR, MVT::v2i32, Promote); 409 AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); 410 setOperationAction(ISD::XOR, MVT::v1i64, Legal); 411 412 setOperationAction(ISD::LOAD, MVT::v8i8, Promote); 413 AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); 414 setOperationAction(ISD::LOAD, MVT::v4i16, Promote); 415 AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); 416 setOperationAction(ISD::LOAD, MVT::v2i32, Promote); 417 AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); 418 setOperationAction(ISD::LOAD, MVT::v1i64, Legal); 419 420 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); 421 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); 422 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); 423 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); 424 425 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); 426 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); 427 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); 428 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); 429 430 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); 431 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); 432 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom); 433 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); 434 } 435 436 if (Subtarget->hasSSE1()) { 437 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 438 439 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 440 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 441 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 442 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 443 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 444 setOperationAction(ISD::FNEG, MVT::v4f32, Custom); 445 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 446 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 447 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 448 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 449 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 450 } 451 452 if (Subtarget->hasSSE2()) { 453 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 454 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 455 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 456 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 457 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 458 459 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 460 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 461 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 462 setOperationAction(ISD::ADD, MVT::v2i64, Legal); 463 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 464 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 465 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 466 setOperationAction(ISD::SUB, MVT::v2i64, Legal); 467 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 468 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 469 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 470 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 471 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 472 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 473 setOperationAction(ISD::FNEG, MVT::v2f64, Custom); 474 475 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 476 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 477 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 478 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 479 // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. 480 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 481 482 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 483 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 484 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); 485 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); 486 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); 487 } 488 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 489 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 490 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 491 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 492 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 493 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 494 495 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 496 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 497 setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); 498 AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); 499 setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); 500 AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); 501 setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); 502 AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); 503 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 504 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 505 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 506 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 507 } 508 509 // Custom lower v2i64 and v2f64 selects. 510 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 511 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 512 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 513 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 514 } 515 516 // We want to custom lower some of our intrinsics. 517 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 518 519 // We have target-specific dag combine patterns for the following nodes: 520 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 521 setTargetDAGCombine(ISD::SELECT); 522 523 computeRegisterProperties(); 524 525 // FIXME: These should be based on subtarget info. Plus, the values should 526 // be smaller when we are in optimizing for size mode. 527 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 528 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 529 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 530 allowUnalignedMemoryAccesses = true; // x86 supports it! 531} 532 533 534//===----------------------------------------------------------------------===// 535// Return Value Calling Convention Implementation 536//===----------------------------------------------------------------------===// 537 538#include "X86GenCallingConv.inc" 539 540/// LowerRET - Lower an ISD::RET node. 541SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { 542 assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args"); 543 544 SmallVector<CCValAssign, 16> RVLocs; 545 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); 546 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 547 CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); 548 CCInfo.AnalyzeReturn(Op.Val, RetCC_X86); 549 550 551 // If this is the first return lowered for this function, add the regs to the 552 // liveout set for the function. 553 if (DAG.getMachineFunction().liveout_empty()) { 554 for (unsigned i = 0; i != RVLocs.size(); ++i) 555 if (RVLocs[i].isRegLoc()) 556 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg()); 557 } 558 559 SDOperand Chain = Op.getOperand(0); 560 SDOperand Flag; 561 562 // Copy the result values into the output registers. 563 if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() || 564 RVLocs[0].getLocReg() != X86::ST0) { 565 for (unsigned i = 0; i != RVLocs.size(); ++i) { 566 CCValAssign &VA = RVLocs[i]; 567 assert(VA.isRegLoc() && "Can only return in registers!"); 568 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), 569 Flag); 570 Flag = Chain.getValue(1); 571 } 572 } else { 573 // We need to handle a destination of ST0 specially, because it isn't really 574 // a register. 575 SDOperand Value = Op.getOperand(1); 576 577 // If this is an FP return with ScalarSSE, we need to move the value from 578 // an XMM register onto the fp-stack. 579 if (X86ScalarSSE) { 580 SDOperand MemLoc; 581 582 // If this is a load into a scalarsse value, don't store the loaded value 583 // back to the stack, only to reload it: just replace the scalar-sse load. 584 if (ISD::isNON_EXTLoad(Value.Val) && 585 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 586 Chain = Value.getOperand(0); 587 MemLoc = Value.getOperand(1); 588 } else { 589 // Spill the value to memory and reload it into top of stack. 590 unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8; 591 MachineFunction &MF = DAG.getMachineFunction(); 592 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 593 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 594 Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0); 595 } 596 SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other); 597 SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())}; 598 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 599 Chain = Value.getValue(1); 600 } 601 602 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 603 SDOperand Ops[] = { Chain, Value }; 604 Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2); 605 Flag = Chain.getValue(1); 606 } 607 608 SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16); 609 if (Flag.Val) 610 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag); 611 else 612 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop); 613} 614 615 616/// LowerCallResult - Lower the result values of an ISD::CALL into the 617/// appropriate copies out of appropriate physical registers. This assumes that 618/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 619/// being lowered. The returns a SDNode with the same number of values as the 620/// ISD::CALL. 621SDNode *X86TargetLowering:: 622LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, 623 unsigned CallingConv, SelectionDAG &DAG) { 624 625 // Assign locations to each value returned by this call. 626 SmallVector<CCValAssign, 16> RVLocs; 627 bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0; 628 CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); 629 CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); 630 631 632 SmallVector<SDOperand, 8> ResultVals; 633 634 // Copy all of the result registers out of their specified physreg. 635 if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) { 636 for (unsigned i = 0; i != RVLocs.size(); ++i) { 637 Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(), 638 RVLocs[i].getValVT(), InFlag).getValue(1); 639 InFlag = Chain.getValue(2); 640 ResultVals.push_back(Chain.getValue(0)); 641 } 642 } else { 643 // Copies from the FP stack are special, as ST0 isn't a valid register 644 // before the fp stackifier runs. 645 646 // Copy ST0 into an RFP register with FP_GET_RESULT. 647 SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag); 648 SDOperand GROps[] = { Chain, InFlag }; 649 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2); 650 Chain = RetVal.getValue(1); 651 InFlag = RetVal.getValue(2); 652 653 // If we are using ScalarSSE, store ST(0) to the stack and reload it into 654 // an XMM register. 655 if (X86ScalarSSE) { 656 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 657 // shouldn't be necessary except that RFP cannot be live across 658 // multiple blocks. When stackifier is fixed, they can be uncoupled. 659 MachineFunction &MF = DAG.getMachineFunction(); 660 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 661 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 662 SDOperand Ops[] = { 663 Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag 664 }; 665 Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5); 666 RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0); 667 Chain = RetVal.getValue(1); 668 } 669 ResultVals.push_back(RetVal); 670 } 671 672 // Merge everything together with a MERGE_VALUES node. 673 ResultVals.push_back(Chain); 674 return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(), 675 &ResultVals[0], ResultVals.size()).Val; 676} 677 678 679//===----------------------------------------------------------------------===// 680// C & StdCall Calling Convention implementation 681//===----------------------------------------------------------------------===// 682// StdCall calling convention seems to be standard for many Windows' API 683// routines and around. It differs from C calling convention just a little: 684// callee should clean up the stack, not caller. Symbols should be also 685// decorated in some fancy way :) It doesn't support any vector arguments. 686 687/// AddLiveIn - This helper function adds the specified physical register to the 688/// MachineFunction as a live in value. It also creates a corresponding virtual 689/// register for it. 690static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 691 const TargetRegisterClass *RC) { 692 assert(RC->contains(PReg) && "Not the correct regclass!"); 693 unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC); 694 MF.addLiveIn(PReg, VReg); 695 return VReg; 696} 697 698SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, 699 bool isStdCall) { 700 unsigned NumArgs = Op.Val->getNumValues() - 1; 701 MachineFunction &MF = DAG.getMachineFunction(); 702 MachineFrameInfo *MFI = MF.getFrameInfo(); 703 SDOperand Root = Op.getOperand(0); 704 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 705 706 // Assign locations to all of the incoming arguments. 707 SmallVector<CCValAssign, 16> ArgLocs; 708 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 709 getTargetMachine(), ArgLocs); 710 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); 711 712 SmallVector<SDOperand, 8> ArgValues; 713 unsigned LastVal = ~0U; 714 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 715 CCValAssign &VA = ArgLocs[i]; 716 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 717 // places. 718 assert(VA.getValNo() != LastVal && 719 "Don't support value assigned to multiple locs yet"); 720 LastVal = VA.getValNo(); 721 722 if (VA.isRegLoc()) { 723 MVT::ValueType RegVT = VA.getLocVT(); 724 TargetRegisterClass *RC; 725 if (RegVT == MVT::i32) 726 RC = X86::GR32RegisterClass; 727 else { 728 assert(MVT::isVector(RegVT)); 729 RC = X86::VR128RegisterClass; 730 } 731 732 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 733 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 734 735 // If this is an 8 or 16-bit value, it is really passed promoted to 32 736 // bits. Insert an assert[sz]ext to capture this, then truncate to the 737 // right size. 738 if (VA.getLocInfo() == CCValAssign::SExt) 739 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 740 DAG.getValueType(VA.getValVT())); 741 else if (VA.getLocInfo() == CCValAssign::ZExt) 742 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 743 DAG.getValueType(VA.getValVT())); 744 745 if (VA.getLocInfo() != CCValAssign::Full) 746 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 747 748 ArgValues.push_back(ArgValue); 749 } else { 750 assert(VA.isMemLoc()); 751 752 // Create the nodes corresponding to a load from this parameter slot. 753 int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, 754 VA.getLocMemOffset()); 755 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 756 ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); 757 } 758 } 759 760 unsigned StackSize = CCInfo.getNextStackOffset(); 761 762 ArgValues.push_back(Root); 763 764 // If the function takes variable number of arguments, make a frame index for 765 // the start of the first vararg value... for expansion of llvm.va_start. 766 if (isVarArg) 767 VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); 768 769 if (isStdCall && !isVarArg) { 770 BytesToPopOnReturn = StackSize; // Callee pops everything.. 771 BytesCallerReserves = 0; 772 } else { 773 BytesToPopOnReturn = 0; // Callee pops nothing. 774 775 // If this is an sret function, the return should pop the hidden pointer. 776 if (NumArgs && 777 (cast<ConstantSDNode>(Op.getOperand(3))->getValue() & 778 ISD::ParamFlags::StructReturn)) 779 BytesToPopOnReturn = 4; 780 781 BytesCallerReserves = StackSize; 782 } 783 784 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 785 786 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 787 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 788 789 // Return the new list of results. 790 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), 791 &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); 792} 793 794SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, 795 unsigned CC) { 796 SDOperand Chain = Op.getOperand(0); 797 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 798 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 799 SDOperand Callee = Op.getOperand(4); 800 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 801 802 // Analyze operands of the call, assigning locations to each operand. 803 SmallVector<CCValAssign, 16> ArgLocs; 804 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 805 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); 806 807 // Get a count of how many bytes are to be pushed on the stack. 808 unsigned NumBytes = CCInfo.getNextStackOffset(); 809 810 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 811 812 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 813 SmallVector<SDOperand, 8> MemOpChains; 814 815 SDOperand StackPtr; 816 817 // Walk the register/memloc assignments, inserting copies/loads. 818 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 819 CCValAssign &VA = ArgLocs[i]; 820 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 821 822 // Promote the value if needed. 823 switch (VA.getLocInfo()) { 824 default: assert(0 && "Unknown loc info!"); 825 case CCValAssign::Full: break; 826 case CCValAssign::SExt: 827 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 828 break; 829 case CCValAssign::ZExt: 830 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 831 break; 832 case CCValAssign::AExt: 833 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 834 break; 835 } 836 837 if (VA.isRegLoc()) { 838 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 839 } else { 840 assert(VA.isMemLoc()); 841 if (StackPtr.Val == 0) 842 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 843 SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); 844 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 845 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 846 } 847 } 848 849 // If the first argument is an sret pointer, remember it. 850 bool isSRet = NumOps && 851 (cast<ConstantSDNode>(Op.getOperand(6))->getValue() & 852 ISD::ParamFlags::StructReturn); 853 854 if (!MemOpChains.empty()) 855 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 856 &MemOpChains[0], MemOpChains.size()); 857 858 // Build a sequence of copy-to-reg nodes chained together with token chain 859 // and flag operands which copy the outgoing args into registers. 860 SDOperand InFlag; 861 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 862 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 863 InFlag); 864 InFlag = Chain.getValue(1); 865 } 866 867 // ELF / PIC requires GOT in the EBX register before function calls via PLT 868 // GOT pointer. 869 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 870 Subtarget->isPICStyleGOT()) { 871 Chain = DAG.getCopyToReg(Chain, X86::EBX, 872 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 873 InFlag); 874 InFlag = Chain.getValue(1); 875 } 876 877 // If the callee is a GlobalAddress node (quite common, every direct call is) 878 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 879 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 880 // We should use extra load for direct calls to dllimported functions in 881 // non-JIT mode. 882 if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), 883 getTargetMachine(), true)) 884 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 885 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 886 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 887 888 // Returns a chain & a flag for retval copy to use. 889 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 890 SmallVector<SDOperand, 8> Ops; 891 Ops.push_back(Chain); 892 Ops.push_back(Callee); 893 894 // Add argument registers to the end of the list so that they are known live 895 // into the call. 896 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 897 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 898 RegsToPass[i].second.getValueType())); 899 900 // Add an implicit use GOT pointer in EBX. 901 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 902 Subtarget->isPICStyleGOT()) 903 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 904 905 if (InFlag.Val) 906 Ops.push_back(InFlag); 907 908 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 909 NodeTys, &Ops[0], Ops.size()); 910 InFlag = Chain.getValue(1); 911 912 // Create the CALLSEQ_END node. 913 unsigned NumBytesForCalleeToPush = 0; 914 915 if (CC == CallingConv::X86_StdCall) { 916 if (isVarArg) 917 NumBytesForCalleeToPush = isSRet ? 4 : 0; 918 else 919 NumBytesForCalleeToPush = NumBytes; 920 } else { 921 // If this is is a call to a struct-return function, the callee 922 // pops the hidden struct pointer, so we have to push it back. 923 // This is common for Darwin/X86, Linux & Mingw32 targets. 924 NumBytesForCalleeToPush = isSRet ? 4 : 0; 925 } 926 927 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 928 Ops.clear(); 929 Ops.push_back(Chain); 930 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 931 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); 932 Ops.push_back(InFlag); 933 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 934 InFlag = Chain.getValue(1); 935 936 // Handle result values, copying them out of physregs into vregs that we 937 // return. 938 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 939} 940 941 942//===----------------------------------------------------------------------===// 943// FastCall Calling Convention implementation 944//===----------------------------------------------------------------------===// 945// 946// The X86 'fastcall' calling convention passes up to two integer arguments in 947// registers (an appropriate portion of ECX/EDX), passes arguments in C order, 948// and requires that the callee pop its arguments off the stack (allowing proper 949// tail calls), and has the same return value conventions as C calling convs. 950// 951// This calling convention always arranges for the callee pop value to be 8n+4 952// bytes, which is needed for tail recursion elimination and stack alignment 953// reasons. 954SDOperand 955X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 956 MachineFunction &MF = DAG.getMachineFunction(); 957 MachineFrameInfo *MFI = MF.getFrameInfo(); 958 SDOperand Root = Op.getOperand(0); 959 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 960 961 // Assign locations to all of the incoming arguments. 962 SmallVector<CCValAssign, 16> ArgLocs; 963 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 964 getTargetMachine(), ArgLocs); 965 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall); 966 967 SmallVector<SDOperand, 8> ArgValues; 968 unsigned LastVal = ~0U; 969 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 970 CCValAssign &VA = ArgLocs[i]; 971 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 972 // places. 973 assert(VA.getValNo() != LastVal && 974 "Don't support value assigned to multiple locs yet"); 975 LastVal = VA.getValNo(); 976 977 if (VA.isRegLoc()) { 978 MVT::ValueType RegVT = VA.getLocVT(); 979 TargetRegisterClass *RC; 980 if (RegVT == MVT::i32) 981 RC = X86::GR32RegisterClass; 982 else { 983 assert(MVT::isVector(RegVT)); 984 RC = X86::VR128RegisterClass; 985 } 986 987 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 988 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 989 990 // If this is an 8 or 16-bit value, it is really passed promoted to 32 991 // bits. Insert an assert[sz]ext to capture this, then truncate to the 992 // right size. 993 if (VA.getLocInfo() == CCValAssign::SExt) 994 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 995 DAG.getValueType(VA.getValVT())); 996 else if (VA.getLocInfo() == CCValAssign::ZExt) 997 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 998 DAG.getValueType(VA.getValVT())); 999 1000 if (VA.getLocInfo() != CCValAssign::Full) 1001 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 1002 1003 ArgValues.push_back(ArgValue); 1004 } else { 1005 assert(VA.isMemLoc()); 1006 1007 // Create the nodes corresponding to a load from this parameter slot. 1008 int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, 1009 VA.getLocMemOffset()); 1010 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 1011 ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); 1012 } 1013 } 1014 1015 ArgValues.push_back(Root); 1016 1017 unsigned StackSize = CCInfo.getNextStackOffset(); 1018 1019 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { 1020 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1021 // arguments and the arguments after the retaddr has been pushed are aligned. 1022 if ((StackSize & 7) == 0) 1023 StackSize += 4; 1024 } 1025 1026 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 1027 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 1028 BytesToPopOnReturn = StackSize; // Callee pops all stack arguments. 1029 BytesCallerReserves = 0; 1030 1031 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1032 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1033 1034 // Return the new list of results. 1035 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), 1036 &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); 1037} 1038 1039SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, 1040 unsigned CC) { 1041 SDOperand Chain = Op.getOperand(0); 1042 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1043 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1044 SDOperand Callee = Op.getOperand(4); 1045 1046 // Analyze operands of the call, assigning locations to each operand. 1047 SmallVector<CCValAssign, 16> ArgLocs; 1048 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1049 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall); 1050 1051 // Get a count of how many bytes are to be pushed on the stack. 1052 unsigned NumBytes = CCInfo.getNextStackOffset(); 1053 1054 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { 1055 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1056 // arguments and the arguments after the retaddr has been pushed are aligned. 1057 if ((NumBytes & 7) == 0) 1058 NumBytes += 4; 1059 } 1060 1061 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1062 1063 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 1064 SmallVector<SDOperand, 8> MemOpChains; 1065 1066 SDOperand StackPtr; 1067 1068 // Walk the register/memloc assignments, inserting copies/loads. 1069 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1070 CCValAssign &VA = ArgLocs[i]; 1071 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1072 1073 // Promote the value if needed. 1074 switch (VA.getLocInfo()) { 1075 default: assert(0 && "Unknown loc info!"); 1076 case CCValAssign::Full: break; 1077 case CCValAssign::SExt: 1078 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1079 break; 1080 case CCValAssign::ZExt: 1081 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1082 break; 1083 case CCValAssign::AExt: 1084 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1085 break; 1086 } 1087 1088 if (VA.isRegLoc()) { 1089 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1090 } else { 1091 assert(VA.isMemLoc()); 1092 if (StackPtr.Val == 0) 1093 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 1094 SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); 1095 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1096 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1097 } 1098 } 1099 1100 if (!MemOpChains.empty()) 1101 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1102 &MemOpChains[0], MemOpChains.size()); 1103 1104 // Build a sequence of copy-to-reg nodes chained together with token chain 1105 // and flag operands which copy the outgoing args into registers. 1106 SDOperand InFlag; 1107 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1108 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1109 InFlag); 1110 InFlag = Chain.getValue(1); 1111 } 1112 1113 // If the callee is a GlobalAddress node (quite common, every direct call is) 1114 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1115 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1116 // We should use extra load for direct calls to dllimported functions in 1117 // non-JIT mode. 1118 if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1119 getTargetMachine(), true)) 1120 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1121 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1122 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1123 1124 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1125 // GOT pointer. 1126 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1127 Subtarget->isPICStyleGOT()) { 1128 Chain = DAG.getCopyToReg(Chain, X86::EBX, 1129 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 1130 InFlag); 1131 InFlag = Chain.getValue(1); 1132 } 1133 1134 // Returns a chain & a flag for retval copy to use. 1135 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1136 SmallVector<SDOperand, 8> Ops; 1137 Ops.push_back(Chain); 1138 Ops.push_back(Callee); 1139 1140 // Add argument registers to the end of the list so that they are known live 1141 // into the call. 1142 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1143 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1144 RegsToPass[i].second.getValueType())); 1145 1146 // Add an implicit use GOT pointer in EBX. 1147 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 1148 Subtarget->isPICStyleGOT()) 1149 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 1150 1151 if (InFlag.Val) 1152 Ops.push_back(InFlag); 1153 1154 // FIXME: Do not generate X86ISD::TAILCALL for now. 1155 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1156 NodeTys, &Ops[0], Ops.size()); 1157 InFlag = Chain.getValue(1); 1158 1159 // Returns a flag for retval copy to use. 1160 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1161 Ops.clear(); 1162 Ops.push_back(Chain); 1163 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1164 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1165 Ops.push_back(InFlag); 1166 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1167 InFlag = Chain.getValue(1); 1168 1169 // Handle result values, copying them out of physregs into vregs that we 1170 // return. 1171 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1172} 1173 1174 1175//===----------------------------------------------------------------------===// 1176// X86-64 C Calling Convention implementation 1177//===----------------------------------------------------------------------===// 1178 1179SDOperand 1180X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { 1181 MachineFunction &MF = DAG.getMachineFunction(); 1182 MachineFrameInfo *MFI = MF.getFrameInfo(); 1183 SDOperand Root = Op.getOperand(0); 1184 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1185 1186 static const unsigned GPR64ArgRegs[] = { 1187 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 1188 }; 1189 static const unsigned XMMArgRegs[] = { 1190 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1191 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1192 }; 1193 1194 1195 // Assign locations to all of the incoming arguments. 1196 SmallVector<CCValAssign, 16> ArgLocs; 1197 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 1198 getTargetMachine(), ArgLocs); 1199 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); 1200 1201 SmallVector<SDOperand, 8> ArgValues; 1202 unsigned LastVal = ~0U; 1203 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1204 CCValAssign &VA = ArgLocs[i]; 1205 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 1206 // places. 1207 assert(VA.getValNo() != LastVal && 1208 "Don't support value assigned to multiple locs yet"); 1209 LastVal = VA.getValNo(); 1210 1211 if (VA.isRegLoc()) { 1212 MVT::ValueType RegVT = VA.getLocVT(); 1213 TargetRegisterClass *RC; 1214 if (RegVT == MVT::i32) 1215 RC = X86::GR32RegisterClass; 1216 else if (RegVT == MVT::i64) 1217 RC = X86::GR64RegisterClass; 1218 else if (RegVT == MVT::f32) 1219 RC = X86::FR32RegisterClass; 1220 else if (RegVT == MVT::f64) 1221 RC = X86::FR64RegisterClass; 1222 else { 1223 assert(MVT::isVector(RegVT)); 1224 if (MVT::getSizeInBits(RegVT) == 64) { 1225 RC = X86::GR64RegisterClass; // MMX values are passed in GPRs. 1226 RegVT = MVT::i64; 1227 } else 1228 RC = X86::VR128RegisterClass; 1229 } 1230 1231 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 1232 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 1233 1234 // If this is an 8 or 16-bit value, it is really passed promoted to 32 1235 // bits. Insert an assert[sz]ext to capture this, then truncate to the 1236 // right size. 1237 if (VA.getLocInfo() == CCValAssign::SExt) 1238 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 1239 DAG.getValueType(VA.getValVT())); 1240 else if (VA.getLocInfo() == CCValAssign::ZExt) 1241 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 1242 DAG.getValueType(VA.getValVT())); 1243 1244 if (VA.getLocInfo() != CCValAssign::Full) 1245 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 1246 1247 // Handle MMX values passed in GPRs. 1248 if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass && 1249 MVT::getSizeInBits(RegVT) == 64) 1250 ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue); 1251 1252 ArgValues.push_back(ArgValue); 1253 } else { 1254 assert(VA.isMemLoc()); 1255 1256 // Create the nodes corresponding to a load from this parameter slot. 1257 int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, 1258 VA.getLocMemOffset()); 1259 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 1260 1261 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue(); 1262 if (Flags & ISD::ParamFlags::ByVal) 1263 ArgValues.push_back(FIN); 1264 else 1265 ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); 1266 } 1267 } 1268 1269 unsigned StackSize = CCInfo.getNextStackOffset(); 1270 1271 // If the function takes variable number of arguments, make a frame index for 1272 // the start of the first vararg value... for expansion of llvm.va_start. 1273 if (isVarArg) { 1274 unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6); 1275 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1276 1277 // For X86-64, if there are vararg parameters that are passed via 1278 // registers, then we must store them to their spots on the stack so they 1279 // may be loaded by deferencing the result of va_next. 1280 VarArgsGPOffset = NumIntRegs * 8; 1281 VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16; 1282 VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); 1283 RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16); 1284 1285 // Store the integer parameter registers. 1286 SmallVector<SDOperand, 8> MemOps; 1287 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 1288 SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1289 DAG.getConstant(VarArgsGPOffset, getPointerTy())); 1290 for (; NumIntRegs != 6; ++NumIntRegs) { 1291 unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], 1292 X86::GR64RegisterClass); 1293 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); 1294 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1295 MemOps.push_back(Store); 1296 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1297 DAG.getConstant(8, getPointerTy())); 1298 } 1299 1300 // Now store the XMM (fp + vector) parameter registers. 1301 FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, 1302 DAG.getConstant(VarArgsFPOffset, getPointerTy())); 1303 for (; NumXMMRegs != 8; ++NumXMMRegs) { 1304 unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 1305 X86::VR128RegisterClass); 1306 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); 1307 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1308 MemOps.push_back(Store); 1309 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 1310 DAG.getConstant(16, getPointerTy())); 1311 } 1312 if (!MemOps.empty()) 1313 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, 1314 &MemOps[0], MemOps.size()); 1315 } 1316 1317 ArgValues.push_back(Root); 1318 1319 BytesToPopOnReturn = 0; // Callee pops nothing. 1320 BytesCallerReserves = StackSize; 1321 1322 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1323 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1324 1325 // Return the new list of results. 1326 return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), 1327 &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); 1328} 1329 1330SDOperand 1331X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, 1332 unsigned CC) { 1333 SDOperand Chain = Op.getOperand(0); 1334 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1335 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 1336 SDOperand Callee = Op.getOperand(4); 1337 1338 // Analyze operands of the call, assigning locations to each operand. 1339 SmallVector<CCValAssign, 16> ArgLocs; 1340 CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); 1341 CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); 1342 1343 // Get a count of how many bytes are to be pushed on the stack. 1344 unsigned NumBytes = CCInfo.getNextStackOffset(); 1345 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 1346 1347 SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; 1348 SmallVector<SDOperand, 8> MemOpChains; 1349 1350 SDOperand StackPtr; 1351 1352 // Walk the register/memloc assignments, inserting copies/loads. 1353 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1354 CCValAssign &VA = ArgLocs[i]; 1355 SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); 1356 1357 // Promote the value if needed. 1358 switch (VA.getLocInfo()) { 1359 default: assert(0 && "Unknown loc info!"); 1360 case CCValAssign::Full: break; 1361 case CCValAssign::SExt: 1362 Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); 1363 break; 1364 case CCValAssign::ZExt: 1365 Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); 1366 break; 1367 case CCValAssign::AExt: 1368 Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); 1369 break; 1370 } 1371 1372 if (VA.isRegLoc()) { 1373 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1374 } else { 1375 assert(VA.isMemLoc()); 1376 if (StackPtr.Val == 0) 1377 StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); 1378 SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); 1379 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 1380 1381 SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo()); 1382 unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue(); 1383 if (Flags & ISD::ParamFlags::ByVal) { 1384 unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >> 1385 ISD::ParamFlags::ByValAlignOffs); 1386 unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >> 1387 ISD::ParamFlags::ByValSizeOffs; 1388 1389 SDOperand AlignNode = DAG.getConstant(Align, MVT::i32); 1390 SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); 1391 1392 assert(0 && "Not Implemented"); 1393 1394 SDOperand Copy = DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, 1395 Arg, SizeNode, AlignNode); 1396 MemOpChains.push_back(Copy); 1397 } 1398 else { 1399 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1400 } 1401 } 1402 } 1403 1404 if (!MemOpChains.empty()) 1405 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1406 &MemOpChains[0], MemOpChains.size()); 1407 1408 // Build a sequence of copy-to-reg nodes chained together with token chain 1409 // and flag operands which copy the outgoing args into registers. 1410 SDOperand InFlag; 1411 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1412 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1413 InFlag); 1414 InFlag = Chain.getValue(1); 1415 } 1416 1417 if (isVarArg) { 1418 // From AMD64 ABI document: 1419 // For calls that may call functions that use varargs or stdargs 1420 // (prototype-less calls or calls to functions containing ellipsis (...) in 1421 // the declaration) %al is used as hidden argument to specify the number 1422 // of SSE registers used. The contents of %al do not need to match exactly 1423 // the number of registers, but must be an ubound on the number of SSE 1424 // registers used and is in the range 0 - 8 inclusive. 1425 1426 // Count the number of XMM registers allocated. 1427 static const unsigned XMMArgRegs[] = { 1428 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 1429 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 1430 }; 1431 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 1432 1433 Chain = DAG.getCopyToReg(Chain, X86::AL, 1434 DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); 1435 InFlag = Chain.getValue(1); 1436 } 1437 1438 // If the callee is a GlobalAddress node (quite common, every direct call is) 1439 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 1440 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1441 // We should use extra load for direct calls to dllimported functions in 1442 // non-JIT mode. 1443 if (getTargetMachine().getCodeModel() != CodeModel::Large 1444 && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), 1445 getTargetMachine(), true)) 1446 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 1447 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 1448 if (getTargetMachine().getCodeModel() != CodeModel::Large) 1449 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1450 1451 // Returns a chain & a flag for retval copy to use. 1452 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1453 SmallVector<SDOperand, 8> Ops; 1454 Ops.push_back(Chain); 1455 Ops.push_back(Callee); 1456 1457 // Add argument registers to the end of the list so that they are known live 1458 // into the call. 1459 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1460 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1461 RegsToPass[i].second.getValueType())); 1462 1463 if (InFlag.Val) 1464 Ops.push_back(InFlag); 1465 1466 // FIXME: Do not generate X86ISD::TAILCALL for now. 1467 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 1468 NodeTys, &Ops[0], Ops.size()); 1469 InFlag = Chain.getValue(1); 1470 1471 // Returns a flag for retval copy to use. 1472 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1473 Ops.clear(); 1474 Ops.push_back(Chain); 1475 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 1476 Ops.push_back(DAG.getConstant(0, getPointerTy())); 1477 Ops.push_back(InFlag); 1478 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); 1479 InFlag = Chain.getValue(1); 1480 1481 // Handle result values, copying them out of physregs into vregs that we 1482 // return. 1483 return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); 1484} 1485 1486 1487//===----------------------------------------------------------------------===// 1488// Other Lowering Hooks 1489//===----------------------------------------------------------------------===// 1490 1491 1492SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 1493 MachineFunction &MF = DAG.getMachineFunction(); 1494 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1495 int ReturnAddrIndex = FuncInfo->getRAIndex(); 1496 1497 if (ReturnAddrIndex == 0) { 1498 // Set up a frame object for the return address. 1499 if (Subtarget->is64Bit()) 1500 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); 1501 else 1502 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 1503 1504 FuncInfo->setRAIndex(ReturnAddrIndex); 1505 } 1506 1507 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 1508} 1509 1510 1511 1512/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 1513/// specific condition code. It returns a false if it cannot do a direct 1514/// translation. X86CC is the translated CondCode. LHS/RHS are modified as 1515/// needed. 1516static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 1517 unsigned &X86CC, SDOperand &LHS, SDOperand &RHS, 1518 SelectionDAG &DAG) { 1519 X86CC = X86::COND_INVALID; 1520 if (!isFP) { 1521 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1522 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { 1523 // X > -1 -> X == 0, jump !sign. 1524 RHS = DAG.getConstant(0, RHS.getValueType()); 1525 X86CC = X86::COND_NS; 1526 return true; 1527 } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { 1528 // X < 0 -> X == 0, jump on sign. 1529 X86CC = X86::COND_S; 1530 return true; 1531 } 1532 } 1533 1534 switch (SetCCOpcode) { 1535 default: break; 1536 case ISD::SETEQ: X86CC = X86::COND_E; break; 1537 case ISD::SETGT: X86CC = X86::COND_G; break; 1538 case ISD::SETGE: X86CC = X86::COND_GE; break; 1539 case ISD::SETLT: X86CC = X86::COND_L; break; 1540 case ISD::SETLE: X86CC = X86::COND_LE; break; 1541 case ISD::SETNE: X86CC = X86::COND_NE; break; 1542 case ISD::SETULT: X86CC = X86::COND_B; break; 1543 case ISD::SETUGT: X86CC = X86::COND_A; break; 1544 case ISD::SETULE: X86CC = X86::COND_BE; break; 1545 case ISD::SETUGE: X86CC = X86::COND_AE; break; 1546 } 1547 } else { 1548 // On a floating point condition, the flags are set as follows: 1549 // ZF PF CF op 1550 // 0 | 0 | 0 | X > Y 1551 // 0 | 0 | 1 | X < Y 1552 // 1 | 0 | 0 | X == Y 1553 // 1 | 1 | 1 | unordered 1554 bool Flip = false; 1555 switch (SetCCOpcode) { 1556 default: break; 1557 case ISD::SETUEQ: 1558 case ISD::SETEQ: X86CC = X86::COND_E; break; 1559 case ISD::SETOLT: Flip = true; // Fallthrough 1560 case ISD::SETOGT: 1561 case ISD::SETGT: X86CC = X86::COND_A; break; 1562 case ISD::SETOLE: Flip = true; // Fallthrough 1563 case ISD::SETOGE: 1564 case ISD::SETGE: X86CC = X86::COND_AE; break; 1565 case ISD::SETUGT: Flip = true; // Fallthrough 1566 case ISD::SETULT: 1567 case ISD::SETLT: X86CC = X86::COND_B; break; 1568 case ISD::SETUGE: Flip = true; // Fallthrough 1569 case ISD::SETULE: 1570 case ISD::SETLE: X86CC = X86::COND_BE; break; 1571 case ISD::SETONE: 1572 case ISD::SETNE: X86CC = X86::COND_NE; break; 1573 case ISD::SETUO: X86CC = X86::COND_P; break; 1574 case ISD::SETO: X86CC = X86::COND_NP; break; 1575 } 1576 if (Flip) 1577 std::swap(LHS, RHS); 1578 } 1579 1580 return X86CC != X86::COND_INVALID; 1581} 1582 1583/// hasFPCMov - is there a floating point cmov for the specific X86 condition 1584/// code. Current x86 isa includes the following FP cmov instructions: 1585/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 1586static bool hasFPCMov(unsigned X86CC) { 1587 switch (X86CC) { 1588 default: 1589 return false; 1590 case X86::COND_B: 1591 case X86::COND_BE: 1592 case X86::COND_E: 1593 case X86::COND_P: 1594 case X86::COND_A: 1595 case X86::COND_AE: 1596 case X86::COND_NE: 1597 case X86::COND_NP: 1598 return true; 1599 } 1600} 1601 1602/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 1603/// true if Op is undef or if its value falls within the specified range (L, H]. 1604static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 1605 if (Op.getOpcode() == ISD::UNDEF) 1606 return true; 1607 1608 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 1609 return (Val >= Low && Val < Hi); 1610} 1611 1612/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 1613/// true if Op is undef or if its value equal to the specified value. 1614static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 1615 if (Op.getOpcode() == ISD::UNDEF) 1616 return true; 1617 return cast<ConstantSDNode>(Op)->getValue() == Val; 1618} 1619 1620/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 1621/// specifies a shuffle of elements that is suitable for input to PSHUFD. 1622bool X86::isPSHUFDMask(SDNode *N) { 1623 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1624 1625 if (N->getNumOperands() != 2 && N->getNumOperands() != 4) 1626 return false; 1627 1628 // Check if the value doesn't reference the second vector. 1629 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1630 SDOperand Arg = N->getOperand(i); 1631 if (Arg.getOpcode() == ISD::UNDEF) continue; 1632 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1633 if (cast<ConstantSDNode>(Arg)->getValue() >= e) 1634 return false; 1635 } 1636 1637 return true; 1638} 1639 1640/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1641/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1642bool X86::isPSHUFHWMask(SDNode *N) { 1643 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1644 1645 if (N->getNumOperands() != 8) 1646 return false; 1647 1648 // Lower quadword copied in order. 1649 for (unsigned i = 0; i != 4; ++i) { 1650 SDOperand Arg = N->getOperand(i); 1651 if (Arg.getOpcode() == ISD::UNDEF) continue; 1652 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1653 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1654 return false; 1655 } 1656 1657 // Upper quadword shuffled. 1658 for (unsigned i = 4; i != 8; ++i) { 1659 SDOperand Arg = N->getOperand(i); 1660 if (Arg.getOpcode() == ISD::UNDEF) continue; 1661 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1662 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1663 if (Val < 4 || Val > 7) 1664 return false; 1665 } 1666 1667 return true; 1668} 1669 1670/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1671/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1672bool X86::isPSHUFLWMask(SDNode *N) { 1673 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1674 1675 if (N->getNumOperands() != 8) 1676 return false; 1677 1678 // Upper quadword copied in order. 1679 for (unsigned i = 4; i != 8; ++i) 1680 if (!isUndefOrEqual(N->getOperand(i), i)) 1681 return false; 1682 1683 // Lower quadword shuffled. 1684 for (unsigned i = 0; i != 4; ++i) 1685 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1686 return false; 1687 1688 return true; 1689} 1690 1691/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1692/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1693static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 1694 if (NumElems != 2 && NumElems != 4) return false; 1695 1696 unsigned Half = NumElems / 2; 1697 for (unsigned i = 0; i < Half; ++i) 1698 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 1699 return false; 1700 for (unsigned i = Half; i < NumElems; ++i) 1701 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 1702 return false; 1703 1704 return true; 1705} 1706 1707bool X86::isSHUFPMask(SDNode *N) { 1708 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1709 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 1710} 1711 1712/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 1713/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1714/// half elements to come from vector 1 (which would equal the dest.) and 1715/// the upper half to come from vector 2. 1716static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 1717 if (NumOps != 2 && NumOps != 4) return false; 1718 1719 unsigned Half = NumOps / 2; 1720 for (unsigned i = 0; i < Half; ++i) 1721 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 1722 return false; 1723 for (unsigned i = Half; i < NumOps; ++i) 1724 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 1725 return false; 1726 return true; 1727} 1728 1729static bool isCommutedSHUFP(SDNode *N) { 1730 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1731 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 1732} 1733 1734/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1735/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1736bool X86::isMOVHLPSMask(SDNode *N) { 1737 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1738 1739 if (N->getNumOperands() != 4) 1740 return false; 1741 1742 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1743 return isUndefOrEqual(N->getOperand(0), 6) && 1744 isUndefOrEqual(N->getOperand(1), 7) && 1745 isUndefOrEqual(N->getOperand(2), 2) && 1746 isUndefOrEqual(N->getOperand(3), 3); 1747} 1748 1749/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 1750/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 1751/// <2, 3, 2, 3> 1752bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 1753 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1754 1755 if (N->getNumOperands() != 4) 1756 return false; 1757 1758 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 1759 return isUndefOrEqual(N->getOperand(0), 2) && 1760 isUndefOrEqual(N->getOperand(1), 3) && 1761 isUndefOrEqual(N->getOperand(2), 2) && 1762 isUndefOrEqual(N->getOperand(3), 3); 1763} 1764 1765/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1766/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1767bool X86::isMOVLPMask(SDNode *N) { 1768 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1769 1770 unsigned NumElems = N->getNumOperands(); 1771 if (NumElems != 2 && NumElems != 4) 1772 return false; 1773 1774 for (unsigned i = 0; i < NumElems/2; ++i) 1775 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1776 return false; 1777 1778 for (unsigned i = NumElems/2; i < NumElems; ++i) 1779 if (!isUndefOrEqual(N->getOperand(i), i)) 1780 return false; 1781 1782 return true; 1783} 1784 1785/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1786/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1787/// and MOVLHPS. 1788bool X86::isMOVHPMask(SDNode *N) { 1789 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1790 1791 unsigned NumElems = N->getNumOperands(); 1792 if (NumElems != 2 && NumElems != 4) 1793 return false; 1794 1795 for (unsigned i = 0; i < NumElems/2; ++i) 1796 if (!isUndefOrEqual(N->getOperand(i), i)) 1797 return false; 1798 1799 for (unsigned i = 0; i < NumElems/2; ++i) { 1800 SDOperand Arg = N->getOperand(i + NumElems/2); 1801 if (!isUndefOrEqual(Arg, i + NumElems)) 1802 return false; 1803 } 1804 1805 return true; 1806} 1807 1808/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1809/// specifies a shuffle of elements that is suitable for input to UNPCKL. 1810bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts, 1811 bool V2IsSplat = false) { 1812 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1813 return false; 1814 1815 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1816 SDOperand BitI = Elts[i]; 1817 SDOperand BitI1 = Elts[i+1]; 1818 if (!isUndefOrEqual(BitI, j)) 1819 return false; 1820 if (V2IsSplat) { 1821 if (isUndefOrEqual(BitI1, NumElts)) 1822 return false; 1823 } else { 1824 if (!isUndefOrEqual(BitI1, j + NumElts)) 1825 return false; 1826 } 1827 } 1828 1829 return true; 1830} 1831 1832bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1833 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1834 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1835} 1836 1837/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1838/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1839bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts, 1840 bool V2IsSplat = false) { 1841 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1842 return false; 1843 1844 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1845 SDOperand BitI = Elts[i]; 1846 SDOperand BitI1 = Elts[i+1]; 1847 if (!isUndefOrEqual(BitI, j + NumElts/2)) 1848 return false; 1849 if (V2IsSplat) { 1850 if (isUndefOrEqual(BitI1, NumElts)) 1851 return false; 1852 } else { 1853 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 1854 return false; 1855 } 1856 } 1857 1858 return true; 1859} 1860 1861bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1862 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1863 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1864} 1865 1866/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1867/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1868/// <0, 0, 1, 1> 1869bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1870 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1871 1872 unsigned NumElems = N->getNumOperands(); 1873 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1874 return false; 1875 1876 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1877 SDOperand BitI = N->getOperand(i); 1878 SDOperand BitI1 = N->getOperand(i+1); 1879 1880 if (!isUndefOrEqual(BitI, j)) 1881 return false; 1882 if (!isUndefOrEqual(BitI1, j)) 1883 return false; 1884 } 1885 1886 return true; 1887} 1888 1889/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 1890/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 1891/// <2, 2, 3, 3> 1892bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 1893 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1894 1895 unsigned NumElems = N->getNumOperands(); 1896 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1897 return false; 1898 1899 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 1900 SDOperand BitI = N->getOperand(i); 1901 SDOperand BitI1 = N->getOperand(i + 1); 1902 1903 if (!isUndefOrEqual(BitI, j)) 1904 return false; 1905 if (!isUndefOrEqual(BitI1, j)) 1906 return false; 1907 } 1908 1909 return true; 1910} 1911 1912/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1913/// specifies a shuffle of elements that is suitable for input to MOVSS, 1914/// MOVSD, and MOVD, i.e. setting the lowest element. 1915static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) { 1916 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1917 return false; 1918 1919 if (!isUndefOrEqual(Elts[0], NumElts)) 1920 return false; 1921 1922 for (unsigned i = 1; i < NumElts; ++i) { 1923 if (!isUndefOrEqual(Elts[i], i)) 1924 return false; 1925 } 1926 1927 return true; 1928} 1929 1930bool X86::isMOVLMask(SDNode *N) { 1931 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1932 return ::isMOVLMask(N->op_begin(), N->getNumOperands()); 1933} 1934 1935/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 1936/// of what x86 movss want. X86 movs requires the lowest element to be lowest 1937/// element of vector 2 and the other elements to come from vector 1 in order. 1938static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps, 1939 bool V2IsSplat = false, 1940 bool V2IsUndef = false) { 1941 if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) 1942 return false; 1943 1944 if (!isUndefOrEqual(Ops[0], 0)) 1945 return false; 1946 1947 for (unsigned i = 1; i < NumOps; ++i) { 1948 SDOperand Arg = Ops[i]; 1949 if (!(isUndefOrEqual(Arg, i+NumOps) || 1950 (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) || 1951 (V2IsSplat && isUndefOrEqual(Arg, NumOps)))) 1952 return false; 1953 } 1954 1955 return true; 1956} 1957 1958static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false, 1959 bool V2IsUndef = false) { 1960 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1961 return isCommutedMOVL(N->op_begin(), N->getNumOperands(), 1962 V2IsSplat, V2IsUndef); 1963} 1964 1965/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1966/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 1967bool X86::isMOVSHDUPMask(SDNode *N) { 1968 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1969 1970 if (N->getNumOperands() != 4) 1971 return false; 1972 1973 // Expect 1, 1, 3, 3 1974 for (unsigned i = 0; i < 2; ++i) { 1975 SDOperand Arg = N->getOperand(i); 1976 if (Arg.getOpcode() == ISD::UNDEF) continue; 1977 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1978 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1979 if (Val != 1) return false; 1980 } 1981 1982 bool HasHi = false; 1983 for (unsigned i = 2; i < 4; ++i) { 1984 SDOperand Arg = N->getOperand(i); 1985 if (Arg.getOpcode() == ISD::UNDEF) continue; 1986 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1987 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1988 if (Val != 3) return false; 1989 HasHi = true; 1990 } 1991 1992 // Don't use movshdup if it can be done with a shufps. 1993 return HasHi; 1994} 1995 1996/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 1997/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 1998bool X86::isMOVSLDUPMask(SDNode *N) { 1999 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2000 2001 if (N->getNumOperands() != 4) 2002 return false; 2003 2004 // Expect 0, 0, 2, 2 2005 for (unsigned i = 0; i < 2; ++i) { 2006 SDOperand Arg = N->getOperand(i); 2007 if (Arg.getOpcode() == ISD::UNDEF) continue; 2008 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2009 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2010 if (Val != 0) return false; 2011 } 2012 2013 bool HasHi = false; 2014 for (unsigned i = 2; i < 4; ++i) { 2015 SDOperand Arg = N->getOperand(i); 2016 if (Arg.getOpcode() == ISD::UNDEF) continue; 2017 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2018 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2019 if (Val != 2) return false; 2020 HasHi = true; 2021 } 2022 2023 // Don't use movshdup if it can be done with a shufps. 2024 return HasHi; 2025} 2026 2027/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand 2028/// specifies a identity operation on the LHS or RHS. 2029static bool isIdentityMask(SDNode *N, bool RHS = false) { 2030 unsigned NumElems = N->getNumOperands(); 2031 for (unsigned i = 0; i < NumElems; ++i) 2032 if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) 2033 return false; 2034 return true; 2035} 2036 2037/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2038/// a splat of a single element. 2039static bool isSplatMask(SDNode *N) { 2040 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2041 2042 // This is a splat operation if each element of the permute is the same, and 2043 // if the value doesn't reference the second vector. 2044 unsigned NumElems = N->getNumOperands(); 2045 SDOperand ElementBase; 2046 unsigned i = 0; 2047 for (; i != NumElems; ++i) { 2048 SDOperand Elt = N->getOperand(i); 2049 if (isa<ConstantSDNode>(Elt)) { 2050 ElementBase = Elt; 2051 break; 2052 } 2053 } 2054 2055 if (!ElementBase.Val) 2056 return false; 2057 2058 for (; i != NumElems; ++i) { 2059 SDOperand Arg = N->getOperand(i); 2060 if (Arg.getOpcode() == ISD::UNDEF) continue; 2061 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2062 if (Arg != ElementBase) return false; 2063 } 2064 2065 // Make sure it is a splat of the first vector operand. 2066 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 2067} 2068 2069/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 2070/// a splat of a single element and it's a 2 or 4 element mask. 2071bool X86::isSplatMask(SDNode *N) { 2072 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2073 2074 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 2075 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 2076 return false; 2077 return ::isSplatMask(N); 2078} 2079 2080/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand 2081/// specifies a splat of zero element. 2082bool X86::isSplatLoMask(SDNode *N) { 2083 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2084 2085 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 2086 if (!isUndefOrEqual(N->getOperand(i), 0)) 2087 return false; 2088 return true; 2089} 2090 2091/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 2092/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 2093/// instructions. 2094unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 2095 unsigned NumOperands = N->getNumOperands(); 2096 unsigned Shift = (NumOperands == 4) ? 2 : 1; 2097 unsigned Mask = 0; 2098 for (unsigned i = 0; i < NumOperands; ++i) { 2099 unsigned Val = 0; 2100 SDOperand Arg = N->getOperand(NumOperands-i-1); 2101 if (Arg.getOpcode() != ISD::UNDEF) 2102 Val = cast<ConstantSDNode>(Arg)->getValue(); 2103 if (Val >= NumOperands) Val -= NumOperands; 2104 Mask |= Val; 2105 if (i != NumOperands - 1) 2106 Mask <<= Shift; 2107 } 2108 2109 return Mask; 2110} 2111 2112/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 2113/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 2114/// instructions. 2115unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 2116 unsigned Mask = 0; 2117 // 8 nodes, but we only care about the last 4. 2118 for (unsigned i = 7; i >= 4; --i) { 2119 unsigned Val = 0; 2120 SDOperand Arg = N->getOperand(i); 2121 if (Arg.getOpcode() != ISD::UNDEF) 2122 Val = cast<ConstantSDNode>(Arg)->getValue(); 2123 Mask |= (Val - 4); 2124 if (i != 4) 2125 Mask <<= 2; 2126 } 2127 2128 return Mask; 2129} 2130 2131/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 2132/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 2133/// instructions. 2134unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 2135 unsigned Mask = 0; 2136 // 8 nodes, but we only care about the first 4. 2137 for (int i = 3; i >= 0; --i) { 2138 unsigned Val = 0; 2139 SDOperand Arg = N->getOperand(i); 2140 if (Arg.getOpcode() != ISD::UNDEF) 2141 Val = cast<ConstantSDNode>(Arg)->getValue(); 2142 Mask |= Val; 2143 if (i != 0) 2144 Mask <<= 2; 2145 } 2146 2147 return Mask; 2148} 2149 2150/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 2151/// specifies a 8 element shuffle that can be broken into a pair of 2152/// PSHUFHW and PSHUFLW. 2153static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 2154 assert(N->getOpcode() == ISD::BUILD_VECTOR); 2155 2156 if (N->getNumOperands() != 8) 2157 return false; 2158 2159 // Lower quadword shuffled. 2160 for (unsigned i = 0; i != 4; ++i) { 2161 SDOperand Arg = N->getOperand(i); 2162 if (Arg.getOpcode() == ISD::UNDEF) continue; 2163 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2164 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2165 if (Val > 4) 2166 return false; 2167 } 2168 2169 // Upper quadword shuffled. 2170 for (unsigned i = 4; i != 8; ++i) { 2171 SDOperand Arg = N->getOperand(i); 2172 if (Arg.getOpcode() == ISD::UNDEF) continue; 2173 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2174 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2175 if (Val < 4 || Val > 7) 2176 return false; 2177 } 2178 2179 return true; 2180} 2181 2182/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 2183/// values in ther permute mask. 2184static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, 2185 SDOperand &V2, SDOperand &Mask, 2186 SelectionDAG &DAG) { 2187 MVT::ValueType VT = Op.getValueType(); 2188 MVT::ValueType MaskVT = Mask.getValueType(); 2189 MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); 2190 unsigned NumElems = Mask.getNumOperands(); 2191 SmallVector<SDOperand, 8> MaskVec; 2192 2193 for (unsigned i = 0; i != NumElems; ++i) { 2194 SDOperand Arg = Mask.getOperand(i); 2195 if (Arg.getOpcode() == ISD::UNDEF) { 2196 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2197 continue; 2198 } 2199 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2200 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2201 if (Val < NumElems) 2202 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2203 else 2204 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2205 } 2206 2207 std::swap(V1, V2); 2208 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2209 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2210} 2211 2212/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2213/// match movhlps. The lower half elements should come from upper half of 2214/// V1 (and in order), and the upper half elements should come from the upper 2215/// half of V2 (and in order). 2216static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2217 unsigned NumElems = Mask->getNumOperands(); 2218 if (NumElems != 4) 2219 return false; 2220 for (unsigned i = 0, e = 2; i != e; ++i) 2221 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2222 return false; 2223 for (unsigned i = 2; i != 4; ++i) 2224 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2225 return false; 2226 return true; 2227} 2228 2229/// isScalarLoadToVector - Returns true if the node is a scalar load that 2230/// is promoted to a vector. 2231static inline bool isScalarLoadToVector(SDNode *N) { 2232 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2233 N = N->getOperand(0).Val; 2234 return ISD::isNON_EXTLoad(N); 2235 } 2236 return false; 2237} 2238 2239/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2240/// match movlp{s|d}. The lower half elements should come from lower half of 2241/// V1 (and in order), and the upper half elements should come from the upper 2242/// half of V2 (and in order). And since V1 will become the source of the 2243/// MOVLP, it must be either a vector load or a scalar load to vector. 2244static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 2245 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 2246 return false; 2247 // Is V2 is a vector load, don't do this transformation. We will try to use 2248 // load folding shufps op. 2249 if (ISD::isNON_EXTLoad(V2)) 2250 return false; 2251 2252 unsigned NumElems = Mask->getNumOperands(); 2253 if (NumElems != 2 && NumElems != 4) 2254 return false; 2255 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2256 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2257 return false; 2258 for (unsigned i = NumElems/2; i != NumElems; ++i) 2259 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2260 return false; 2261 return true; 2262} 2263 2264/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2265/// all the same. 2266static bool isSplatVector(SDNode *N) { 2267 if (N->getOpcode() != ISD::BUILD_VECTOR) 2268 return false; 2269 2270 SDOperand SplatValue = N->getOperand(0); 2271 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2272 if (N->getOperand(i) != SplatValue) 2273 return false; 2274 return true; 2275} 2276 2277/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2278/// to an undef. 2279static bool isUndefShuffle(SDNode *N) { 2280 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2281 return false; 2282 2283 SDOperand V1 = N->getOperand(0); 2284 SDOperand V2 = N->getOperand(1); 2285 SDOperand Mask = N->getOperand(2); 2286 unsigned NumElems = Mask.getNumOperands(); 2287 for (unsigned i = 0; i != NumElems; ++i) { 2288 SDOperand Arg = Mask.getOperand(i); 2289 if (Arg.getOpcode() != ISD::UNDEF) { 2290 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2291 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2292 return false; 2293 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2294 return false; 2295 } 2296 } 2297 return true; 2298} 2299 2300/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2301/// constant +0.0. 2302static inline bool isZeroNode(SDOperand Elt) { 2303 return ((isa<ConstantSDNode>(Elt) && 2304 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2305 (isa<ConstantFPSDNode>(Elt) && 2306 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2307} 2308 2309/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2310/// to an zero vector. 2311static bool isZeroShuffle(SDNode *N) { 2312 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2313 return false; 2314 2315 SDOperand V1 = N->getOperand(0); 2316 SDOperand V2 = N->getOperand(1); 2317 SDOperand Mask = N->getOperand(2); 2318 unsigned NumElems = Mask.getNumOperands(); 2319 for (unsigned i = 0; i != NumElems; ++i) { 2320 SDOperand Arg = Mask.getOperand(i); 2321 if (Arg.getOpcode() != ISD::UNDEF) { 2322 unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); 2323 if (Idx < NumElems) { 2324 unsigned Opc = V1.Val->getOpcode(); 2325 if (Opc == ISD::UNDEF) 2326 continue; 2327 if (Opc != ISD::BUILD_VECTOR || 2328 !isZeroNode(V1.Val->getOperand(Idx))) 2329 return false; 2330 } else if (Idx >= NumElems) { 2331 unsigned Opc = V2.Val->getOpcode(); 2332 if (Opc == ISD::UNDEF) 2333 continue; 2334 if (Opc != ISD::BUILD_VECTOR || 2335 !isZeroNode(V2.Val->getOperand(Idx - NumElems))) 2336 return false; 2337 } 2338 } 2339 } 2340 return true; 2341} 2342 2343/// getZeroVector - Returns a vector of specified type with all zero elements. 2344/// 2345static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2346 assert(MVT::isVector(VT) && "Expected a vector type"); 2347 unsigned NumElems = MVT::getVectorNumElements(VT); 2348 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2349 bool isFP = MVT::isFloatingPoint(EVT); 2350 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2351 SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero); 2352 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2353} 2354 2355/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2356/// that point to V2 points to its first element. 2357static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2358 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2359 2360 bool Changed = false; 2361 SmallVector<SDOperand, 8> MaskVec; 2362 unsigned NumElems = Mask.getNumOperands(); 2363 for (unsigned i = 0; i != NumElems; ++i) { 2364 SDOperand Arg = Mask.getOperand(i); 2365 if (Arg.getOpcode() != ISD::UNDEF) { 2366 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2367 if (Val > NumElems) { 2368 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2369 Changed = true; 2370 } 2371 } 2372 MaskVec.push_back(Arg); 2373 } 2374 2375 if (Changed) 2376 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2377 &MaskVec[0], MaskVec.size()); 2378 return Mask; 2379} 2380 2381/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2382/// operation of specified width. 2383static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2384 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2385 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2386 2387 SmallVector<SDOperand, 8> MaskVec; 2388 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2389 for (unsigned i = 1; i != NumElems; ++i) 2390 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2391 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2392} 2393 2394/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2395/// of specified width. 2396static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2397 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2398 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2399 SmallVector<SDOperand, 8> MaskVec; 2400 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2401 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2402 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2403 } 2404 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2405} 2406 2407/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2408/// of specified width. 2409static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2410 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2411 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2412 unsigned Half = NumElems/2; 2413 SmallVector<SDOperand, 8> MaskVec; 2414 for (unsigned i = 0; i != Half; ++i) { 2415 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2416 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2417 } 2418 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2419} 2420 2421/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 2422/// 2423static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2424 SDOperand V1 = Op.getOperand(0); 2425 SDOperand Mask = Op.getOperand(2); 2426 MVT::ValueType VT = Op.getValueType(); 2427 unsigned NumElems = Mask.getNumOperands(); 2428 Mask = getUnpacklMask(NumElems, DAG); 2429 while (NumElems != 4) { 2430 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2431 NumElems >>= 1; 2432 } 2433 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2434 2435 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2436 Mask = getZeroVector(MaskVT, DAG); 2437 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2438 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2439 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2440} 2441 2442/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2443/// vector of zero or undef vector. 2444static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2445 unsigned NumElems, unsigned Idx, 2446 bool isZero, SelectionDAG &DAG) { 2447 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2448 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2449 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2450 SDOperand Zero = DAG.getConstant(0, EVT); 2451 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2452 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2453 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2454 &MaskVec[0], MaskVec.size()); 2455 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2456} 2457 2458/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2459/// 2460static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2461 unsigned NumNonZero, unsigned NumZero, 2462 SelectionDAG &DAG, TargetLowering &TLI) { 2463 if (NumNonZero > 8) 2464 return SDOperand(); 2465 2466 SDOperand V(0, 0); 2467 bool First = true; 2468 for (unsigned i = 0; i < 16; ++i) { 2469 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2470 if (ThisIsNonZero && First) { 2471 if (NumZero) 2472 V = getZeroVector(MVT::v8i16, DAG); 2473 else 2474 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2475 First = false; 2476 } 2477 2478 if ((i & 1) != 0) { 2479 SDOperand ThisElt(0, 0), LastElt(0, 0); 2480 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2481 if (LastIsNonZero) { 2482 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2483 } 2484 if (ThisIsNonZero) { 2485 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2486 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2487 ThisElt, DAG.getConstant(8, MVT::i8)); 2488 if (LastIsNonZero) 2489 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2490 } else 2491 ThisElt = LastElt; 2492 2493 if (ThisElt.Val) 2494 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2495 DAG.getConstant(i/2, TLI.getPointerTy())); 2496 } 2497 } 2498 2499 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2500} 2501 2502/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 2503/// 2504static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2505 unsigned NumNonZero, unsigned NumZero, 2506 SelectionDAG &DAG, TargetLowering &TLI) { 2507 if (NumNonZero > 4) 2508 return SDOperand(); 2509 2510 SDOperand V(0, 0); 2511 bool First = true; 2512 for (unsigned i = 0; i < 8; ++i) { 2513 bool isNonZero = (NonZeros & (1 << i)) != 0; 2514 if (isNonZero) { 2515 if (First) { 2516 if (NumZero) 2517 V = getZeroVector(MVT::v8i16, DAG); 2518 else 2519 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2520 First = false; 2521 } 2522 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2523 DAG.getConstant(i, TLI.getPointerTy())); 2524 } 2525 } 2526 2527 return V; 2528} 2529 2530SDOperand 2531X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2532 // All zero's are handled with pxor. 2533 if (ISD::isBuildVectorAllZeros(Op.Val)) 2534 return Op; 2535 2536 // All one's are handled with pcmpeqd. 2537 if (ISD::isBuildVectorAllOnes(Op.Val)) 2538 return Op; 2539 2540 MVT::ValueType VT = Op.getValueType(); 2541 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2542 unsigned EVTBits = MVT::getSizeInBits(EVT); 2543 2544 unsigned NumElems = Op.getNumOperands(); 2545 unsigned NumZero = 0; 2546 unsigned NumNonZero = 0; 2547 unsigned NonZeros = 0; 2548 unsigned NumNonZeroImms = 0; 2549 std::set<SDOperand> Values; 2550 for (unsigned i = 0; i < NumElems; ++i) { 2551 SDOperand Elt = Op.getOperand(i); 2552 if (Elt.getOpcode() != ISD::UNDEF) { 2553 Values.insert(Elt); 2554 if (isZeroNode(Elt)) 2555 NumZero++; 2556 else { 2557 NonZeros |= (1 << i); 2558 NumNonZero++; 2559 if (Elt.getOpcode() == ISD::Constant || 2560 Elt.getOpcode() == ISD::ConstantFP) 2561 NumNonZeroImms++; 2562 } 2563 } 2564 } 2565 2566 if (NumNonZero == 0) { 2567 if (NumZero == 0) 2568 // All undef vector. Return an UNDEF. 2569 return DAG.getNode(ISD::UNDEF, VT); 2570 else 2571 // A mix of zero and undef. Return a zero vector. 2572 return getZeroVector(VT, DAG); 2573 } 2574 2575 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2576 if (Values.size() == 1) 2577 return SDOperand(); 2578 2579 // Special case for single non-zero element. 2580 if (NumNonZero == 1) { 2581 unsigned Idx = CountTrailingZeros_32(NonZeros); 2582 SDOperand Item = Op.getOperand(Idx); 2583 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2584 if (Idx == 0) 2585 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2586 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2587 NumZero > 0, DAG); 2588 2589 if (EVTBits == 32) { 2590 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2591 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2592 DAG); 2593 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2594 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2595 SmallVector<SDOperand, 8> MaskVec; 2596 for (unsigned i = 0; i < NumElems; i++) 2597 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2598 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2599 &MaskVec[0], MaskVec.size()); 2600 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2601 DAG.getNode(ISD::UNDEF, VT), Mask); 2602 } 2603 } 2604 2605 // A vector full of immediates; various special cases are already 2606 // handled, so this is best done with a single constant-pool load. 2607 if (NumNonZero == NumNonZeroImms) 2608 return SDOperand(); 2609 2610 // Let legalizer expand 2-wide build_vectors. 2611 if (EVTBits == 64) 2612 return SDOperand(); 2613 2614 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2615 if (EVTBits == 8 && NumElems == 16) { 2616 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2617 *this); 2618 if (V.Val) return V; 2619 } 2620 2621 if (EVTBits == 16 && NumElems == 8) { 2622 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2623 *this); 2624 if (V.Val) return V; 2625 } 2626 2627 // If element VT is == 32 bits, turn it into a number of shuffles. 2628 SmallVector<SDOperand, 8> V; 2629 V.resize(NumElems); 2630 if (NumElems == 4 && NumZero > 0) { 2631 for (unsigned i = 0; i < 4; ++i) { 2632 bool isZero = !(NonZeros & (1 << i)); 2633 if (isZero) 2634 V[i] = getZeroVector(VT, DAG); 2635 else 2636 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2637 } 2638 2639 for (unsigned i = 0; i < 2; ++i) { 2640 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2641 default: break; 2642 case 0: 2643 V[i] = V[i*2]; // Must be a zero vector. 2644 break; 2645 case 1: 2646 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2647 getMOVLMask(NumElems, DAG)); 2648 break; 2649 case 2: 2650 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2651 getMOVLMask(NumElems, DAG)); 2652 break; 2653 case 3: 2654 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2655 getUnpacklMask(NumElems, DAG)); 2656 break; 2657 } 2658 } 2659 2660 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 2661 // clears the upper bits. 2662 // FIXME: we can do the same for v4f32 case when we know both parts of 2663 // the lower half come from scalar_to_vector (loadf32). We should do 2664 // that in post legalizer dag combiner with target specific hooks. 2665 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2666 return V[0]; 2667 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2668 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2669 SmallVector<SDOperand, 8> MaskVec; 2670 bool Reverse = (NonZeros & 0x3) == 2; 2671 for (unsigned i = 0; i < 2; ++i) 2672 if (Reverse) 2673 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2674 else 2675 MaskVec.push_back(DAG.getConstant(i, EVT)); 2676 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2677 for (unsigned i = 0; i < 2; ++i) 2678 if (Reverse) 2679 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2680 else 2681 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2682 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2683 &MaskVec[0], MaskVec.size()); 2684 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2685 } 2686 2687 if (Values.size() > 2) { 2688 // Expand into a number of unpckl*. 2689 // e.g. for v4f32 2690 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2691 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2692 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2693 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2694 for (unsigned i = 0; i < NumElems; ++i) 2695 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2696 NumElems >>= 1; 2697 while (NumElems != 0) { 2698 for (unsigned i = 0; i < NumElems; ++i) 2699 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2700 UnpckMask); 2701 NumElems >>= 1; 2702 } 2703 return V[0]; 2704 } 2705 2706 return SDOperand(); 2707} 2708 2709SDOperand 2710X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2711 SDOperand V1 = Op.getOperand(0); 2712 SDOperand V2 = Op.getOperand(1); 2713 SDOperand PermMask = Op.getOperand(2); 2714 MVT::ValueType VT = Op.getValueType(); 2715 unsigned NumElems = PermMask.getNumOperands(); 2716 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2717 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2718 bool V1IsSplat = false; 2719 bool V2IsSplat = false; 2720 2721 if (isUndefShuffle(Op.Val)) 2722 return DAG.getNode(ISD::UNDEF, VT); 2723 2724 if (isZeroShuffle(Op.Val)) 2725 return getZeroVector(VT, DAG); 2726 2727 if (isIdentityMask(PermMask.Val)) 2728 return V1; 2729 else if (isIdentityMask(PermMask.Val, true)) 2730 return V2; 2731 2732 if (isSplatMask(PermMask.Val)) { 2733 if (NumElems <= 4) return Op; 2734 // Promote it to a v4i32 splat. 2735 return PromoteSplat(Op, DAG); 2736 } 2737 2738 if (X86::isMOVLMask(PermMask.Val)) 2739 return (V1IsUndef) ? V2 : Op; 2740 2741 if (X86::isMOVSHDUPMask(PermMask.Val) || 2742 X86::isMOVSLDUPMask(PermMask.Val) || 2743 X86::isMOVHLPSMask(PermMask.Val) || 2744 X86::isMOVHPMask(PermMask.Val) || 2745 X86::isMOVLPMask(PermMask.Val)) 2746 return Op; 2747 2748 if (ShouldXformToMOVHLPS(PermMask.Val) || 2749 ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val)) 2750 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2751 2752 bool Commuted = false; 2753 V1IsSplat = isSplatVector(V1.Val); 2754 V2IsSplat = isSplatVector(V2.Val); 2755 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 2756 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2757 std::swap(V1IsSplat, V2IsSplat); 2758 std::swap(V1IsUndef, V2IsUndef); 2759 Commuted = true; 2760 } 2761 2762 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 2763 if (V2IsUndef) return V1; 2764 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2765 if (V2IsSplat) { 2766 // V2 is a splat, so the mask may be malformed. That is, it may point 2767 // to any V2 element. The instruction selectior won't like this. Get 2768 // a corrected mask and commute to form a proper MOVS{S|D}. 2769 SDOperand NewMask = getMOVLMask(NumElems, DAG); 2770 if (NewMask.Val != PermMask.Val) 2771 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2772 } 2773 return Op; 2774 } 2775 2776 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2777 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2778 X86::isUNPCKLMask(PermMask.Val) || 2779 X86::isUNPCKHMask(PermMask.Val)) 2780 return Op; 2781 2782 if (V2IsSplat) { 2783 // Normalize mask so all entries that point to V2 points to its first 2784 // element then try to match unpck{h|l} again. If match, return a 2785 // new vector_shuffle with the corrected mask. 2786 SDOperand NewMask = NormalizeMask(PermMask, DAG); 2787 if (NewMask.Val != PermMask.Val) { 2788 if (X86::isUNPCKLMask(PermMask.Val, true)) { 2789 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 2790 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2791 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 2792 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 2793 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2794 } 2795 } 2796 } 2797 2798 // Normalize the node to match x86 shuffle ops if needed 2799 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val)) 2800 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2801 2802 if (Commuted) { 2803 // Commute is back and try unpck* again. 2804 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2805 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2806 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2807 X86::isUNPCKLMask(PermMask.Val) || 2808 X86::isUNPCKHMask(PermMask.Val)) 2809 return Op; 2810 } 2811 2812 // If VT is integer, try PSHUF* first, then SHUFP*. 2813 if (MVT::isInteger(VT)) { 2814 // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically 2815 // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. 2816 if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) && 2817 X86::isPSHUFDMask(PermMask.Val)) || 2818 X86::isPSHUFHWMask(PermMask.Val) || 2819 X86::isPSHUFLWMask(PermMask.Val)) { 2820 if (V2.getOpcode() != ISD::UNDEF) 2821 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2822 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2823 return Op; 2824 } 2825 2826 if (X86::isSHUFPMask(PermMask.Val) && 2827 MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. 2828 return Op; 2829 2830 // Handle v8i16 shuffle high / low shuffle node pair. 2831 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2832 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2833 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2834 SmallVector<SDOperand, 8> MaskVec; 2835 for (unsigned i = 0; i != 4; ++i) 2836 MaskVec.push_back(PermMask.getOperand(i)); 2837 for (unsigned i = 4; i != 8; ++i) 2838 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2839 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2840 &MaskVec[0], MaskVec.size()); 2841 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2842 MaskVec.clear(); 2843 for (unsigned i = 0; i != 4; ++i) 2844 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2845 for (unsigned i = 4; i != 8; ++i) 2846 MaskVec.push_back(PermMask.getOperand(i)); 2847 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 2848 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2849 } 2850 } else { 2851 // Floating point cases in the other order. 2852 if (X86::isSHUFPMask(PermMask.Val)) 2853 return Op; 2854 if (X86::isPSHUFDMask(PermMask.Val) || 2855 X86::isPSHUFHWMask(PermMask.Val) || 2856 X86::isPSHUFLWMask(PermMask.Val)) { 2857 if (V2.getOpcode() != ISD::UNDEF) 2858 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2859 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2860 return Op; 2861 } 2862 } 2863 2864 if (NumElems == 4 && 2865 // Don't do this for MMX. 2866 MVT::getSizeInBits(VT) != 64) { 2867 MVT::ValueType MaskVT = PermMask.getValueType(); 2868 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2869 SmallVector<std::pair<int, int>, 8> Locs; 2870 Locs.reserve(NumElems); 2871 SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2872 SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2873 unsigned NumHi = 0; 2874 unsigned NumLo = 0; 2875 // If no more than two elements come from either vector. This can be 2876 // implemented with two shuffles. First shuffle gather the elements. 2877 // The second shuffle, which takes the first shuffle as both of its 2878 // vector operands, put the elements into the right order. 2879 for (unsigned i = 0; i != NumElems; ++i) { 2880 SDOperand Elt = PermMask.getOperand(i); 2881 if (Elt.getOpcode() == ISD::UNDEF) { 2882 Locs[i] = std::make_pair(-1, -1); 2883 } else { 2884 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 2885 if (Val < NumElems) { 2886 Locs[i] = std::make_pair(0, NumLo); 2887 Mask1[NumLo] = Elt; 2888 NumLo++; 2889 } else { 2890 Locs[i] = std::make_pair(1, NumHi); 2891 if (2+NumHi < NumElems) 2892 Mask1[2+NumHi] = Elt; 2893 NumHi++; 2894 } 2895 } 2896 } 2897 if (NumLo <= 2 && NumHi <= 2) { 2898 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2899 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2900 &Mask1[0], Mask1.size())); 2901 for (unsigned i = 0; i != NumElems; ++i) { 2902 if (Locs[i].first == -1) 2903 continue; 2904 else { 2905 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 2906 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 2907 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 2908 } 2909 } 2910 2911 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 2912 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2913 &Mask2[0], Mask2.size())); 2914 } 2915 2916 // Break it into (shuffle shuffle_hi, shuffle_lo). 2917 Locs.clear(); 2918 SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2919 SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2920 SmallVector<SDOperand,8> *MaskPtr = &LoMask; 2921 unsigned MaskIdx = 0; 2922 unsigned LoIdx = 0; 2923 unsigned HiIdx = NumElems/2; 2924 for (unsigned i = 0; i != NumElems; ++i) { 2925 if (i == NumElems/2) { 2926 MaskPtr = &HiMask; 2927 MaskIdx = 1; 2928 LoIdx = 0; 2929 HiIdx = NumElems/2; 2930 } 2931 SDOperand Elt = PermMask.getOperand(i); 2932 if (Elt.getOpcode() == ISD::UNDEF) { 2933 Locs[i] = std::make_pair(-1, -1); 2934 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 2935 Locs[i] = std::make_pair(MaskIdx, LoIdx); 2936 (*MaskPtr)[LoIdx] = Elt; 2937 LoIdx++; 2938 } else { 2939 Locs[i] = std::make_pair(MaskIdx, HiIdx); 2940 (*MaskPtr)[HiIdx] = Elt; 2941 HiIdx++; 2942 } 2943 } 2944 2945 SDOperand LoShuffle = 2946 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2947 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2948 &LoMask[0], LoMask.size())); 2949 SDOperand HiShuffle = 2950 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 2951 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2952 &HiMask[0], HiMask.size())); 2953 SmallVector<SDOperand, 8> MaskOps; 2954 for (unsigned i = 0; i != NumElems; ++i) { 2955 if (Locs[i].first == -1) { 2956 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 2957 } else { 2958 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 2959 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 2960 } 2961 } 2962 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 2963 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2964 &MaskOps[0], MaskOps.size())); 2965 } 2966 2967 return SDOperand(); 2968} 2969 2970SDOperand 2971X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 2972 if (!isa<ConstantSDNode>(Op.getOperand(1))) 2973 return SDOperand(); 2974 2975 MVT::ValueType VT = Op.getValueType(); 2976 // TODO: handle v16i8. 2977 if (MVT::getSizeInBits(VT) == 16) { 2978 // Transform it so it match pextrw which produces a 32-bit result. 2979 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 2980 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 2981 Op.getOperand(0), Op.getOperand(1)); 2982 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 2983 DAG.getValueType(VT)); 2984 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 2985 } else if (MVT::getSizeInBits(VT) == 32) { 2986 SDOperand Vec = Op.getOperand(0); 2987 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2988 if (Idx == 0) 2989 return Op; 2990 // SHUFPS the element to the lowest double word, then movss. 2991 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2992 SmallVector<SDOperand, 8> IdxVec; 2993 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); 2994 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2995 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2996 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2997 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2998 &IdxVec[0], IdxVec.size()); 2999 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3000 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3001 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3002 DAG.getConstant(0, getPointerTy())); 3003 } else if (MVT::getSizeInBits(VT) == 64) { 3004 SDOperand Vec = Op.getOperand(0); 3005 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3006 if (Idx == 0) 3007 return Op; 3008 3009 // UNPCKHPD the element to the lowest double word, then movsd. 3010 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 3011 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 3012 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3013 SmallVector<SDOperand, 8> IdxVec; 3014 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); 3015 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3016 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3017 &IdxVec[0], IdxVec.size()); 3018 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3019 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3020 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3021 DAG.getConstant(0, getPointerTy())); 3022 } 3023 3024 return SDOperand(); 3025} 3026 3027SDOperand 3028X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3029 // Transform it so it match pinsrw which expects a 16-bit value in a GR32 3030 // as its second argument. 3031 MVT::ValueType VT = Op.getValueType(); 3032 MVT::ValueType BaseVT = MVT::getVectorElementType(VT); 3033 SDOperand N0 = Op.getOperand(0); 3034 SDOperand N1 = Op.getOperand(1); 3035 SDOperand N2 = Op.getOperand(2); 3036 if (MVT::getSizeInBits(BaseVT) == 16) { 3037 if (N1.getValueType() != MVT::i32) 3038 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3039 if (N2.getValueType() != MVT::i32) 3040 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy()); 3041 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3042 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3043 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3044 if (Idx == 0) { 3045 // Use a movss. 3046 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3047 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3048 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 3049 SmallVector<SDOperand, 8> MaskVec; 3050 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3051 for (unsigned i = 1; i <= 3; ++i) 3052 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3053 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3054 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3055 &MaskVec[0], MaskVec.size())); 3056 } else { 3057 // Use two pinsrw instructions to insert a 32 bit value. 3058 Idx <<= 1; 3059 if (MVT::isFloatingPoint(N1.getValueType())) { 3060 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3061 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3062 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3063 DAG.getConstant(0, getPointerTy())); 3064 } 3065 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3066 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3067 DAG.getConstant(Idx, getPointerTy())); 3068 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3069 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3070 DAG.getConstant(Idx+1, getPointerTy())); 3071 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3072 } 3073 } 3074 3075 return SDOperand(); 3076} 3077 3078SDOperand 3079X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3080 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3081 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3082} 3083 3084// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 3085// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 3086// one of the above mentioned nodes. It has to be wrapped because otherwise 3087// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 3088// be used to form addressing mode. These wrapped nodes will be selected 3089// into MOV32ri. 3090SDOperand 3091X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 3092 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 3093 SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(), 3094 getPointerTy(), 3095 CP->getAlignment()); 3096 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3097 // With PIC, the address is actually $g + Offset. 3098 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3099 !Subtarget->isPICStyleRIPRel()) { 3100 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3101 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3102 Result); 3103 } 3104 3105 return Result; 3106} 3107 3108SDOperand 3109X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 3110 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 3111 SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy()); 3112 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3113 // With PIC, the address is actually $g + Offset. 3114 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3115 !Subtarget->isPICStyleRIPRel()) { 3116 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3117 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3118 Result); 3119 } 3120 3121 // For Darwin & Mingw32, external and weak symbols are indirect, so we want to 3122 // load the value at address GV, not the value of GV itself. This means that 3123 // the GlobalAddress must be in the base or index register of the address, not 3124 // the GV offset field. Platform check is inside GVRequiresExtraLoad() call 3125 // The same applies for external symbols during PIC codegen 3126 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 3127 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 3128 3129 return Result; 3130} 3131 3132// Lower ISD::GlobalTLSAddress using the "general dynamic" model 3133static SDOperand 3134LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3135 const MVT::ValueType PtrVT) { 3136 SDOperand InFlag; 3137 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 3138 DAG.getNode(X86ISD::GlobalBaseReg, 3139 PtrVT), InFlag); 3140 InFlag = Chain.getValue(1); 3141 3142 // emit leal symbol@TLSGD(,%ebx,1), %eax 3143 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 3144 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3145 GA->getValueType(0), 3146 GA->getOffset()); 3147 SDOperand Ops[] = { Chain, TGA, InFlag }; 3148 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 3149 InFlag = Result.getValue(2); 3150 Chain = Result.getValue(1); 3151 3152 // call ___tls_get_addr. This function receives its argument in 3153 // the register EAX. 3154 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 3155 InFlag = Chain.getValue(1); 3156 3157 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3158 SDOperand Ops1[] = { Chain, 3159 DAG.getTargetExternalSymbol("___tls_get_addr", 3160 PtrVT), 3161 DAG.getRegister(X86::EAX, PtrVT), 3162 DAG.getRegister(X86::EBX, PtrVT), 3163 InFlag }; 3164 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 3165 InFlag = Chain.getValue(1); 3166 3167 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 3168} 3169 3170// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 3171// "local exec" model. 3172static SDOperand 3173LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3174 const MVT::ValueType PtrVT) { 3175 // Get the Thread Pointer 3176 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 3177 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 3178 // exec) 3179 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3180 GA->getValueType(0), 3181 GA->getOffset()); 3182 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 3183 3184 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 3185 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); 3186 3187 // The address of the thread local variable is the add of the thread 3188 // pointer with the offset of the variable. 3189 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); 3190} 3191 3192SDOperand 3193X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { 3194 // TODO: implement the "local dynamic" model 3195 // TODO: implement the "initial exec"model for pic executables 3196 assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() && 3197 "TLS not implemented for non-ELF and 64-bit targets"); 3198 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 3199 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 3200 // otherwise use the "Local Exec"TLS Model 3201 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 3202 return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy()); 3203 else 3204 return LowerToTLSExecModel(GA, DAG, getPointerTy()); 3205} 3206 3207SDOperand 3208X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 3209 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 3210 SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 3211 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3212 // With PIC, the address is actually $g + Offset. 3213 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3214 !Subtarget->isPICStyleRIPRel()) { 3215 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3216 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3217 Result); 3218 } 3219 3220 return Result; 3221} 3222 3223SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 3224 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 3225 SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); 3226 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3227 // With PIC, the address is actually $g + Offset. 3228 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3229 !Subtarget->isPICStyleRIPRel()) { 3230 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3231 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3232 Result); 3233 } 3234 3235 return Result; 3236} 3237 3238SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3239 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 3240 "Not an i64 shift!"); 3241 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 3242 SDOperand ShOpLo = Op.getOperand(0); 3243 SDOperand ShOpHi = Op.getOperand(1); 3244 SDOperand ShAmt = Op.getOperand(2); 3245 SDOperand Tmp1 = isSRA ? 3246 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 3247 DAG.getConstant(0, MVT::i32); 3248 3249 SDOperand Tmp2, Tmp3; 3250 if (Op.getOpcode() == ISD::SHL_PARTS) { 3251 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 3252 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 3253 } else { 3254 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 3255 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3256 } 3257 3258 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3259 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3260 DAG.getConstant(32, MVT::i8)); 3261 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3262 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3263 3264 SDOperand Hi, Lo; 3265 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3266 3267 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3268 SmallVector<SDOperand, 4> Ops; 3269 if (Op.getOpcode() == ISD::SHL_PARTS) { 3270 Ops.push_back(Tmp2); 3271 Ops.push_back(Tmp3); 3272 Ops.push_back(CC); 3273 Ops.push_back(InFlag); 3274 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3275 InFlag = Hi.getValue(1); 3276 3277 Ops.clear(); 3278 Ops.push_back(Tmp3); 3279 Ops.push_back(Tmp1); 3280 Ops.push_back(CC); 3281 Ops.push_back(InFlag); 3282 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3283 } else { 3284 Ops.push_back(Tmp2); 3285 Ops.push_back(Tmp3); 3286 Ops.push_back(CC); 3287 Ops.push_back(InFlag); 3288 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3289 InFlag = Lo.getValue(1); 3290 3291 Ops.clear(); 3292 Ops.push_back(Tmp3); 3293 Ops.push_back(Tmp1); 3294 Ops.push_back(CC); 3295 Ops.push_back(InFlag); 3296 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3297 } 3298 3299 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3300 Ops.clear(); 3301 Ops.push_back(Lo); 3302 Ops.push_back(Hi); 3303 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3304} 3305 3306SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3307 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3308 Op.getOperand(0).getValueType() >= MVT::i16 && 3309 "Unknown SINT_TO_FP to lower!"); 3310 3311 SDOperand Result; 3312 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3313 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3314 MachineFunction &MF = DAG.getMachineFunction(); 3315 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3316 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3317 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3318 StackSlot, NULL, 0); 3319 3320 // Build the FILD 3321 SDVTList Tys; 3322 if (X86ScalarSSE) 3323 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3324 else 3325 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3326 SmallVector<SDOperand, 8> Ops; 3327 Ops.push_back(Chain); 3328 Ops.push_back(StackSlot); 3329 Ops.push_back(DAG.getValueType(SrcVT)); 3330 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3331 Tys, &Ops[0], Ops.size()); 3332 3333 if (X86ScalarSSE) { 3334 Chain = Result.getValue(1); 3335 SDOperand InFlag = Result.getValue(2); 3336 3337 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3338 // shouldn't be necessary except that RFP cannot be live across 3339 // multiple blocks. When stackifier is fixed, they can be uncoupled. 3340 MachineFunction &MF = DAG.getMachineFunction(); 3341 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3342 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3343 Tys = DAG.getVTList(MVT::Other); 3344 SmallVector<SDOperand, 8> Ops; 3345 Ops.push_back(Chain); 3346 Ops.push_back(Result); 3347 Ops.push_back(StackSlot); 3348 Ops.push_back(DAG.getValueType(Op.getValueType())); 3349 Ops.push_back(InFlag); 3350 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3351 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3352 } 3353 3354 return Result; 3355} 3356 3357SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3358 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3359 "Unknown FP_TO_SINT to lower!"); 3360 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3361 // stack slot. 3362 MachineFunction &MF = DAG.getMachineFunction(); 3363 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3364 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3365 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3366 3367 unsigned Opc; 3368 switch (Op.getValueType()) { 3369 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3370 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3371 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3372 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3373 } 3374 3375 SDOperand Chain = DAG.getEntryNode(); 3376 SDOperand Value = Op.getOperand(0); 3377 if (X86ScalarSSE) { 3378 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3379 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3380 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3381 SDOperand Ops[] = { 3382 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3383 }; 3384 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3385 Chain = Value.getValue(1); 3386 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3387 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3388 } 3389 3390 // Build the FP_TO_INT*_IN_MEM 3391 SDOperand Ops[] = { Chain, Value, StackSlot }; 3392 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3393 3394 // Load the result. 3395 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3396} 3397 3398SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3399 MVT::ValueType VT = Op.getValueType(); 3400 MVT::ValueType EltVT = VT; 3401 if (MVT::isVector(VT)) 3402 EltVT = MVT::getVectorElementType(VT); 3403 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3404 std::vector<Constant*> CV; 3405 if (EltVT == MVT::f64) { 3406 Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))); 3407 CV.push_back(C); 3408 CV.push_back(C); 3409 } else { 3410 Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))); 3411 CV.push_back(C); 3412 CV.push_back(C); 3413 CV.push_back(C); 3414 CV.push_back(C); 3415 } 3416 Constant *C = ConstantVector::get(CV); 3417 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3418 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3419 false, 16); 3420 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3421} 3422 3423SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3424 MVT::ValueType VT = Op.getValueType(); 3425 MVT::ValueType EltVT = VT; 3426 unsigned EltNum = 1; 3427 if (MVT::isVector(VT)) { 3428 EltVT = MVT::getVectorElementType(VT); 3429 EltNum = MVT::getVectorNumElements(VT); 3430 } 3431 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3432 std::vector<Constant*> CV; 3433 if (EltVT == MVT::f64) { 3434 Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)); 3435 CV.push_back(C); 3436 CV.push_back(C); 3437 } else { 3438 Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31)); 3439 CV.push_back(C); 3440 CV.push_back(C); 3441 CV.push_back(C); 3442 CV.push_back(C); 3443 } 3444 Constant *C = ConstantVector::get(CV); 3445 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3446 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3447 false, 16); 3448 if (MVT::isVector(VT)) { 3449 return DAG.getNode(ISD::BIT_CONVERT, VT, 3450 DAG.getNode(ISD::XOR, MVT::v2i64, 3451 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 3452 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 3453 } else { 3454 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3455 } 3456} 3457 3458SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3459 SDOperand Op0 = Op.getOperand(0); 3460 SDOperand Op1 = Op.getOperand(1); 3461 MVT::ValueType VT = Op.getValueType(); 3462 MVT::ValueType SrcVT = Op1.getValueType(); 3463 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3464 3465 // If second operand is smaller, extend it first. 3466 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3467 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3468 SrcVT = VT; 3469 } 3470 3471 // First get the sign bit of second operand. 3472 std::vector<Constant*> CV; 3473 if (SrcVT == MVT::f64) { 3474 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63))); 3475 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3476 } else { 3477 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31))); 3478 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3479 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3480 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3481 } 3482 Constant *C = ConstantVector::get(CV); 3483 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3484 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 3485 false, 16); 3486 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3487 3488 // Shift sign bit right or left if the two operands have different types. 3489 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3490 // Op0 is MVT::f32, Op1 is MVT::f64. 3491 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3492 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3493 DAG.getConstant(32, MVT::i32)); 3494 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3495 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3496 DAG.getConstant(0, getPointerTy())); 3497 } 3498 3499 // Clear first operand sign bit. 3500 CV.clear(); 3501 if (VT == MVT::f64) { 3502 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63)))); 3503 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3504 } else { 3505 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31)))); 3506 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3507 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3508 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3509 } 3510 C = ConstantVector::get(CV); 3511 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3512 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3513 false, 16); 3514 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3515 3516 // Or the value with the sign bit. 3517 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 3518} 3519 3520SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, 3521 SDOperand Chain) { 3522 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3523 SDOperand Cond; 3524 SDOperand Op0 = Op.getOperand(0); 3525 SDOperand Op1 = Op.getOperand(1); 3526 SDOperand CC = Op.getOperand(2); 3527 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3528 const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3529 const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 3530 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3531 unsigned X86CC; 3532 3533 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 3534 Op0, Op1, DAG)) { 3535 SDOperand Ops1[] = { Chain, Op0, Op1 }; 3536 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1); 3537 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 3538 return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3539 } 3540 3541 assert(isFP && "Illegal integer SetCC!"); 3542 3543 SDOperand COps[] = { Chain, Op0, Op1 }; 3544 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1); 3545 3546 switch (SetCCOpcode) { 3547 default: assert(false && "Illegal floating point SetCC!"); 3548 case ISD::SETOEQ: { // !PF & ZF 3549 SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond }; 3550 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3551 SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8), 3552 Tmp1.getValue(1) }; 3553 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3554 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3555 } 3556 case ISD::SETUNE: { // PF | !ZF 3557 SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond }; 3558 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3559 SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8), 3560 Tmp1.getValue(1) }; 3561 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3562 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3563 } 3564 } 3565} 3566 3567SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3568 bool addTest = true; 3569 SDOperand Chain = DAG.getEntryNode(); 3570 SDOperand Cond = Op.getOperand(0); 3571 SDOperand CC; 3572 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3573 3574 if (Cond.getOpcode() == ISD::SETCC) 3575 Cond = LowerSETCC(Cond, DAG, Chain); 3576 3577 if (Cond.getOpcode() == X86ISD::SETCC) { 3578 CC = Cond.getOperand(0); 3579 3580 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3581 // (since flag operand cannot be shared). Use it as the condition setting 3582 // operand in place of the X86ISD::SETCC. 3583 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3584 // to use a test instead of duplicating the X86ISD::CMP (for register 3585 // pressure reason)? 3586 SDOperand Cmp = Cond.getOperand(1); 3587 unsigned Opc = Cmp.getOpcode(); 3588 bool IllegalFPCMov = !X86ScalarSSE && 3589 MVT::isFloatingPoint(Op.getValueType()) && 3590 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3591 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) && 3592 !IllegalFPCMov) { 3593 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3594 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3595 addTest = false; 3596 } 3597 } 3598 3599 if (addTest) { 3600 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3601 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3602 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3603 } 3604 3605 VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag); 3606 SmallVector<SDOperand, 4> Ops; 3607 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3608 // condition is true. 3609 Ops.push_back(Op.getOperand(2)); 3610 Ops.push_back(Op.getOperand(1)); 3611 Ops.push_back(CC); 3612 Ops.push_back(Cond.getValue(1)); 3613 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3614} 3615 3616SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3617 bool addTest = true; 3618 SDOperand Chain = Op.getOperand(0); 3619 SDOperand Cond = Op.getOperand(1); 3620 SDOperand Dest = Op.getOperand(2); 3621 SDOperand CC; 3622 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3623 3624 if (Cond.getOpcode() == ISD::SETCC) 3625 Cond = LowerSETCC(Cond, DAG, Chain); 3626 3627 if (Cond.getOpcode() == X86ISD::SETCC) { 3628 CC = Cond.getOperand(0); 3629 3630 // If condition flag is set by a X86ISD::CMP, then make a copy of it 3631 // (since flag operand cannot be shared). Use it as the condition setting 3632 // operand in place of the X86ISD::SETCC. 3633 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3634 // to use a test instead of duplicating the X86ISD::CMP (for register 3635 // pressure reason)? 3636 SDOperand Cmp = Cond.getOperand(1); 3637 unsigned Opc = Cmp.getOpcode(); 3638 if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) { 3639 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3640 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3641 addTest = false; 3642 } 3643 } 3644 3645 if (addTest) { 3646 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3647 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3648 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3649 } 3650 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3651 Cond, Op.getOperand(2), CC, Cond.getValue(1)); 3652} 3653 3654SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 3655 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3656 3657 if (Subtarget->is64Bit()) 3658 return LowerX86_64CCCCallTo(Op, DAG, CallingConv); 3659 else 3660 switch (CallingConv) { 3661 default: 3662 assert(0 && "Unsupported calling convention"); 3663 case CallingConv::Fast: 3664 // TODO: Implement fastcc 3665 // Falls through 3666 case CallingConv::C: 3667 case CallingConv::X86_StdCall: 3668 return LowerCCCCallTo(Op, DAG, CallingConv); 3669 case CallingConv::X86_FastCall: 3670 return LowerFastCCCallTo(Op, DAG, CallingConv); 3671 } 3672} 3673 3674 3675// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 3676// Calls to _alloca is needed to probe the stack when allocating more than 4k 3677// bytes in one go. Touching the stack at 4K increments is necessary to ensure 3678// that the guard pages used by the OS virtual memory manager are allocated in 3679// correct sequence. 3680SDOperand 3681X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, 3682 SelectionDAG &DAG) { 3683 assert(Subtarget->isTargetCygMing() && 3684 "This should be used only on Cygwin/Mingw targets"); 3685 3686 // Get the inputs. 3687 SDOperand Chain = Op.getOperand(0); 3688 SDOperand Size = Op.getOperand(1); 3689 // FIXME: Ensure alignment here 3690 3691 SDOperand Flag; 3692 3693 MVT::ValueType IntPtr = getPointerTy(); 3694 MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32); 3695 3696 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 3697 Flag = Chain.getValue(1); 3698 3699 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3700 SDOperand Ops[] = { Chain, 3701 DAG.getTargetExternalSymbol("_alloca", IntPtr), 3702 DAG.getRegister(X86::EAX, IntPtr), 3703 Flag }; 3704 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4); 3705 Flag = Chain.getValue(1); 3706 3707 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 3708 3709 std::vector<MVT::ValueType> Tys; 3710 Tys.push_back(SPTy); 3711 Tys.push_back(MVT::Other); 3712 SDOperand Ops1[2] = { Chain.getValue(0), Chain }; 3713 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); 3714} 3715 3716SDOperand 3717X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3718 MachineFunction &MF = DAG.getMachineFunction(); 3719 const Function* Fn = MF.getFunction(); 3720 if (Fn->hasExternalLinkage() && 3721 Subtarget->isTargetCygMing() && 3722 Fn->getName() == "main") 3723 MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true); 3724 3725 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3726 if (Subtarget->is64Bit()) 3727 return LowerX86_64CCCArguments(Op, DAG); 3728 else 3729 switch(CC) { 3730 default: 3731 assert(0 && "Unsupported calling convention"); 3732 case CallingConv::Fast: 3733 // TODO: implement fastcc. 3734 3735 // Falls through 3736 case CallingConv::C: 3737 return LowerCCCArguments(Op, DAG); 3738 case CallingConv::X86_StdCall: 3739 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall); 3740 return LowerCCCArguments(Op, DAG, true); 3741 case CallingConv::X86_FastCall: 3742 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall); 3743 return LowerFastCCArguments(Op, DAG); 3744 } 3745} 3746 3747SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3748 SDOperand InFlag(0, 0); 3749 SDOperand Chain = Op.getOperand(0); 3750 unsigned Align = 3751 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3752 if (Align == 0) Align = 1; 3753 3754 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3755 // If not DWORD aligned or size is more than the threshold, call memset. 3756 // The libc version is likely to be faster for these cases. It can use the 3757 // address value and run time information about the CPU. 3758 if ((Align & 3) != 0 || 3759 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 3760 MVT::ValueType IntPtr = getPointerTy(); 3761 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3762 TargetLowering::ArgListTy Args; 3763 TargetLowering::ArgListEntry Entry; 3764 Entry.Node = Op.getOperand(1); 3765 Entry.Ty = IntPtrTy; 3766 Args.push_back(Entry); 3767 // Extend the unsigned i8 argument to be an int value for the call. 3768 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3769 Entry.Ty = IntPtrTy; 3770 Args.push_back(Entry); 3771 Entry.Node = Op.getOperand(3); 3772 Args.push_back(Entry); 3773 std::pair<SDOperand,SDOperand> CallResult = 3774 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3775 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3776 return CallResult.second; 3777 } 3778 3779 MVT::ValueType AVT; 3780 SDOperand Count; 3781 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3782 unsigned BytesLeft = 0; 3783 bool TwoRepStos = false; 3784 if (ValC) { 3785 unsigned ValReg; 3786 uint64_t Val = ValC->getValue() & 255; 3787 3788 // If the value is a constant, then we can potentially use larger sets. 3789 switch (Align & 3) { 3790 case 2: // WORD aligned 3791 AVT = MVT::i16; 3792 ValReg = X86::AX; 3793 Val = (Val << 8) | Val; 3794 break; 3795 case 0: // DWORD aligned 3796 AVT = MVT::i32; 3797 ValReg = X86::EAX; 3798 Val = (Val << 8) | Val; 3799 Val = (Val << 16) | Val; 3800 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3801 AVT = MVT::i64; 3802 ValReg = X86::RAX; 3803 Val = (Val << 32) | Val; 3804 } 3805 break; 3806 default: // Byte aligned 3807 AVT = MVT::i8; 3808 ValReg = X86::AL; 3809 Count = Op.getOperand(3); 3810 break; 3811 } 3812 3813 if (AVT > MVT::i8) { 3814 if (I) { 3815 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3816 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3817 BytesLeft = I->getValue() % UBytes; 3818 } else { 3819 assert(AVT >= MVT::i32 && 3820 "Do not use rep;stos if not at least DWORD aligned"); 3821 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3822 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3823 TwoRepStos = true; 3824 } 3825 } 3826 3827 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3828 InFlag); 3829 InFlag = Chain.getValue(1); 3830 } else { 3831 AVT = MVT::i8; 3832 Count = Op.getOperand(3); 3833 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3834 InFlag = Chain.getValue(1); 3835 } 3836 3837 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3838 Count, InFlag); 3839 InFlag = Chain.getValue(1); 3840 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3841 Op.getOperand(1), InFlag); 3842 InFlag = Chain.getValue(1); 3843 3844 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3845 SmallVector<SDOperand, 8> Ops; 3846 Ops.push_back(Chain); 3847 Ops.push_back(DAG.getValueType(AVT)); 3848 Ops.push_back(InFlag); 3849 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3850 3851 if (TwoRepStos) { 3852 InFlag = Chain.getValue(1); 3853 Count = Op.getOperand(3); 3854 MVT::ValueType CVT = Count.getValueType(); 3855 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3856 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3857 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3858 Left, InFlag); 3859 InFlag = Chain.getValue(1); 3860 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3861 Ops.clear(); 3862 Ops.push_back(Chain); 3863 Ops.push_back(DAG.getValueType(MVT::i8)); 3864 Ops.push_back(InFlag); 3865 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3866 } else if (BytesLeft) { 3867 // Issue stores for the last 1 - 7 bytes. 3868 SDOperand Value; 3869 unsigned Val = ValC->getValue() & 255; 3870 unsigned Offset = I->getValue() - BytesLeft; 3871 SDOperand DstAddr = Op.getOperand(1); 3872 MVT::ValueType AddrVT = DstAddr.getValueType(); 3873 if (BytesLeft >= 4) { 3874 Val = (Val << 8) | Val; 3875 Val = (Val << 16) | Val; 3876 Value = DAG.getConstant(Val, MVT::i32); 3877 Chain = DAG.getStore(Chain, Value, 3878 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3879 DAG.getConstant(Offset, AddrVT)), 3880 NULL, 0); 3881 BytesLeft -= 4; 3882 Offset += 4; 3883 } 3884 if (BytesLeft >= 2) { 3885 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 3886 Chain = DAG.getStore(Chain, Value, 3887 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3888 DAG.getConstant(Offset, AddrVT)), 3889 NULL, 0); 3890 BytesLeft -= 2; 3891 Offset += 2; 3892 } 3893 if (BytesLeft == 1) { 3894 Value = DAG.getConstant(Val, MVT::i8); 3895 Chain = DAG.getStore(Chain, Value, 3896 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3897 DAG.getConstant(Offset, AddrVT)), 3898 NULL, 0); 3899 } 3900 } 3901 3902 return Chain; 3903} 3904 3905SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 3906 SDOperand Chain = Op.getOperand(0); 3907 unsigned Align = 3908 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3909 if (Align == 0) Align = 1; 3910 3911 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3912 // If not DWORD aligned or size is more than the threshold, call memcpy. 3913 // The libc version is likely to be faster for these cases. It can use the 3914 // address value and run time information about the CPU. 3915 // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster 3916 if ((Align & 3) != 0 || 3917 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 3918 MVT::ValueType IntPtr = getPointerTy(); 3919 TargetLowering::ArgListTy Args; 3920 TargetLowering::ArgListEntry Entry; 3921 Entry.Ty = getTargetData()->getIntPtrType(); 3922 Entry.Node = Op.getOperand(1); Args.push_back(Entry); 3923 Entry.Node = Op.getOperand(2); Args.push_back(Entry); 3924 Entry.Node = Op.getOperand(3); Args.push_back(Entry); 3925 std::pair<SDOperand,SDOperand> CallResult = 3926 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3927 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 3928 return CallResult.second; 3929 } 3930 3931 MVT::ValueType AVT; 3932 SDOperand Count; 3933 unsigned BytesLeft = 0; 3934 bool TwoRepMovs = false; 3935 switch (Align & 3) { 3936 case 2: // WORD aligned 3937 AVT = MVT::i16; 3938 break; 3939 case 0: // DWORD aligned 3940 AVT = MVT::i32; 3941 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 3942 AVT = MVT::i64; 3943 break; 3944 default: // Byte aligned 3945 AVT = MVT::i8; 3946 Count = Op.getOperand(3); 3947 break; 3948 } 3949 3950 if (AVT > MVT::i8) { 3951 if (I) { 3952 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3953 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3954 BytesLeft = I->getValue() % UBytes; 3955 } else { 3956 assert(AVT >= MVT::i32 && 3957 "Do not use rep;movs if not at least DWORD aligned"); 3958 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3959 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3960 TwoRepMovs = true; 3961 } 3962 } 3963 3964 SDOperand InFlag(0, 0); 3965 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3966 Count, InFlag); 3967 InFlag = Chain.getValue(1); 3968 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3969 Op.getOperand(1), InFlag); 3970 InFlag = Chain.getValue(1); 3971 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 3972 Op.getOperand(2), InFlag); 3973 InFlag = Chain.getValue(1); 3974 3975 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3976 SmallVector<SDOperand, 8> Ops; 3977 Ops.push_back(Chain); 3978 Ops.push_back(DAG.getValueType(AVT)); 3979 Ops.push_back(InFlag); 3980 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3981 3982 if (TwoRepMovs) { 3983 InFlag = Chain.getValue(1); 3984 Count = Op.getOperand(3); 3985 MVT::ValueType CVT = Count.getValueType(); 3986 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3987 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3988 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3989 Left, InFlag); 3990 InFlag = Chain.getValue(1); 3991 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3992 Ops.clear(); 3993 Ops.push_back(Chain); 3994 Ops.push_back(DAG.getValueType(MVT::i8)); 3995 Ops.push_back(InFlag); 3996 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3997 } else if (BytesLeft) { 3998 // Issue loads and stores for the last 1 - 7 bytes. 3999 unsigned Offset = I->getValue() - BytesLeft; 4000 SDOperand DstAddr = Op.getOperand(1); 4001 MVT::ValueType DstVT = DstAddr.getValueType(); 4002 SDOperand SrcAddr = Op.getOperand(2); 4003 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4004 SDOperand Value; 4005 if (BytesLeft >= 4) { 4006 Value = DAG.getLoad(MVT::i32, Chain, 4007 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4008 DAG.getConstant(Offset, SrcVT)), 4009 NULL, 0); 4010 Chain = Value.getValue(1); 4011 Chain = DAG.getStore(Chain, Value, 4012 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4013 DAG.getConstant(Offset, DstVT)), 4014 NULL, 0); 4015 BytesLeft -= 4; 4016 Offset += 4; 4017 } 4018 if (BytesLeft >= 2) { 4019 Value = DAG.getLoad(MVT::i16, Chain, 4020 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4021 DAG.getConstant(Offset, SrcVT)), 4022 NULL, 0); 4023 Chain = Value.getValue(1); 4024 Chain = DAG.getStore(Chain, Value, 4025 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4026 DAG.getConstant(Offset, DstVT)), 4027 NULL, 0); 4028 BytesLeft -= 2; 4029 Offset += 2; 4030 } 4031 4032 if (BytesLeft == 1) { 4033 Value = DAG.getLoad(MVT::i8, Chain, 4034 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4035 DAG.getConstant(Offset, SrcVT)), 4036 NULL, 0); 4037 Chain = Value.getValue(1); 4038 Chain = DAG.getStore(Chain, Value, 4039 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4040 DAG.getConstant(Offset, DstVT)), 4041 NULL, 0); 4042 } 4043 } 4044 4045 return Chain; 4046} 4047 4048SDOperand 4049X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4050 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4051 SDOperand TheOp = Op.getOperand(0); 4052 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 4053 if (Subtarget->is64Bit()) { 4054 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4055 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 4056 MVT::i64, Copy1.getValue(2)); 4057 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 4058 DAG.getConstant(32, MVT::i8)); 4059 SDOperand Ops[] = { 4060 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 4061 }; 4062 4063 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4064 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 4065 } 4066 4067 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4068 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 4069 MVT::i32, Copy1.getValue(2)); 4070 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 4071 Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 4072 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 4073} 4074 4075SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4076 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4077 4078 if (!Subtarget->is64Bit()) { 4079 // vastart just stores the address of the VarArgsFrameIndex slot into the 4080 // memory location argument. 4081 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4082 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4083 SV->getOffset()); 4084 } 4085 4086 // __va_list_tag: 4087 // gp_offset (0 - 6 * 8) 4088 // fp_offset (48 - 48 + 8 * 16) 4089 // overflow_arg_area (point to parameters coming in memory). 4090 // reg_save_area 4091 SmallVector<SDOperand, 8> MemOps; 4092 SDOperand FIN = Op.getOperand(1); 4093 // Store gp_offset 4094 SDOperand Store = DAG.getStore(Op.getOperand(0), 4095 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4096 FIN, SV->getValue(), SV->getOffset()); 4097 MemOps.push_back(Store); 4098 4099 // Store fp_offset 4100 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4101 DAG.getConstant(4, getPointerTy())); 4102 Store = DAG.getStore(Op.getOperand(0), 4103 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4104 FIN, SV->getValue(), SV->getOffset()); 4105 MemOps.push_back(Store); 4106 4107 // Store ptr to overflow_arg_area 4108 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4109 DAG.getConstant(4, getPointerTy())); 4110 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4111 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4112 SV->getOffset()); 4113 MemOps.push_back(Store); 4114 4115 // Store ptr to reg_save_area. 4116 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4117 DAG.getConstant(8, getPointerTy())); 4118 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4119 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4120 SV->getOffset()); 4121 MemOps.push_back(Store); 4122 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4123} 4124 4125SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4126 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4127 SDOperand Chain = Op.getOperand(0); 4128 SDOperand DstPtr = Op.getOperand(1); 4129 SDOperand SrcPtr = Op.getOperand(2); 4130 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4131 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4132 4133 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4134 SrcSV->getValue(), SrcSV->getOffset()); 4135 Chain = SrcPtr.getValue(1); 4136 for (unsigned i = 0; i < 3; ++i) { 4137 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4138 SrcSV->getValue(), SrcSV->getOffset()); 4139 Chain = Val.getValue(1); 4140 Chain = DAG.getStore(Chain, Val, DstPtr, 4141 DstSV->getValue(), DstSV->getOffset()); 4142 if (i == 2) 4143 break; 4144 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4145 DAG.getConstant(8, getPointerTy())); 4146 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4147 DAG.getConstant(8, getPointerTy())); 4148 } 4149 return Chain; 4150} 4151 4152SDOperand 4153X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4154 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4155 switch (IntNo) { 4156 default: return SDOperand(); // Don't custom lower most intrinsics. 4157 // Comparison intrinsics. 4158 case Intrinsic::x86_sse_comieq_ss: 4159 case Intrinsic::x86_sse_comilt_ss: 4160 case Intrinsic::x86_sse_comile_ss: 4161 case Intrinsic::x86_sse_comigt_ss: 4162 case Intrinsic::x86_sse_comige_ss: 4163 case Intrinsic::x86_sse_comineq_ss: 4164 case Intrinsic::x86_sse_ucomieq_ss: 4165 case Intrinsic::x86_sse_ucomilt_ss: 4166 case Intrinsic::x86_sse_ucomile_ss: 4167 case Intrinsic::x86_sse_ucomigt_ss: 4168 case Intrinsic::x86_sse_ucomige_ss: 4169 case Intrinsic::x86_sse_ucomineq_ss: 4170 case Intrinsic::x86_sse2_comieq_sd: 4171 case Intrinsic::x86_sse2_comilt_sd: 4172 case Intrinsic::x86_sse2_comile_sd: 4173 case Intrinsic::x86_sse2_comigt_sd: 4174 case Intrinsic::x86_sse2_comige_sd: 4175 case Intrinsic::x86_sse2_comineq_sd: 4176 case Intrinsic::x86_sse2_ucomieq_sd: 4177 case Intrinsic::x86_sse2_ucomilt_sd: 4178 case Intrinsic::x86_sse2_ucomile_sd: 4179 case Intrinsic::x86_sse2_ucomigt_sd: 4180 case Intrinsic::x86_sse2_ucomige_sd: 4181 case Intrinsic::x86_sse2_ucomineq_sd: { 4182 unsigned Opc = 0; 4183 ISD::CondCode CC = ISD::SETCC_INVALID; 4184 switch (IntNo) { 4185 default: break; 4186 case Intrinsic::x86_sse_comieq_ss: 4187 case Intrinsic::x86_sse2_comieq_sd: 4188 Opc = X86ISD::COMI; 4189 CC = ISD::SETEQ; 4190 break; 4191 case Intrinsic::x86_sse_comilt_ss: 4192 case Intrinsic::x86_sse2_comilt_sd: 4193 Opc = X86ISD::COMI; 4194 CC = ISD::SETLT; 4195 break; 4196 case Intrinsic::x86_sse_comile_ss: 4197 case Intrinsic::x86_sse2_comile_sd: 4198 Opc = X86ISD::COMI; 4199 CC = ISD::SETLE; 4200 break; 4201 case Intrinsic::x86_sse_comigt_ss: 4202 case Intrinsic::x86_sse2_comigt_sd: 4203 Opc = X86ISD::COMI; 4204 CC = ISD::SETGT; 4205 break; 4206 case Intrinsic::x86_sse_comige_ss: 4207 case Intrinsic::x86_sse2_comige_sd: 4208 Opc = X86ISD::COMI; 4209 CC = ISD::SETGE; 4210 break; 4211 case Intrinsic::x86_sse_comineq_ss: 4212 case Intrinsic::x86_sse2_comineq_sd: 4213 Opc = X86ISD::COMI; 4214 CC = ISD::SETNE; 4215 break; 4216 case Intrinsic::x86_sse_ucomieq_ss: 4217 case Intrinsic::x86_sse2_ucomieq_sd: 4218 Opc = X86ISD::UCOMI; 4219 CC = ISD::SETEQ; 4220 break; 4221 case Intrinsic::x86_sse_ucomilt_ss: 4222 case Intrinsic::x86_sse2_ucomilt_sd: 4223 Opc = X86ISD::UCOMI; 4224 CC = ISD::SETLT; 4225 break; 4226 case Intrinsic::x86_sse_ucomile_ss: 4227 case Intrinsic::x86_sse2_ucomile_sd: 4228 Opc = X86ISD::UCOMI; 4229 CC = ISD::SETLE; 4230 break; 4231 case Intrinsic::x86_sse_ucomigt_ss: 4232 case Intrinsic::x86_sse2_ucomigt_sd: 4233 Opc = X86ISD::UCOMI; 4234 CC = ISD::SETGT; 4235 break; 4236 case Intrinsic::x86_sse_ucomige_ss: 4237 case Intrinsic::x86_sse2_ucomige_sd: 4238 Opc = X86ISD::UCOMI; 4239 CC = ISD::SETGE; 4240 break; 4241 case Intrinsic::x86_sse_ucomineq_ss: 4242 case Intrinsic::x86_sse2_ucomineq_sd: 4243 Opc = X86ISD::UCOMI; 4244 CC = ISD::SETNE; 4245 break; 4246 } 4247 4248 unsigned X86CC; 4249 SDOperand LHS = Op.getOperand(1); 4250 SDOperand RHS = Op.getOperand(2); 4251 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4252 4253 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4254 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4255 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4256 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4257 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4258 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4259 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4260 } 4261 } 4262} 4263 4264SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4265 // Depths > 0 not supported yet! 4266 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4267 return SDOperand(); 4268 4269 // Just load the return address 4270 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4271 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4272} 4273 4274SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4275 // Depths > 0 not supported yet! 4276 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4277 return SDOperand(); 4278 4279 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4280 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4281 DAG.getConstant(4, getPointerTy())); 4282} 4283 4284SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4285 SelectionDAG &DAG) { 4286 // Is not yet supported on x86-64 4287 if (Subtarget->is64Bit()) 4288 return SDOperand(); 4289 4290 return DAG.getConstant(8, getPointerTy()); 4291} 4292 4293SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4294{ 4295 assert(!Subtarget->is64Bit() && 4296 "Lowering of eh_return builtin is not supported yet on x86-64"); 4297 4298 MachineFunction &MF = DAG.getMachineFunction(); 4299 SDOperand Chain = Op.getOperand(0); 4300 SDOperand Offset = Op.getOperand(1); 4301 SDOperand Handler = Op.getOperand(2); 4302 4303 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4304 getPointerTy()); 4305 4306 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4307 DAG.getConstant(-4UL, getPointerTy())); 4308 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4309 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4310 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4311 MF.addLiveOut(X86::ECX); 4312 4313 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4314 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4315} 4316 4317SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4318 SelectionDAG &DAG) { 4319 SDOperand Root = Op.getOperand(0); 4320 SDOperand Trmp = Op.getOperand(1); // trampoline 4321 SDOperand FPtr = Op.getOperand(2); // nested function 4322 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4323 4324 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4325 4326 if (Subtarget->is64Bit()) { 4327 return SDOperand(); // not yet supported 4328 } else { 4329 Function *Func = (Function *) 4330 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4331 unsigned CC = Func->getCallingConv(); 4332 unsigned NestReg; 4333 4334 switch (CC) { 4335 default: 4336 assert(0 && "Unsupported calling convention"); 4337 case CallingConv::C: 4338 case CallingConv::Fast: 4339 case CallingConv::X86_StdCall: { 4340 // Pass 'nest' parameter in ECX. 4341 // Must be kept in sync with X86CallingConv.td 4342 NestReg = X86::ECX; 4343 4344 // Check that ECX wasn't needed by an 'inreg' parameter. 4345 const FunctionType *FTy = Func->getFunctionType(); 4346 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4347 4348 if (Attrs && !Func->isVarArg()) { 4349 unsigned InRegCount = 0; 4350 unsigned Idx = 1; 4351 4352 for (FunctionType::param_iterator I = FTy->param_begin(), 4353 E = FTy->param_end(); I != E; ++I, ++Idx) 4354 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4355 // FIXME: should only count parameters that are lowered to integers. 4356 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 4357 4358 if (InRegCount > 2) { 4359 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 4360 abort(); 4361 } 4362 } 4363 break; 4364 } 4365 case CallingConv::X86_FastCall: 4366 // Pass 'nest' parameter in EAX. 4367 // Must be kept in sync with X86CallingConv.td 4368 NestReg = X86::EAX; 4369 break; 4370 } 4371 4372 const X86InstrInfo *TII = 4373 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 4374 4375 SDOperand OutChains[4]; 4376 SDOperand Addr, Disp; 4377 4378 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); 4379 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); 4380 4381 unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); 4382 unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg); 4383 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), 4384 Trmp, TrmpSV->getValue(), TrmpSV->getOffset()); 4385 4386 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); 4387 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(), 4388 TrmpSV->getOffset() + 1, false, 1); 4389 4390 unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); 4391 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); 4392 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, 4393 TrmpSV->getValue() + 5, TrmpSV->getOffset()); 4394 4395 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); 4396 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(), 4397 TrmpSV->getOffset() + 6, false, 1); 4398 4399 return DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4); 4400 } 4401} 4402 4403/// LowerOperation - Provide custom lowering hooks for some operations. 4404/// 4405SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4406 switch (Op.getOpcode()) { 4407 default: assert(0 && "Should not custom lower this!"); 4408 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4409 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4410 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4411 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4412 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4413 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4414 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4415 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4416 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4417 case ISD::SHL_PARTS: 4418 case ISD::SRA_PARTS: 4419 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4420 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4421 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4422 case ISD::FABS: return LowerFABS(Op, DAG); 4423 case ISD::FNEG: return LowerFNEG(Op, DAG); 4424 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4425 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4426 case ISD::SELECT: return LowerSELECT(Op, DAG); 4427 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4428 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4429 case ISD::CALL: return LowerCALL(Op, DAG); 4430 case ISD::RET: return LowerRET(Op, DAG); 4431 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4432 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4433 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4434 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4435 case ISD::VASTART: return LowerVASTART(Op, DAG); 4436 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4437 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4438 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4439 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4440 case ISD::FRAME_TO_ARGS_OFFSET: 4441 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 4442 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4443 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 4444 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 4445 } 4446 return SDOperand(); 4447} 4448 4449const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4450 switch (Opcode) { 4451 default: return NULL; 4452 case X86ISD::SHLD: return "X86ISD::SHLD"; 4453 case X86ISD::SHRD: return "X86ISD::SHRD"; 4454 case X86ISD::FAND: return "X86ISD::FAND"; 4455 case X86ISD::FOR: return "X86ISD::FOR"; 4456 case X86ISD::FXOR: return "X86ISD::FXOR"; 4457 case X86ISD::FSRL: return "X86ISD::FSRL"; 4458 case X86ISD::FILD: return "X86ISD::FILD"; 4459 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4460 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4461 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4462 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4463 case X86ISD::FLD: return "X86ISD::FLD"; 4464 case X86ISD::FST: return "X86ISD::FST"; 4465 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4466 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4467 case X86ISD::CALL: return "X86ISD::CALL"; 4468 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4469 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4470 case X86ISD::CMP: return "X86ISD::CMP"; 4471 case X86ISD::COMI: return "X86ISD::COMI"; 4472 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4473 case X86ISD::SETCC: return "X86ISD::SETCC"; 4474 case X86ISD::CMOV: return "X86ISD::CMOV"; 4475 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4476 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4477 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4478 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4479 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4480 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4481 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4482 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4483 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4484 case X86ISD::FMAX: return "X86ISD::FMAX"; 4485 case X86ISD::FMIN: return "X86ISD::FMIN"; 4486 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 4487 case X86ISD::FRCP: return "X86ISD::FRCP"; 4488 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 4489 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 4490 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 4491 } 4492} 4493 4494// isLegalAddressingMode - Return true if the addressing mode represented 4495// by AM is legal for this target, for a load/store of the specified type. 4496bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 4497 const Type *Ty) const { 4498 // X86 supports extremely general addressing modes. 4499 4500 // X86 allows a sign-extended 32-bit immediate field as a displacement. 4501 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 4502 return false; 4503 4504 if (AM.BaseGV) { 4505 // We can only fold this if we don't need an extra load. 4506 if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) 4507 return false; 4508 4509 // X86-64 only supports addr of globals in small code model. 4510 if (Subtarget->is64Bit()) { 4511 if (getTargetMachine().getCodeModel() != CodeModel::Small) 4512 return false; 4513 // If lower 4G is not available, then we must use rip-relative addressing. 4514 if (AM.BaseOffs || AM.Scale > 1) 4515 return false; 4516 } 4517 } 4518 4519 switch (AM.Scale) { 4520 case 0: 4521 case 1: 4522 case 2: 4523 case 4: 4524 case 8: 4525 // These scales always work. 4526 break; 4527 case 3: 4528 case 5: 4529 case 9: 4530 // These scales are formed with basereg+scalereg. Only accept if there is 4531 // no basereg yet. 4532 if (AM.HasBaseReg) 4533 return false; 4534 break; 4535 default: // Other stuff never works. 4536 return false; 4537 } 4538 4539 return true; 4540} 4541 4542 4543/// isShuffleMaskLegal - Targets can use this to indicate that they only 4544/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4545/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4546/// are assumed to be legal. 4547bool 4548X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 4549 // Only do shuffles on 128-bit vector types for now. 4550 if (MVT::getSizeInBits(VT) == 64) return false; 4551 return (Mask.Val->getNumOperands() <= 4 || 4552 isIdentityMask(Mask.Val) || 4553 isIdentityMask(Mask.Val, true) || 4554 isSplatMask(Mask.Val) || 4555 isPSHUFHW_PSHUFLWMask(Mask.Val) || 4556 X86::isUNPCKLMask(Mask.Val) || 4557 X86::isUNPCKHMask(Mask.Val) || 4558 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 4559 X86::isUNPCKH_v_undef_Mask(Mask.Val)); 4560} 4561 4562bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 4563 MVT::ValueType EVT, 4564 SelectionDAG &DAG) const { 4565 unsigned NumElts = BVOps.size(); 4566 // Only do shuffles on 128-bit vector types for now. 4567 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 4568 if (NumElts == 2) return true; 4569 if (NumElts == 4) { 4570 return (isMOVLMask(&BVOps[0], 4) || 4571 isCommutedMOVL(&BVOps[0], 4, true) || 4572 isSHUFPMask(&BVOps[0], 4) || 4573 isCommutedSHUFP(&BVOps[0], 4)); 4574 } 4575 return false; 4576} 4577 4578//===----------------------------------------------------------------------===// 4579// X86 Scheduler Hooks 4580//===----------------------------------------------------------------------===// 4581 4582MachineBasicBlock * 4583X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 4584 MachineBasicBlock *BB) { 4585 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4586 switch (MI->getOpcode()) { 4587 default: assert(false && "Unexpected instr type to insert"); 4588 case X86::CMOV_FR32: 4589 case X86::CMOV_FR64: 4590 case X86::CMOV_V4F32: 4591 case X86::CMOV_V2F64: 4592 case X86::CMOV_V2I64: { 4593 // To "insert" a SELECT_CC instruction, we actually have to insert the 4594 // diamond control-flow pattern. The incoming instruction knows the 4595 // destination vreg to set, the condition code register to branch on, the 4596 // true/false values to select between, and a branch opcode to use. 4597 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4598 ilist<MachineBasicBlock>::iterator It = BB; 4599 ++It; 4600 4601 // thisMBB: 4602 // ... 4603 // TrueVal = ... 4604 // cmpTY ccX, r1, r2 4605 // bCC copy1MBB 4606 // fallthrough --> copy0MBB 4607 MachineBasicBlock *thisMBB = BB; 4608 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 4609 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 4610 unsigned Opc = 4611 X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); 4612 BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB); 4613 MachineFunction *F = BB->getParent(); 4614 F->getBasicBlockList().insert(It, copy0MBB); 4615 F->getBasicBlockList().insert(It, sinkMBB); 4616 // Update machine-CFG edges by first adding all successors of the current 4617 // block to the new block which will contain the Phi node for the select. 4618 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 4619 e = BB->succ_end(); i != e; ++i) 4620 sinkMBB->addSuccessor(*i); 4621 // Next, remove all successors of the current block, and add the true 4622 // and fallthrough blocks as its successors. 4623 while(!BB->succ_empty()) 4624 BB->removeSuccessor(BB->succ_begin()); 4625 BB->addSuccessor(copy0MBB); 4626 BB->addSuccessor(sinkMBB); 4627 4628 // copy0MBB: 4629 // %FalseValue = ... 4630 // # fallthrough to sinkMBB 4631 BB = copy0MBB; 4632 4633 // Update machine-CFG edges 4634 BB->addSuccessor(sinkMBB); 4635 4636 // sinkMBB: 4637 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4638 // ... 4639 BB = sinkMBB; 4640 BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg()) 4641 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4642 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4643 4644 delete MI; // The pseudo instruction is gone now. 4645 return BB; 4646 } 4647 4648 case X86::FP32_TO_INT16_IN_MEM: 4649 case X86::FP32_TO_INT32_IN_MEM: 4650 case X86::FP32_TO_INT64_IN_MEM: 4651 case X86::FP64_TO_INT16_IN_MEM: 4652 case X86::FP64_TO_INT32_IN_MEM: 4653 case X86::FP64_TO_INT64_IN_MEM: 4654 case X86::FP80_TO_INT16_IN_MEM: 4655 case X86::FP80_TO_INT32_IN_MEM: 4656 case X86::FP80_TO_INT64_IN_MEM: { 4657 // Change the floating point control register to use "round towards zero" 4658 // mode when truncating to an integer value. 4659 MachineFunction *F = BB->getParent(); 4660 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 4661 addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx); 4662 4663 // Load the old value of the high byte of the control word... 4664 unsigned OldCW = 4665 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 4666 addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); 4667 4668 // Set the high part to be round to zero... 4669 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx) 4670 .addImm(0xC7F); 4671 4672 // Reload the modified control word now... 4673 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4674 4675 // Restore the memory image of control word to original value 4676 addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx) 4677 .addReg(OldCW); 4678 4679 // Get the X86 opcode to use. 4680 unsigned Opc; 4681 switch (MI->getOpcode()) { 4682 default: assert(0 && "illegal opcode!"); 4683 case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; 4684 case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; 4685 case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; 4686 case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; 4687 case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; 4688 case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; 4689 case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; 4690 case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; 4691 case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; 4692 } 4693 4694 X86AddressMode AM; 4695 MachineOperand &Op = MI->getOperand(0); 4696 if (Op.isRegister()) { 4697 AM.BaseType = X86AddressMode::RegBase; 4698 AM.Base.Reg = Op.getReg(); 4699 } else { 4700 AM.BaseType = X86AddressMode::FrameIndexBase; 4701 AM.Base.FrameIndex = Op.getFrameIndex(); 4702 } 4703 Op = MI->getOperand(1); 4704 if (Op.isImmediate()) 4705 AM.Scale = Op.getImm(); 4706 Op = MI->getOperand(2); 4707 if (Op.isImmediate()) 4708 AM.IndexReg = Op.getImm(); 4709 Op = MI->getOperand(3); 4710 if (Op.isGlobalAddress()) { 4711 AM.GV = Op.getGlobal(); 4712 } else { 4713 AM.Disp = Op.getImm(); 4714 } 4715 addFullAddress(BuildMI(BB, TII->get(Opc)), AM) 4716 .addReg(MI->getOperand(4).getReg()); 4717 4718 // Reload the original control word now. 4719 addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx); 4720 4721 delete MI; // The pseudo instruction is gone now. 4722 return BB; 4723 } 4724 } 4725} 4726 4727//===----------------------------------------------------------------------===// 4728// X86 Optimization Hooks 4729//===----------------------------------------------------------------------===// 4730 4731void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 4732 uint64_t Mask, 4733 uint64_t &KnownZero, 4734 uint64_t &KnownOne, 4735 const SelectionDAG &DAG, 4736 unsigned Depth) const { 4737 unsigned Opc = Op.getOpcode(); 4738 assert((Opc >= ISD::BUILTIN_OP_END || 4739 Opc == ISD::INTRINSIC_WO_CHAIN || 4740 Opc == ISD::INTRINSIC_W_CHAIN || 4741 Opc == ISD::INTRINSIC_VOID) && 4742 "Should use MaskedValueIsZero if you don't know whether Op" 4743 " is a target node!"); 4744 4745 KnownZero = KnownOne = 0; // Don't know anything. 4746 switch (Opc) { 4747 default: break; 4748 case X86ISD::SETCC: 4749 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 4750 break; 4751 } 4752} 4753 4754/// getShuffleScalarElt - Returns the scalar element that will make up the ith 4755/// element of the result of the vector shuffle. 4756static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 4757 MVT::ValueType VT = N->getValueType(0); 4758 SDOperand PermMask = N->getOperand(2); 4759 unsigned NumElems = PermMask.getNumOperands(); 4760 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 4761 i %= NumElems; 4762 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 4763 return (i == 0) 4764 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 4765 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 4766 SDOperand Idx = PermMask.getOperand(i); 4767 if (Idx.getOpcode() == ISD::UNDEF) 4768 return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); 4769 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 4770 } 4771 return SDOperand(); 4772} 4773 4774/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 4775/// node is a GlobalAddress + an offset. 4776static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 4777 unsigned Opc = N->getOpcode(); 4778 if (Opc == X86ISD::Wrapper) { 4779 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 4780 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 4781 return true; 4782 } 4783 } else if (Opc == ISD::ADD) { 4784 SDOperand N1 = N->getOperand(0); 4785 SDOperand N2 = N->getOperand(1); 4786 if (isGAPlusOffset(N1.Val, GA, Offset)) { 4787 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 4788 if (V) { 4789 Offset += V->getSignExtended(); 4790 return true; 4791 } 4792 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 4793 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 4794 if (V) { 4795 Offset += V->getSignExtended(); 4796 return true; 4797 } 4798 } 4799 } 4800 return false; 4801} 4802 4803/// isConsecutiveLoad - Returns true if N is loading from an address of Base 4804/// + Dist * Size. 4805static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 4806 MachineFrameInfo *MFI) { 4807 if (N->getOperand(0).Val != Base->getOperand(0).Val) 4808 return false; 4809 4810 SDOperand Loc = N->getOperand(1); 4811 SDOperand BaseLoc = Base->getOperand(1); 4812 if (Loc.getOpcode() == ISD::FrameIndex) { 4813 if (BaseLoc.getOpcode() != ISD::FrameIndex) 4814 return false; 4815 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 4816 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 4817 int FS = MFI->getObjectSize(FI); 4818 int BFS = MFI->getObjectSize(BFI); 4819 if (FS != BFS || FS != Size) return false; 4820 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 4821 } else { 4822 GlobalValue *GV1 = NULL; 4823 GlobalValue *GV2 = NULL; 4824 int64_t Offset1 = 0; 4825 int64_t Offset2 = 0; 4826 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 4827 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 4828 if (isGA1 && isGA2 && GV1 == GV2) 4829 return Offset1 == (Offset2 + Dist*Size); 4830 } 4831 4832 return false; 4833} 4834 4835static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 4836 const X86Subtarget *Subtarget) { 4837 GlobalValue *GV; 4838 int64_t Offset; 4839 if (isGAPlusOffset(Base, GV, Offset)) 4840 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 4841 else { 4842 assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); 4843 int BFI = cast<FrameIndexSDNode>(Base)->getIndex(); 4844 if (BFI < 0) 4845 // Fixed objects do not specify alignment, however the offsets are known. 4846 return ((Subtarget->getStackAlignment() % 16) == 0 && 4847 (MFI->getObjectOffset(BFI) % 16) == 0); 4848 else 4849 return MFI->getObjectAlignment(BFI) >= 16; 4850 } 4851 return false; 4852} 4853 4854 4855/// PerformShuffleCombine - Combine a vector_shuffle that is equal to 4856/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 4857/// if the load addresses are consecutive, non-overlapping, and in the right 4858/// order. 4859static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 4860 const X86Subtarget *Subtarget) { 4861 MachineFunction &MF = DAG.getMachineFunction(); 4862 MachineFrameInfo *MFI = MF.getFrameInfo(); 4863 MVT::ValueType VT = N->getValueType(0); 4864 MVT::ValueType EVT = MVT::getVectorElementType(VT); 4865 SDOperand PermMask = N->getOperand(2); 4866 int NumElems = (int)PermMask.getNumOperands(); 4867 SDNode *Base = NULL; 4868 for (int i = 0; i < NumElems; ++i) { 4869 SDOperand Idx = PermMask.getOperand(i); 4870 if (Idx.getOpcode() == ISD::UNDEF) { 4871 if (!Base) return SDOperand(); 4872 } else { 4873 SDOperand Arg = 4874 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 4875 if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val)) 4876 return SDOperand(); 4877 if (!Base) 4878 Base = Arg.Val; 4879 else if (!isConsecutiveLoad(Arg.Val, Base, 4880 i, MVT::getSizeInBits(EVT)/8,MFI)) 4881 return SDOperand(); 4882 } 4883 } 4884 4885 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 4886 LoadSDNode *LD = cast<LoadSDNode>(Base); 4887 if (isAlign16) { 4888 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 4889 LD->getSrcValueOffset(), LD->isVolatile()); 4890 } else { 4891 return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), 4892 LD->getSrcValueOffset(), LD->isVolatile(), 4893 LD->getAlignment()); 4894 } 4895} 4896 4897/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. 4898static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, 4899 const X86Subtarget *Subtarget) { 4900 SDOperand Cond = N->getOperand(0); 4901 4902 // If we have SSE[12] support, try to form min/max nodes. 4903 if (Subtarget->hasSSE2() && 4904 (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) { 4905 if (Cond.getOpcode() == ISD::SETCC) { 4906 // Get the LHS/RHS of the select. 4907 SDOperand LHS = N->getOperand(1); 4908 SDOperand RHS = N->getOperand(2); 4909 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 4910 4911 unsigned Opcode = 0; 4912 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 4913 switch (CC) { 4914 default: break; 4915 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 4916 case ISD::SETULE: 4917 case ISD::SETLE: 4918 if (!UnsafeFPMath) break; 4919 // FALL THROUGH. 4920 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 4921 case ISD::SETLT: 4922 Opcode = X86ISD::FMIN; 4923 break; 4924 4925 case ISD::SETOGT: // (X > Y) ? X : Y -> max 4926 case ISD::SETUGT: 4927 case ISD::SETGT: 4928 if (!UnsafeFPMath) break; 4929 // FALL THROUGH. 4930 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 4931 case ISD::SETGE: 4932 Opcode = X86ISD::FMAX; 4933 break; 4934 } 4935 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 4936 switch (CC) { 4937 default: break; 4938 case ISD::SETOGT: // (X > Y) ? Y : X -> min 4939 case ISD::SETUGT: 4940 case ISD::SETGT: 4941 if (!UnsafeFPMath) break; 4942 // FALL THROUGH. 4943 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 4944 case ISD::SETGE: 4945 Opcode = X86ISD::FMIN; 4946 break; 4947 4948 case ISD::SETOLE: // (X <= Y) ? Y : X -> max 4949 case ISD::SETULE: 4950 case ISD::SETLE: 4951 if (!UnsafeFPMath) break; 4952 // FALL THROUGH. 4953 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 4954 case ISD::SETLT: 4955 Opcode = X86ISD::FMAX; 4956 break; 4957 } 4958 } 4959 4960 if (Opcode) 4961 return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS); 4962 } 4963 4964 } 4965 4966 return SDOperand(); 4967} 4968 4969 4970SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 4971 DAGCombinerInfo &DCI) const { 4972 SelectionDAG &DAG = DCI.DAG; 4973 switch (N->getOpcode()) { 4974 default: break; 4975 case ISD::VECTOR_SHUFFLE: 4976 return PerformShuffleCombine(N, DAG, Subtarget); 4977 case ISD::SELECT: 4978 return PerformSELECTCombine(N, DAG, Subtarget); 4979 } 4980 4981 return SDOperand(); 4982} 4983 4984//===----------------------------------------------------------------------===// 4985// X86 Inline Assembly Support 4986//===----------------------------------------------------------------------===// 4987 4988/// getConstraintType - Given a constraint letter, return the type of 4989/// constraint it is for this target. 4990X86TargetLowering::ConstraintType 4991X86TargetLowering::getConstraintType(const std::string &Constraint) const { 4992 if (Constraint.size() == 1) { 4993 switch (Constraint[0]) { 4994 case 'A': 4995 case 'r': 4996 case 'R': 4997 case 'l': 4998 case 'q': 4999 case 'Q': 5000 case 'x': 5001 case 'Y': 5002 return C_RegisterClass; 5003 default: 5004 break; 5005 } 5006 } 5007 return TargetLowering::getConstraintType(Constraint); 5008} 5009 5010/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5011/// vector. If it is invalid, don't add anything to Ops. 5012void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op, 5013 char Constraint, 5014 std::vector<SDOperand>&Ops, 5015 SelectionDAG &DAG) { 5016 SDOperand Result(0, 0); 5017 5018 switch (Constraint) { 5019 default: break; 5020 case 'I': 5021 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5022 if (C->getValue() <= 31) { 5023 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5024 break; 5025 } 5026 } 5027 return; 5028 case 'N': 5029 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 5030 if (C->getValue() <= 255) { 5031 Result = DAG.getTargetConstant(C->getValue(), Op.getValueType()); 5032 break; 5033 } 5034 } 5035 return; 5036 case 'i': { 5037 // Literal immediates are always ok. 5038 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { 5039 Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType()); 5040 break; 5041 } 5042 5043 // If we are in non-pic codegen mode, we allow the address of a global (with 5044 // an optional displacement) to be used with 'i'. 5045 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); 5046 int64_t Offset = 0; 5047 5048 // Match either (GA) or (GA+C) 5049 if (GA) { 5050 Offset = GA->getOffset(); 5051 } else if (Op.getOpcode() == ISD::ADD) { 5052 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5053 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5054 if (C && GA) { 5055 Offset = GA->getOffset()+C->getValue(); 5056 } else { 5057 C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 5058 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); 5059 if (C && GA) 5060 Offset = GA->getOffset()+C->getValue(); 5061 else 5062 C = 0, GA = 0; 5063 } 5064 } 5065 5066 if (GA) { 5067 // If addressing this global requires a load (e.g. in PIC mode), we can't 5068 // match. 5069 if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(), 5070 false)) 5071 return; 5072 5073 Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), 5074 Offset); 5075 Result = Op; 5076 break; 5077 } 5078 5079 // Otherwise, not valid for this mode. 5080 return; 5081 } 5082 } 5083 5084 if (Result.Val) { 5085 Ops.push_back(Result); 5086 return; 5087 } 5088 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5089} 5090 5091std::vector<unsigned> X86TargetLowering:: 5092getRegClassForInlineAsmConstraint(const std::string &Constraint, 5093 MVT::ValueType VT) const { 5094 if (Constraint.size() == 1) { 5095 // FIXME: not handling fp-stack yet! 5096 switch (Constraint[0]) { // GCC X86 Constraint Letters 5097 default: break; // Unknown constraint letter 5098 case 'A': // EAX/EDX 5099 if (VT == MVT::i32 || VT == MVT::i64) 5100 return make_vector<unsigned>(X86::EAX, X86::EDX, 0); 5101 break; 5102 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 5103 case 'Q': // Q_REGS 5104 if (VT == MVT::i32) 5105 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); 5106 else if (VT == MVT::i16) 5107 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); 5108 else if (VT == MVT::i8) 5109 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); 5110 break; 5111 } 5112 } 5113 5114 return std::vector<unsigned>(); 5115} 5116 5117std::pair<unsigned, const TargetRegisterClass*> 5118X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5119 MVT::ValueType VT) const { 5120 // First, see if this is a constraint that directly corresponds to an LLVM 5121 // register class. 5122 if (Constraint.size() == 1) { 5123 // GCC Constraint Letters 5124 switch (Constraint[0]) { 5125 default: break; 5126 case 'r': // GENERAL_REGS 5127 case 'R': // LEGACY_REGS 5128 case 'l': // INDEX_REGS 5129 if (VT == MVT::i64 && Subtarget->is64Bit()) 5130 return std::make_pair(0U, X86::GR64RegisterClass); 5131 if (VT == MVT::i32) 5132 return std::make_pair(0U, X86::GR32RegisterClass); 5133 else if (VT == MVT::i16) 5134 return std::make_pair(0U, X86::GR16RegisterClass); 5135 else if (VT == MVT::i8) 5136 return std::make_pair(0U, X86::GR8RegisterClass); 5137 break; 5138 case 'y': // MMX_REGS if MMX allowed. 5139 if (!Subtarget->hasMMX()) break; 5140 return std::make_pair(0U, X86::VR64RegisterClass); 5141 break; 5142 case 'Y': // SSE_REGS if SSE2 allowed 5143 if (!Subtarget->hasSSE2()) break; 5144 // FALL THROUGH. 5145 case 'x': // SSE_REGS if SSE1 allowed 5146 if (!Subtarget->hasSSE1()) break; 5147 5148 switch (VT) { 5149 default: break; 5150 // Scalar SSE types. 5151 case MVT::f32: 5152 case MVT::i32: 5153 return std::make_pair(0U, X86::FR32RegisterClass); 5154 case MVT::f64: 5155 case MVT::i64: 5156 return std::make_pair(0U, X86::FR64RegisterClass); 5157 // Vector types. 5158 case MVT::v16i8: 5159 case MVT::v8i16: 5160 case MVT::v4i32: 5161 case MVT::v2i64: 5162 case MVT::v4f32: 5163 case MVT::v2f64: 5164 return std::make_pair(0U, X86::VR128RegisterClass); 5165 } 5166 break; 5167 } 5168 } 5169 5170 // Use the default implementation in TargetLowering to convert the register 5171 // constraint into a member of a register class. 5172 std::pair<unsigned, const TargetRegisterClass*> Res; 5173 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5174 5175 // Not found as a standard register? 5176 if (Res.second == 0) { 5177 // GCC calls "st(0)" just plain "st". 5178 if (StringsEqualNoCase("{st}", Constraint)) { 5179 Res.first = X86::ST0; 5180 Res.second = X86::RSTRegisterClass; 5181 } 5182 5183 return Res; 5184 } 5185 5186 // Otherwise, check to see if this is a register class of the wrong value 5187 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to 5188 // turn into {ax},{dx}. 5189 if (Res.second->hasType(VT)) 5190 return Res; // Correct type already, nothing to do. 5191 5192 // All of the single-register GCC register classes map their values onto 5193 // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we 5194 // really want an 8-bit or 32-bit register, map to the appropriate register 5195 // class and return the appropriate register. 5196 if (Res.second != X86::GR16RegisterClass) 5197 return Res; 5198 5199 if (VT == MVT::i8) { 5200 unsigned DestReg = 0; 5201 switch (Res.first) { 5202 default: break; 5203 case X86::AX: DestReg = X86::AL; break; 5204 case X86::DX: DestReg = X86::DL; break; 5205 case X86::CX: DestReg = X86::CL; break; 5206 case X86::BX: DestReg = X86::BL; break; 5207 } 5208 if (DestReg) { 5209 Res.first = DestReg; 5210 Res.second = Res.second = X86::GR8RegisterClass; 5211 } 5212 } else if (VT == MVT::i32) { 5213 unsigned DestReg = 0; 5214 switch (Res.first) { 5215 default: break; 5216 case X86::AX: DestReg = X86::EAX; break; 5217 case X86::DX: DestReg = X86::EDX; break; 5218 case X86::CX: DestReg = X86::ECX; break; 5219 case X86::BX: DestReg = X86::EBX; break; 5220 case X86::SI: DestReg = X86::ESI; break; 5221 case X86::DI: DestReg = X86::EDI; break; 5222 case X86::BP: DestReg = X86::EBP; break; 5223 case X86::SP: DestReg = X86::ESP; break; 5224 } 5225 if (DestReg) { 5226 Res.first = DestReg; 5227 Res.second = Res.second = X86::GR32RegisterClass; 5228 } 5229 } else if (VT == MVT::i64) { 5230 unsigned DestReg = 0; 5231 switch (Res.first) { 5232 default: break; 5233 case X86::AX: DestReg = X86::RAX; break; 5234 case X86::DX: DestReg = X86::RDX; break; 5235 case X86::CX: DestReg = X86::RCX; break; 5236 case X86::BX: DestReg = X86::RBX; break; 5237 case X86::SI: DestReg = X86::RSI; break; 5238 case X86::DI: DestReg = X86::RDI; break; 5239 case X86::BP: DestReg = X86::RBP; break; 5240 case X86::SP: DestReg = X86::RSP; break; 5241 } 5242 if (DestReg) { 5243 Res.first = DestReg; 5244 Res.second = Res.second = X86::GR64RegisterClass; 5245 } 5246 } 5247 5248 return Res; 5249} 5250