X86ISelLowering.cpp revision 44c8265cf86ba3cfc317605726296474aa8d7fa6
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CodeEmitter.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird; it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
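      // (Illustrative note, not in the original source: promoting an i32
      //  UINT_TO_FP means zero-extending the operand to i64 and using i64
      //  SINT_TO_FP instead, e.g. "uitofp i32 %x to double" becomes
      //  "zext i32 to i64" followed by "sitofp i64 to double"; expanding
      //  instead falls back to the legalizer's generic sequence.)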
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
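  // (Illustrative note, not in the original: without SSE, an f32<->i32
  //  BIT_CONVERT is expanded into a store of one type and a reload as the
  //  other through a stack slot; with SSE the same bitcast could stay in
  //  registers as a single movd.)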
  if (!X86ScalarSSE) {
    setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
    setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
  }

  setOperationAction(ISD::BR_JT , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);

  setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
  setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
    setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  setOperationAction(ISD::SELECT , MVT::i8 , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC , MVT::i64 , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }

  setOperationAction(ISD::ADJUST_TRAMP, MVT::i32, Expand);
  setOperationAction(ISD::ADJUST_TRAMP, MVT::i64, Expand);
  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
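    // (Illustrative sketch, not in the original source: the custom
    //  FCOPYSIGN lowering builds copysign(x, y) out of bitmasks, roughly:
    //      sign = y & 0x8000000000000000   ; ANDPD with a sign-bit mask
    //      mag  = x & 0x7FFFFFFFFFFFFFFF   ; ANDPD with the complement mask
    //      res  = mag | sign               ; ORPD
    //  The mask constants come from the constant pool; the values shown
    //  here are for f64.)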
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd

    // Conversions to long double (in X87) go through memory.
    setConvertAction(MVT::f32, MVT::f80, Expand);
    setConvertAction(MVT::f64, MVT::f80, Expand);

    // Conversions from long double (in X87) go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating-point truncations need to go through memory.
    setConvertAction(MVT::f80, MVT::f32, Expand);
    setConvertAction(MVT::f64, MVT::f32, Expand);
    setConvertAction(MVT::f80, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);

  // First, set the operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
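    // (Illustrative note, not in the original: inserting a float into a
    //  v4f32 lane can be done by bitcasting the vector to v8i16, using two
    //  PINSRW inserts for the value's 16-bit halves, and bitcasting back;
    //  that is what "in terms of SSE2 v8i16 ones" refers to.)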
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
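/// (Editorial note: an ISD::RET node's operands are the chain followed by a
/// (value, signness) pair for each return value, which is why the operand
/// count is asserted to be odd and values are fetched at index i*2+1 below.)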
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if (X86ScalarSSE) {
      SDOperand MemLoc;

      // If this is a load of a scalar-sse value, don't store the loaded value
      // back to the stack only to reload it: just reuse the scalar-sse load's
      // memory location.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into the top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. It returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physregs.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When the stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention only slightly: the callee cleans
//  up the stack instead of the caller, and symbols are decorated in a fancy
//  way :) It doesn't support any vector arguments.
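//  (Illustrative example, not in the original source: a function declared as
//       int __stdcall Sum(int a, int b);
//   is typically decorated as "_Sum@8" -- the '@8' being the number of
//   argument bytes -- and the callee returns with "ret 8" to pop them.)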
/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value. It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0;  // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
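  // (Illustrative note, not in the original: with i386 ELF PIC, a call such
  //  as "call foo@PLT" expects %ebx to hold the address of the GOT, usually
  //  materialized as:
  //      call .L1
  //  .L1: pop  %ebx
  //      addl $_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx
  //  which is what the X86ISD::GlobalBaseReg copy below provides.)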
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return-value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee-pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
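// (Illustrative example, not in the original source: for
//      int __fastcall f(int a, int b, int c);
//  'a' is passed in ECX and 'b' in EDX, while 'c' goes on the stack and is
//  popped by the callee on return.)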
951// 952// This calling convention always arranges for the callee pop value to be 8n+4 953// bytes, which is needed for tail recursion elimination and stack alignment 954// reasons. 955SDOperand 956X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 957 MachineFunction &MF = DAG.getMachineFunction(); 958 MachineFrameInfo *MFI = MF.getFrameInfo(); 959 SDOperand Root = Op.getOperand(0); 960 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 961 962 // Assign locations to all of the incoming arguments. 963 SmallVector<CCValAssign, 16> ArgLocs; 964 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 965 getTargetMachine(), ArgLocs); 966 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall); 967 968 SmallVector<SDOperand, 8> ArgValues; 969 unsigned LastVal = ~0U; 970 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 971 CCValAssign &VA = ArgLocs[i]; 972 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 973 // places. 974 assert(VA.getValNo() != LastVal && 975 "Don't support value assigned to multiple locs yet"); 976 LastVal = VA.getValNo(); 977 978 if (VA.isRegLoc()) { 979 MVT::ValueType RegVT = VA.getLocVT(); 980 TargetRegisterClass *RC; 981 if (RegVT == MVT::i32) 982 RC = X86::GR32RegisterClass; 983 else { 984 assert(MVT::isVector(RegVT)); 985 RC = X86::VR128RegisterClass; 986 } 987 988 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 989 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 990 991 // If this is an 8 or 16-bit value, it is really passed promoted to 32 992 // bits. Insert an assert[sz]ext to capture this, then truncate to the 993 // right size. 994 if (VA.getLocInfo() == CCValAssign::SExt) 995 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 996 DAG.getValueType(VA.getValVT())); 997 else if (VA.getLocInfo() == CCValAssign::ZExt) 998 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 999 DAG.getValueType(VA.getValVT())); 1000 1001 if (VA.getLocInfo() != CCValAssign::Full) 1002 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 1003 1004 ArgValues.push_back(ArgValue); 1005 } else { 1006 assert(VA.isMemLoc()); 1007 1008 // Create the nodes corresponding to a load from this parameter slot. 1009 int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, 1010 VA.getLocMemOffset()); 1011 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 1012 ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); 1013 } 1014 } 1015 1016 ArgValues.push_back(Root); 1017 1018 unsigned StackSize = CCInfo.getNextStackOffset(); 1019 1020 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { 1021 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1022 // arguments and the arguments after the retaddr has been pushed are aligned. 1023 if ((StackSize & 7) == 0) 1024 StackSize += 4; 1025 } 1026 1027 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 1028 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 1029 BytesToPopOnReturn = StackSize; // Callee pops all stack arguments. 1030 BytesCallerReserves = 0; 1031 1032 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1033 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1034 1035 // Return the new list of results. 
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the stack adjustment is 8n+4 bytes so that the start of the
    // arguments is aligned after the return address has been pushed.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions
    // in non-JIT mode.
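    // (Illustrative note, not in the original: a dllimported function is
    //  reached through an import-table pointer, conventionally named
    //  "__imp__foo" on i386, so the call must first load the real address
    //  from that pointer -- the "extra load" GVRequiresExtraLoad checks for.)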
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass;  // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());

      unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
      if (Flags & ISD::ParamFlags::ByVal)
        ArgValues.push_back(FIN);
      else
        ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
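    // (Illustrative note, not in the original: the register save area
    //  created below is laid out as 6 GPR slots of 8 bytes (offsets 0-47)
    //  followed by 8 XMM slots of 16 bytes (offsets 48-175), matching the
    //  gp_offset/fp_offset fields of the AMD64 ABI va_list.)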
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  BytesToPopOnReturn = 0;           // Callee pops nothing.
  BytesCallerReserves = StackSize;

  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
1359     switch (VA.getLocInfo()) {
1360     default: assert(0 && "Unknown loc info!");
1361     case CCValAssign::Full: break;
1362     case CCValAssign::SExt:
1363       Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
1364       break;
1365     case CCValAssign::ZExt:
1366       Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
1367       break;
1368     case CCValAssign::AExt:
1369       Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
1370       break;
1371     }
1372
1373     if (VA.isRegLoc()) {
1374       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1375     } else {
1376       assert(VA.isMemLoc());
1377       if (StackPtr.Val == 0)
1378         StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
1379       SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
1380       PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1381
1382       SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
1383       unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
1384       if (Flags & ISD::ParamFlags::ByVal) {
1385         unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
1386                                ISD::ParamFlags::ByValAlignOffs);
1387         unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
1388                         ISD::ParamFlags::ByValSizeOffs;
1389
1390         SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
1391         SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
1392
1393         assert(0 && "Not Implemented");
1394
1395         SDOperand Copy = DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff,
1396                                      Arg, SizeNode, AlignNode);
1397         MemOpChains.push_back(Copy);
1398       }
1399       else {
1400         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1401       }
1402     }
1403   }
1404
1405   if (!MemOpChains.empty())
1406     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1407                         &MemOpChains[0], MemOpChains.size());
1408
1409   // Build a sequence of copy-to-reg nodes chained together with token chain
1410   // and flag operands which copy the outgoing args into registers.
1411   SDOperand InFlag;
1412   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1413     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1414                              InFlag);
1415     InFlag = Chain.getValue(1);
1416   }
1417
1418   if (isVarArg) {
1419     // From the AMD64 ABI document:
1420     // For calls that may call functions that use varargs or stdargs
1421     // (prototype-less calls or calls to functions containing ellipsis (...) in
1422     // the declaration) %al is used as a hidden argument to specify the number
1423     // of SSE registers used. The contents of %al do not need to match exactly
1424     // the number of registers, but must be an upper bound on the number of SSE
1425     // registers used and is in the range 0 - 8 inclusive.
1426
1427     // Count the number of XMM registers allocated.
1428     static const unsigned XMMArgRegs[] = {
1429       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1430       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1431     };
1432     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
1433
1434     Chain = DAG.getCopyToReg(Chain, X86::AL,
1435                              DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
1436     InFlag = Chain.getValue(1);
1437   }
1438
1439   // If the callee is a GlobalAddress node (quite common, every direct call is)
1440   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1441   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1442     // We should use an extra load for direct calls to dllimported functions in
1443     // non-JIT mode.
1444     if (getTargetMachine().getCodeModel() != CodeModel::Large
1445         && !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
1446                                            getTargetMachine(), true))
1447       Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1448   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1449     if (getTargetMachine().getCodeModel() != CodeModel::Large)
1450       Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1451
1452   // Returns a chain & a flag for retval copy to use.
1453   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1454   SmallVector<SDOperand, 8> Ops;
1455   Ops.push_back(Chain);
1456   Ops.push_back(Callee);
1457
1458   // Add argument registers to the end of the list so that they are known live
1459   // into the call.
1460   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1461     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1462                                   RegsToPass[i].second.getValueType()));
1463
1464   if (InFlag.Val)
1465     Ops.push_back(InFlag);
1466
1467   // FIXME: Do not generate X86ISD::TAILCALL for now.
1468   Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1469                       NodeTys, &Ops[0], Ops.size());
1470   InFlag = Chain.getValue(1);
1471
1472   // Returns a flag for retval copy to use.
1473   NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1474   Ops.clear();
1475   Ops.push_back(Chain);
1476   Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1477   Ops.push_back(DAG.getConstant(0, getPointerTy()));
1478   Ops.push_back(InFlag);
1479   Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1480   InFlag = Chain.getValue(1);
1481
1482   // Handle result values, copying them out of physregs into vregs that we
1483   // return.
1484   return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1485 }
1486
1487
1488 //===----------------------------------------------------------------------===//
1489 //   Other Lowering Hooks
1490 //===----------------------------------------------------------------------===//
1491
1492
1493 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1494   MachineFunction &MF = DAG.getMachineFunction();
1495   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1496   int ReturnAddrIndex = FuncInfo->getRAIndex();
1497
1498   if (ReturnAddrIndex == 0) {
1499     // Set up a frame object for the return address.
1500     if (Subtarget->is64Bit())
1501       ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1502     else
1503       ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1504
1505     FuncInfo->setRAIndex(ReturnAddrIndex);
1506   }
1507
1508   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
1509 }
1510
1511
1512
1513 /// translateX86CC - do a one to one translation of an ISD::CondCode to the X86
1514 /// specific condition code. It returns false if it cannot do a direct
1515 /// translation. X86CC is the translated CondCode. LHS/RHS are modified as
1516 /// needed.
1517 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1518                            unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
1519                            SelectionDAG &DAG) {
1520   X86CC = X86::COND_INVALID;
1521   if (!isFP) {
1522     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1523       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
1524         // X > -1   -> X == 0, jump !sign.
1525         RHS = DAG.getConstant(0, RHS.getValueType());
1526         X86CC = X86::COND_NS;
1527         return true;
1528       } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
1529         // X < 0   -> X == 0, jump on sign.
1530         X86CC = X86::COND_S;
1531         return true;
1532       }
1533     }
1534
1535     switch (SetCCOpcode) {
1536     default: break;
1537     case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1538     case ISD::SETGT:  X86CC = X86::COND_G;  break;
1539     case ISD::SETGE:  X86CC = X86::COND_GE; break;
1540     case ISD::SETLT:  X86CC = X86::COND_L;  break;
1541     case ISD::SETLE:  X86CC = X86::COND_LE; break;
1542     case ISD::SETNE:  X86CC = X86::COND_NE; break;
1543     case ISD::SETULT: X86CC = X86::COND_B;  break;
1544     case ISD::SETUGT: X86CC = X86::COND_A;  break;
1545     case ISD::SETULE: X86CC = X86::COND_BE; break;
1546     case ISD::SETUGE: X86CC = X86::COND_AE; break;
1547     }
1548   } else {
1549     // On a floating point condition, the flags are set as follows:
1550     //  ZF  PF  CF   op
1551     //   0 | 0 | 0 | X > Y
1552     //   0 | 0 | 1 | X < Y
1553     //   1 | 0 | 0 | X == Y
1554     //   1 | 1 | 1 | unordered
1555     bool Flip = false;
1556     switch (SetCCOpcode) {
1557     default: break;
1558     case ISD::SETUEQ:
1559     case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1560     case ISD::SETOLT: Flip = true; // Fallthrough
1561     case ISD::SETOGT:
1562     case ISD::SETGT:  X86CC = X86::COND_A;  break;
1563     case ISD::SETOLE: Flip = true; // Fallthrough
1564     case ISD::SETOGE:
1565     case ISD::SETGE:  X86CC = X86::COND_AE; break;
1566     case ISD::SETUGT: Flip = true; // Fallthrough
1567     case ISD::SETULT:
1568     case ISD::SETLT:  X86CC = X86::COND_B;  break;
1569     case ISD::SETUGE: Flip = true; // Fallthrough
1570     case ISD::SETULE:
1571     case ISD::SETLE:  X86CC = X86::COND_BE; break;
1572     case ISD::SETONE:
1573     case ISD::SETNE:  X86CC = X86::COND_NE; break;
1574     case ISD::SETUO:  X86CC = X86::COND_P;  break;
1575     case ISD::SETO:   X86CC = X86::COND_NP; break;
1576     }
1577     if (Flip)
1578       std::swap(LHS, RHS);
1579   }
1580
1581   return X86CC != X86::COND_INVALID;
1582 }
1583
1584 /// hasFPCMov - is there a floating point cmov for the specific X86 condition
1585 /// code?  The current x86 ISA includes the following FP cmov instructions:
1586 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1587 static bool hasFPCMov(unsigned X86CC) {
1588   switch (X86CC) {
1589   default:
1590     return false;
1591   case X86::COND_B:
1592   case X86::COND_BE:
1593   case X86::COND_E:
1594   case X86::COND_P:
1595   case X86::COND_A:
1596   case X86::COND_AE:
1597   case X86::COND_NE:
1598   case X86::COND_NP:
1599     return true;
1600   }
1601 }
1602
1603 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1604 /// true if Op is undef or if its value falls within the specified range [Low, Hi).
1605 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1606   if (Op.getOpcode() == ISD::UNDEF)
1607     return true;
1608
1609   unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1610   return (Val >= Low && Val < Hi);
1611 }
1612
1613 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1614 /// true if Op is undef or if its value equals the specified value.
1615 static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1616   if (Op.getOpcode() == ISD::UNDEF)
1617     return true;
1618   return cast<ConstantSDNode>(Op)->getValue() == Val;
1619 }
1620
1621 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1622 /// specifies a shuffle of elements that is suitable for input to PSHUFD.
1623 bool X86::isPSHUFDMask(SDNode *N) {
1624   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1625
1626   if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
1627     return false;
1628
1629   // Check if the value doesn't reference the second vector.
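  // For example, for a 4-element shuffle <2, 1, undef, 0> is a valid PSHUFD
  // mask, while <2, 5, 0, 1> is not: index 5 selects from the second vector.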
1630   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1631     SDOperand Arg = N->getOperand(i);
1632     if (Arg.getOpcode() == ISD::UNDEF) continue;
1633     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1634     if (cast<ConstantSDNode>(Arg)->getValue() >= e)
1635       return false;
1636   }
1637
1638   return true;
1639 }
1640
1641 /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1642 /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1643 bool X86::isPSHUFHWMask(SDNode *N) {
1644   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1645
1646   if (N->getNumOperands() != 8)
1647     return false;
1648
1649   // Lower quadword copied in order.
1650   for (unsigned i = 0; i != 4; ++i) {
1651     SDOperand Arg = N->getOperand(i);
1652     if (Arg.getOpcode() == ISD::UNDEF) continue;
1653     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1654     if (cast<ConstantSDNode>(Arg)->getValue() != i)
1655       return false;
1656   }
1657
1658   // Upper quadword shuffled.
1659   for (unsigned i = 4; i != 8; ++i) {
1660     SDOperand Arg = N->getOperand(i);
1661     if (Arg.getOpcode() == ISD::UNDEF) continue;
1662     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1663     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1664     if (Val < 4 || Val > 7)
1665       return false;
1666   }
1667
1668   return true;
1669 }
1670
1671 /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1672 /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1673 bool X86::isPSHUFLWMask(SDNode *N) {
1674   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1675
1676   if (N->getNumOperands() != 8)
1677     return false;
1678
1679   // Upper quadword copied in order.
1680   for (unsigned i = 4; i != 8; ++i)
1681     if (!isUndefOrEqual(N->getOperand(i), i))
1682       return false;
1683
1684   // Lower quadword shuffled.
1685   for (unsigned i = 0; i != 4; ++i)
1686     if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1687       return false;
1688
1689   return true;
1690 }
1691
1692 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1693 /// specifies a shuffle of elements that is suitable for input to SHUFP*.
1694 static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
1695   if (NumElems != 2 && NumElems != 4) return false;
1696
1697   unsigned Half = NumElems / 2;
1698   for (unsigned i = 0; i < Half; ++i)
1699     if (!isUndefOrInRange(Elems[i], 0, NumElems))
1700       return false;
1701   for (unsigned i = Half; i < NumElems; ++i)
1702     if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
1703       return false;
1704
1705   return true;
1706 }
1707
1708 bool X86::isSHUFPMask(SDNode *N) {
1709   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1710   return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
1711 }
1712
1713 /// isCommutedSHUFP - Returns true if the shuffle mask is exactly
1714 /// the reverse of what x86 shuffles want.  x86 shuffles require the lower
1715 /// half elements to come from vector 1 (which would equal the destination)
1716 /// and the upper half to come from vector 2.
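/// For example, with 4 elements <4, 5, 0, 1> is a commuted SHUFP mask: the
/// lower half selects from vector 2 and the upper half from vector 1, so the
/// operands must be swapped before a SHUFP can be used.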
1717static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 1718 if (NumOps != 2 && NumOps != 4) return false; 1719 1720 unsigned Half = NumOps / 2; 1721 for (unsigned i = 0; i < Half; ++i) 1722 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 1723 return false; 1724 for (unsigned i = Half; i < NumOps; ++i) 1725 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 1726 return false; 1727 return true; 1728} 1729 1730static bool isCommutedSHUFP(SDNode *N) { 1731 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1732 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 1733} 1734 1735/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1736/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1737bool X86::isMOVHLPSMask(SDNode *N) { 1738 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1739 1740 if (N->getNumOperands() != 4) 1741 return false; 1742 1743 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1744 return isUndefOrEqual(N->getOperand(0), 6) && 1745 isUndefOrEqual(N->getOperand(1), 7) && 1746 isUndefOrEqual(N->getOperand(2), 2) && 1747 isUndefOrEqual(N->getOperand(3), 3); 1748} 1749 1750/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 1751/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 1752/// <2, 3, 2, 3> 1753bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 1754 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1755 1756 if (N->getNumOperands() != 4) 1757 return false; 1758 1759 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 1760 return isUndefOrEqual(N->getOperand(0), 2) && 1761 isUndefOrEqual(N->getOperand(1), 3) && 1762 isUndefOrEqual(N->getOperand(2), 2) && 1763 isUndefOrEqual(N->getOperand(3), 3); 1764} 1765 1766/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1767/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1768bool X86::isMOVLPMask(SDNode *N) { 1769 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1770 1771 unsigned NumElems = N->getNumOperands(); 1772 if (NumElems != 2 && NumElems != 4) 1773 return false; 1774 1775 for (unsigned i = 0; i < NumElems/2; ++i) 1776 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1777 return false; 1778 1779 for (unsigned i = NumElems/2; i < NumElems; ++i) 1780 if (!isUndefOrEqual(N->getOperand(i), i)) 1781 return false; 1782 1783 return true; 1784} 1785 1786/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1787/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1788/// and MOVLHPS. 1789bool X86::isMOVHPMask(SDNode *N) { 1790 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1791 1792 unsigned NumElems = N->getNumOperands(); 1793 if (NumElems != 2 && NumElems != 4) 1794 return false; 1795 1796 for (unsigned i = 0; i < NumElems/2; ++i) 1797 if (!isUndefOrEqual(N->getOperand(i), i)) 1798 return false; 1799 1800 for (unsigned i = 0; i < NumElems/2; ++i) { 1801 SDOperand Arg = N->getOperand(i + NumElems/2); 1802 if (!isUndefOrEqual(Arg, i + NumElems)) 1803 return false; 1804 } 1805 1806 return true; 1807} 1808 1809/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1810/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
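/// For example, with 4 elements unpcklps interleaves the low halves of its
/// two inputs, which corresponds to the mask <0, 4, 1, 5>.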
1811bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts, 1812 bool V2IsSplat = false) { 1813 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1814 return false; 1815 1816 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1817 SDOperand BitI = Elts[i]; 1818 SDOperand BitI1 = Elts[i+1]; 1819 if (!isUndefOrEqual(BitI, j)) 1820 return false; 1821 if (V2IsSplat) { 1822 if (isUndefOrEqual(BitI1, NumElts)) 1823 return false; 1824 } else { 1825 if (!isUndefOrEqual(BitI1, j + NumElts)) 1826 return false; 1827 } 1828 } 1829 1830 return true; 1831} 1832 1833bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1834 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1835 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1836} 1837 1838/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1839/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1840bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts, 1841 bool V2IsSplat = false) { 1842 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1843 return false; 1844 1845 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1846 SDOperand BitI = Elts[i]; 1847 SDOperand BitI1 = Elts[i+1]; 1848 if (!isUndefOrEqual(BitI, j + NumElts/2)) 1849 return false; 1850 if (V2IsSplat) { 1851 if (isUndefOrEqual(BitI1, NumElts)) 1852 return false; 1853 } else { 1854 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 1855 return false; 1856 } 1857 } 1858 1859 return true; 1860} 1861 1862bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1863 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1864 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1865} 1866 1867/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1868/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1869/// <0, 0, 1, 1> 1870bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1871 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1872 1873 unsigned NumElems = N->getNumOperands(); 1874 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1875 return false; 1876 1877 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1878 SDOperand BitI = N->getOperand(i); 1879 SDOperand BitI1 = N->getOperand(i+1); 1880 1881 if (!isUndefOrEqual(BitI, j)) 1882 return false; 1883 if (!isUndefOrEqual(BitI1, j)) 1884 return false; 1885 } 1886 1887 return true; 1888} 1889 1890/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 1891/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 1892/// <2, 2, 3, 3> 1893bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 1894 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1895 1896 unsigned NumElems = N->getNumOperands(); 1897 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1898 return false; 1899 1900 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 1901 SDOperand BitI = N->getOperand(i); 1902 SDOperand BitI1 = N->getOperand(i + 1); 1903 1904 if (!isUndefOrEqual(BitI, j)) 1905 return false; 1906 if (!isUndefOrEqual(BitI1, j)) 1907 return false; 1908 } 1909 1910 return true; 1911} 1912 1913/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1914/// specifies a shuffle of elements that is suitable for input to MOVSS, 1915/// MOVSD, and MOVD, i.e. setting the lowest element. 
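/// For example, with 4 elements movss corresponds to the mask <4, 1, 2, 3>:
/// the lowest element comes from the second vector, the rest from the first.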
1916 static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
1917   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
1918     return false;
1919
1920   if (!isUndefOrEqual(Elts[0], NumElts))
1921     return false;
1922
1923   for (unsigned i = 1; i < NumElts; ++i) {
1924     if (!isUndefOrEqual(Elts[i], i))
1925       return false;
1926   }
1927
1928   return true;
1929 }
1930
1931 bool X86::isMOVLMask(SDNode *N) {
1932   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1933   return ::isMOVLMask(N->op_begin(), N->getNumOperands());
1934 }
1935
1936 /// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
1937 /// x86 movss wants: the lowest element must be the lowest element of
1938 /// vector 2, and the other elements must come from vector 1 in order.
1939 static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
1940                            bool V2IsSplat = false,
1941                            bool V2IsUndef = false) {
1942   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
1943     return false;
1944
1945   if (!isUndefOrEqual(Ops[0], 0))
1946     return false;
1947
1948   for (unsigned i = 1; i < NumOps; ++i) {
1949     SDOperand Arg = Ops[i];
1950     if (!(isUndefOrEqual(Arg, i+NumOps) ||
1951           (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
1952           (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
1953       return false;
1954   }
1955
1956   return true;
1957 }
1958
1959 static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
1960                            bool V2IsUndef = false) {
1961   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1962   return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
1963                         V2IsSplat, V2IsUndef);
1964 }
1965
1966 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1967 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1968 bool X86::isMOVSHDUPMask(SDNode *N) {
1969   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1970
1971   if (N->getNumOperands() != 4)
1972     return false;
1973
1974   // Expect 1, 1, 3, 3
1975   for (unsigned i = 0; i < 2; ++i) {
1976     SDOperand Arg = N->getOperand(i);
1977     if (Arg.getOpcode() == ISD::UNDEF) continue;
1978     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1979     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1980     if (Val != 1) return false;
1981   }
1982
1983   bool HasHi = false;
1984   for (unsigned i = 2; i < 4; ++i) {
1985     SDOperand Arg = N->getOperand(i);
1986     if (Arg.getOpcode() == ISD::UNDEF) continue;
1987     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1988     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1989     if (Val != 3) return false;
1990     HasHi = true;
1991   }
1992
1993   // Don't use movshdup if it can be done with a shufps.
1994   return HasHi;
1995 }
1996
1997 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1998 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
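/// For example, the SSE3 movsldup instruction duplicates the even elements,
/// so the expected 4-element mask is <0, 0, 2, 2>.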
1999 bool X86::isMOVSLDUPMask(SDNode *N) {
2000   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2001
2002   if (N->getNumOperands() != 4)
2003     return false;
2004
2005   // Expect 0, 0, 2, 2
2006   for (unsigned i = 0; i < 2; ++i) {
2007     SDOperand Arg = N->getOperand(i);
2008     if (Arg.getOpcode() == ISD::UNDEF) continue;
2009     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2010     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2011     if (Val != 0) return false;
2012   }
2013
2014   bool HasHi = false;
2015   for (unsigned i = 2; i < 4; ++i) {
2016     SDOperand Arg = N->getOperand(i);
2017     if (Arg.getOpcode() == ISD::UNDEF) continue;
2018     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2019     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2020     if (Val != 2) return false;
2021     HasHi = true;
2022   }
2023
2024   // Don't use movsldup if it can be done with a shufps.
2025   return HasHi;
2026 }
2027
2028 /// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
2029 /// specifies an identity operation on the LHS or RHS.
2030 static bool isIdentityMask(SDNode *N, bool RHS = false) {
2031   unsigned NumElems = N->getNumOperands();
2032   for (unsigned i = 0; i < NumElems; ++i)
2033     if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
2034       return false;
2035   return true;
2036 }
2037
2038 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2039 /// a splat of a single element.
2040 static bool isSplatMask(SDNode *N) {
2041   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2042
2043   // This is a splat operation if each element of the permute is the same, and
2044   // if the value doesn't reference the second vector.
2045   unsigned NumElems = N->getNumOperands();
2046   SDOperand ElementBase;
2047   unsigned i = 0;
2048   for (; i != NumElems; ++i) {
2049     SDOperand Elt = N->getOperand(i);
2050     if (isa<ConstantSDNode>(Elt)) {
2051       ElementBase = Elt;
2052       break;
2053     }
2054   }
2055
2056   if (!ElementBase.Val)
2057     return false;
2058
2059   for (; i != NumElems; ++i) {
2060     SDOperand Arg = N->getOperand(i);
2061     if (Arg.getOpcode() == ISD::UNDEF) continue;
2062     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2063     if (Arg != ElementBase) return false;
2064   }
2065
2066   // Make sure it is a splat of the first vector operand.
2067   return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2068 }
2069
2070 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2071 /// a splat of a single element and it's a 2 or 4 element mask.
2072 bool X86::isSplatMask(SDNode *N) {
2073   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2074
2075   // We can only splat 64-bit and 32-bit quantities with a single instruction.
2076   if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2077     return false;
2078   return ::isSplatMask(N);
2079 }
2080
2081 /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
2082 /// specifies a splat of element zero.
2083 bool X86::isSplatLoMask(SDNode *N) {
2084   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2085
2086   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
2087     if (!isUndefOrEqual(N->getOperand(i), 0))
2088       return false;
2089   return true;
2090 }
2091
2092 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2093 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2094 /// instructions.
2095 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2096   unsigned NumOperands = N->getNumOperands();
2097   unsigned Shift = (NumOperands == 4) ? 2 : 1;
2098   unsigned Mask = 0;
2099   for (unsigned i = 0; i < NumOperands; ++i) {
2100     unsigned Val = 0;
2101     SDOperand Arg = N->getOperand(NumOperands-i-1);
2102     if (Arg.getOpcode() != ISD::UNDEF)
2103       Val = cast<ConstantSDNode>(Arg)->getValue();
2104     if (Val >= NumOperands) Val -= NumOperands;
2105     Mask |= Val;
2106     if (i != NumOperands - 1)
2107       Mask <<= Shift;
2108   }
2109
2110   return Mask;
2111 }
2112
2113 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2114 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2115 /// instructions.
2116 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2117   unsigned Mask = 0;
2118   // 8 nodes, but we only care about the last 4.
2119   for (unsigned i = 7; i >= 4; --i) {
2120     unsigned Val = 0;
2121     SDOperand Arg = N->getOperand(i);
2122     if (Arg.getOpcode() != ISD::UNDEF)
2123       Val = cast<ConstantSDNode>(Arg)->getValue() - 4; // Rebase to 0-3; an undef element contributes 0.
2124     Mask |= Val;
2125     if (i != 4)
2126       Mask <<= 2;
2127   }
2128
2129   return Mask;
2130 }
2131
2132 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2133 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2134 /// instructions.
2135 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2136   unsigned Mask = 0;
2137   // 8 nodes, but we only care about the first 4.
2138   for (int i = 3; i >= 0; --i) {
2139     unsigned Val = 0;
2140     SDOperand Arg = N->getOperand(i);
2141     if (Arg.getOpcode() != ISD::UNDEF)
2142       Val = cast<ConstantSDNode>(Arg)->getValue();
2143     Mask |= Val;
2144     if (i != 0)
2145       Mask <<= 2;
2146   }
2147
2148   return Mask;
2149 }
2150
2151 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2152 /// specifies an 8 element shuffle that can be broken into a pair of
2153 /// PSHUFHW and PSHUFLW.
2154 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2155   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2156
2157   if (N->getNumOperands() != 8)
2158     return false;
2159
2160   // Lower quadword shuffled.
2161   for (unsigned i = 0; i != 4; ++i) {
2162     SDOperand Arg = N->getOperand(i);
2163     if (Arg.getOpcode() == ISD::UNDEF) continue;
2164     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2165     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2166     if (Val >= 4)   // Elements 4 and up belong to the upper quadword.
2167       return false;
2168   }
2169
2170   // Upper quadword shuffled.
2171   for (unsigned i = 4; i != 8; ++i) {
2172     SDOperand Arg = N->getOperand(i);
2173     if (Arg.getOpcode() == ISD::UNDEF) continue;
2174     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2175     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2176     if (Val < 4 || Val > 7)
2177       return false;
2178   }
2179
2180   return true;
2181 }
2182
2183 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as
2184 /// values in their permute mask.
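/// For example, vector_shuffle V1, V2, <4, 1, 6, 3> becomes
/// vector_shuffle V2, V1, <0, 5, 2, 7>.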
2185 static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
2186                                       SDOperand &V2, SDOperand &Mask,
2187                                       SelectionDAG &DAG) {
2188   MVT::ValueType VT = Op.getValueType();
2189   MVT::ValueType MaskVT = Mask.getValueType();
2190   MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
2191   unsigned NumElems = Mask.getNumOperands();
2192   SmallVector<SDOperand, 8> MaskVec;
2193
2194   for (unsigned i = 0; i != NumElems; ++i) {
2195     SDOperand Arg = Mask.getOperand(i);
2196     if (Arg.getOpcode() == ISD::UNDEF) {
2197       MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2198       continue;
2199     }
2200     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2201     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2202     if (Val < NumElems)
2203       MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2204     else
2205       MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2206   }
2207
2208   std::swap(V1, V2);
2209   Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2210   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2211 }
2212
2213 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2214 /// match movhlps. The lower half elements should come from upper half of
2215 /// V1 (and in order), and the upper half elements should come from the upper
2216 /// half of V2 (and in order).
2217 static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2218   unsigned NumElems = Mask->getNumOperands();
2219   if (NumElems != 4)
2220     return false;
2221   for (unsigned i = 0, e = 2; i != e; ++i)
2222     if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2223       return false;
2224   for (unsigned i = 2; i != 4; ++i)
2225     if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2226       return false;
2227   return true;
2228 }
2229
2230 /// isScalarLoadToVector - Returns true if the node is a scalar load that
2231 /// is promoted to a vector.
2232 static inline bool isScalarLoadToVector(SDNode *N) {
2233   if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2234     N = N->getOperand(0).Val;
2235     return ISD::isNON_EXTLoad(N);
2236   }
2237   return false;
2238 }
2239
2240 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2241 /// match movlp{s|d}. The lower half elements should come from lower half of
2242 /// V1 (and in order), and the upper half elements should come from the upper
2243 /// half of V2 (and in order). And since V1 will become the source of the
2244 /// MOVLP, it must be either a vector load or a scalar load to vector.
2245 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
2246   if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
2247     return false;
2248   // If V2 is a vector load, don't do this transformation; we will try to use
2249   // a load-folding shufps instead.
2250   if (ISD::isNON_EXTLoad(V2))
2251     return false;
2252
2253   unsigned NumElems = Mask->getNumOperands();
2254   if (NumElems != 2 && NumElems != 4)
2255     return false;
2256   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2257     if (!isUndefOrEqual(Mask->getOperand(i), i))
2258       return false;
2259   for (unsigned i = NumElems/2; i != NumElems; ++i)
2260     if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2261       return false;
2262   return true;
2263 }
2264
2265 /// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2266 /// all the same.
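/// For example, (build_vector X, X, X, X) is a splat of X.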
2267 static bool isSplatVector(SDNode *N) {
2268   if (N->getOpcode() != ISD::BUILD_VECTOR)
2269     return false;
2270
2271   SDOperand SplatValue = N->getOperand(0);
2272   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2273     if (N->getOperand(i) != SplatValue)
2274       return false;
2275   return true;
2276 }
2277
2278 /// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2279 /// to an undef.
2280 static bool isUndefShuffle(SDNode *N) {
2281   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2282     return false;
2283
2284   SDOperand V1 = N->getOperand(0);
2285   SDOperand V2 = N->getOperand(1);
2286   SDOperand Mask = N->getOperand(2);
2287   unsigned NumElems = Mask.getNumOperands();
2288   for (unsigned i = 0; i != NumElems; ++i) {
2289     SDOperand Arg = Mask.getOperand(i);
2290     if (Arg.getOpcode() != ISD::UNDEF) {
2291       unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2292       if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2293         return false;
2294       else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2295         return false;
2296     }
2297   }
2298   return true;
2299 }
2300
2301 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
2302 /// constant +0.0.
2303 static inline bool isZeroNode(SDOperand Elt) {
2304   return ((isa<ConstantSDNode>(Elt) &&
2305            cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2306           (isa<ConstantFPSDNode>(Elt) &&
2307            cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2308 }
2309
2310 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2311 /// to a zero vector.
2312 static bool isZeroShuffle(SDNode *N) {
2313   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2314     return false;
2315
2316   SDOperand V1 = N->getOperand(0);
2317   SDOperand V2 = N->getOperand(1);
2318   SDOperand Mask = N->getOperand(2);
2319   unsigned NumElems = Mask.getNumOperands();
2320   for (unsigned i = 0; i != NumElems; ++i) {
2321     SDOperand Arg = Mask.getOperand(i);
2322     if (Arg.getOpcode() != ISD::UNDEF) {
2323       unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
2324       if (Idx < NumElems) {
2325         unsigned Opc = V1.Val->getOpcode();
2326         if (Opc == ISD::UNDEF)
2327           continue;
2328         if (Opc != ISD::BUILD_VECTOR ||
2329             !isZeroNode(V1.Val->getOperand(Idx)))
2330           return false;
2331       } else if (Idx >= NumElems) {
2332         unsigned Opc = V2.Val->getOpcode();
2333         if (Opc == ISD::UNDEF)
2334           continue;
2335         if (Opc != ISD::BUILD_VECTOR ||
2336             !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
2337           return false;
2338       }
2339     }
2340   }
2341   return true;
2342 }
2343
2344 /// getZeroVector - Returns a vector of specified type with all zero elements.
2345 ///
2346 static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2347   assert(MVT::isVector(VT) && "Expected a vector type");
2348   unsigned NumElems = MVT::getVectorNumElements(VT);
2349   MVT::ValueType EVT = MVT::getVectorElementType(VT);
2350   bool isFP = MVT::isFloatingPoint(EVT);
2351   SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2352   SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
2353   return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
2354 }
2355
2356 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2357 /// that point to V2 point to its first element.
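/// For example, with 4 elements the mask <0, 7, 1, 6> becomes <0, 4, 1, 4>:
/// when V2 is a splat, any index into V2 selects the same value, so the
/// canonical first-element index is used.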
2358 static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2359   assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2360
2361   bool Changed = false;
2362   SmallVector<SDOperand, 8> MaskVec;
2363   unsigned NumElems = Mask.getNumOperands();
2364   for (unsigned i = 0; i != NumElems; ++i) {
2365     SDOperand Arg = Mask.getOperand(i);
2366     if (Arg.getOpcode() != ISD::UNDEF) {
2367       unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2368       if (Val > NumElems) {
2369         Arg = DAG.getConstant(NumElems, Arg.getValueType());
2370         Changed = true;
2371       }
2372     }
2373     MaskVec.push_back(Arg);
2374   }
2375
2376   if (Changed)
2377     Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2378                        &MaskVec[0], MaskVec.size());
2379   return Mask;
2380 }
2381
2382 /// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
2383 /// operation of specified width.
2384 static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2385   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2386   MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
2387
2388   SmallVector<SDOperand, 8> MaskVec;
2389   MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2390   for (unsigned i = 1; i != NumElems; ++i)
2391     MaskVec.push_back(DAG.getConstant(i, BaseVT));
2392   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2393 }
2394
2395 /// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2396 /// of specified width.
2397 static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2398   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2399   MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
2400   SmallVector<SDOperand, 8> MaskVec;
2401   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2402     MaskVec.push_back(DAG.getConstant(i, BaseVT));
2403     MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2404   }
2405   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2406 }
2407
2408 /// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2409 /// of specified width.
2410 static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2411   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2412   MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
2413   unsigned Half = NumElems/2;
2414   SmallVector<SDOperand, 8> MaskVec;
2415   for (unsigned i = 0; i != Half; ++i) {
2416     MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
2417     MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2418   }
2419   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2420 }
2421
2422 /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
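/// Roughly: the input is unpckl'ed with itself until it can be viewed at
/// 32-bit granularity, then splatted as a v4i32 using an all-zero shuffle
/// mask and bitcast back to the original vector type.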
2423/// 2424static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2425 SDOperand V1 = Op.getOperand(0); 2426 SDOperand Mask = Op.getOperand(2); 2427 MVT::ValueType VT = Op.getValueType(); 2428 unsigned NumElems = Mask.getNumOperands(); 2429 Mask = getUnpacklMask(NumElems, DAG); 2430 while (NumElems != 4) { 2431 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2432 NumElems >>= 1; 2433 } 2434 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2435 2436 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2437 Mask = getZeroVector(MaskVT, DAG); 2438 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2439 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2440 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2441} 2442 2443/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2444/// vector of zero or undef vector. 2445static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2446 unsigned NumElems, unsigned Idx, 2447 bool isZero, SelectionDAG &DAG) { 2448 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2449 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2450 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2451 SDOperand Zero = DAG.getConstant(0, EVT); 2452 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2453 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2454 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2455 &MaskVec[0], MaskVec.size()); 2456 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2457} 2458 2459/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2460/// 2461static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2462 unsigned NumNonZero, unsigned NumZero, 2463 SelectionDAG &DAG, TargetLowering &TLI) { 2464 if (NumNonZero > 8) 2465 return SDOperand(); 2466 2467 SDOperand V(0, 0); 2468 bool First = true; 2469 for (unsigned i = 0; i < 16; ++i) { 2470 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2471 if (ThisIsNonZero && First) { 2472 if (NumZero) 2473 V = getZeroVector(MVT::v8i16, DAG); 2474 else 2475 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2476 First = false; 2477 } 2478 2479 if ((i & 1) != 0) { 2480 SDOperand ThisElt(0, 0), LastElt(0, 0); 2481 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2482 if (LastIsNonZero) { 2483 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2484 } 2485 if (ThisIsNonZero) { 2486 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2487 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2488 ThisElt, DAG.getConstant(8, MVT::i8)); 2489 if (LastIsNonZero) 2490 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2491 } else 2492 ThisElt = LastElt; 2493 2494 if (ThisElt.Val) 2495 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2496 DAG.getConstant(i/2, TLI.getPointerTy())); 2497 } 2498 } 2499 2500 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2501} 2502 2503/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
2504/// 2505static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2506 unsigned NumNonZero, unsigned NumZero, 2507 SelectionDAG &DAG, TargetLowering &TLI) { 2508 if (NumNonZero > 4) 2509 return SDOperand(); 2510 2511 SDOperand V(0, 0); 2512 bool First = true; 2513 for (unsigned i = 0; i < 8; ++i) { 2514 bool isNonZero = (NonZeros & (1 << i)) != 0; 2515 if (isNonZero) { 2516 if (First) { 2517 if (NumZero) 2518 V = getZeroVector(MVT::v8i16, DAG); 2519 else 2520 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2521 First = false; 2522 } 2523 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2524 DAG.getConstant(i, TLI.getPointerTy())); 2525 } 2526 } 2527 2528 return V; 2529} 2530 2531SDOperand 2532X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2533 // All zero's are handled with pxor. 2534 if (ISD::isBuildVectorAllZeros(Op.Val)) 2535 return Op; 2536 2537 // All one's are handled with pcmpeqd. 2538 if (ISD::isBuildVectorAllOnes(Op.Val)) 2539 return Op; 2540 2541 MVT::ValueType VT = Op.getValueType(); 2542 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2543 unsigned EVTBits = MVT::getSizeInBits(EVT); 2544 2545 unsigned NumElems = Op.getNumOperands(); 2546 unsigned NumZero = 0; 2547 unsigned NumNonZero = 0; 2548 unsigned NonZeros = 0; 2549 unsigned NumNonZeroImms = 0; 2550 std::set<SDOperand> Values; 2551 for (unsigned i = 0; i < NumElems; ++i) { 2552 SDOperand Elt = Op.getOperand(i); 2553 if (Elt.getOpcode() != ISD::UNDEF) { 2554 Values.insert(Elt); 2555 if (isZeroNode(Elt)) 2556 NumZero++; 2557 else { 2558 NonZeros |= (1 << i); 2559 NumNonZero++; 2560 if (Elt.getOpcode() == ISD::Constant || 2561 Elt.getOpcode() == ISD::ConstantFP) 2562 NumNonZeroImms++; 2563 } 2564 } 2565 } 2566 2567 if (NumNonZero == 0) { 2568 if (NumZero == 0) 2569 // All undef vector. Return an UNDEF. 2570 return DAG.getNode(ISD::UNDEF, VT); 2571 else 2572 // A mix of zero and undef. Return a zero vector. 2573 return getZeroVector(VT, DAG); 2574 } 2575 2576 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2577 if (Values.size() == 1) 2578 return SDOperand(); 2579 2580 // Special case for single non-zero element. 2581 if (NumNonZero == 1) { 2582 unsigned Idx = CountTrailingZeros_32(NonZeros); 2583 SDOperand Item = Op.getOperand(Idx); 2584 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2585 if (Idx == 0) 2586 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2587 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2588 NumZero > 0, DAG); 2589 2590 if (EVTBits == 32) { 2591 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2592 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2593 DAG); 2594 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2595 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2596 SmallVector<SDOperand, 8> MaskVec; 2597 for (unsigned i = 0; i < NumElems; i++) 2598 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2599 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2600 &MaskVec[0], MaskVec.size()); 2601 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2602 DAG.getNode(ISD::UNDEF, VT), Mask); 2603 } 2604 } 2605 2606 // A vector full of immediates; various special cases are already 2607 // handled, so this is best done with a single constant-pool load. 2608 if (NumNonZero == NumNonZeroImms) 2609 return SDOperand(); 2610 2611 // Let legalizer expand 2-wide build_vectors. 
2612   if (EVTBits == 64)
2613     return SDOperand();
2614
2615   // If element VT is < 32 bits, convert it to inserts into a zero vector.
2616   if (EVTBits == 8 && NumElems == 16) {
2617     SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
2618                                         *this);
2619     if (V.Val) return V;
2620   }
2621
2622   if (EVTBits == 16 && NumElems == 8) {
2623     SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
2624                                         *this);
2625     if (V.Val) return V;
2626   }
2627
2628   // If element VT is 32 bits, turn it into a number of shuffles.
2629   SmallVector<SDOperand, 8> V;
2630   V.resize(NumElems);
2631   if (NumElems == 4 && NumZero > 0) {
2632     for (unsigned i = 0; i < 4; ++i) {
2633       bool isZero = !(NonZeros & (1 << i));
2634       if (isZero)
2635         V[i] = getZeroVector(VT, DAG);
2636       else
2637         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2638     }
2639
2640     for (unsigned i = 0; i < 2; ++i) {
2641       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2642       default: break;
2643       case 0:
2644         V[i] = V[i*2];  // Must be a zero vector.
2645         break;
2646       case 1:
2647         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2648                            getMOVLMask(NumElems, DAG));
2649         break;
2650       case 2:
2651         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2652                            getMOVLMask(NumElems, DAG));
2653         break;
2654       case 3:
2655         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2656                            getUnpacklMask(NumElems, DAG));
2657         break;
2658       }
2659     }
2660
2661     // Take advantage of the fact that a GR32 to VR128 scalar_to_vector (i.e.
2662     // movd) clears the upper bits.
2663     // FIXME: we can do the same for the v4f32 case when we know both parts of
2664     // the lower half come from scalar_to_vector (loadf32). We should do
2665     // that in the post-legalizer dag combiner with target-specific hooks.
2666     if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2667       return V[0];
2668     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2669     MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
2670     SmallVector<SDOperand, 8> MaskVec;
2671     bool Reverse = (NonZeros & 0x3) == 2;
2672     for (unsigned i = 0; i < 2; ++i)
2673       if (Reverse)
2674         MaskVec.push_back(DAG.getConstant(1-i, EVT));
2675       else
2676         MaskVec.push_back(DAG.getConstant(i, EVT));
2677     Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2678     for (unsigned i = 0; i < 2; ++i)
2679       if (Reverse)
2680         MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
2681       else
2682         MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
2683     SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2684                                      &MaskVec[0], MaskVec.size());
2685     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2686   }
2687
2688   if (Values.size() > 2) {
2689     // Expand into a number of unpckl*.
2690     // e.g. for v4f32
2691     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2692     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2693     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2694     SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2695     for (unsigned i = 0; i < NumElems; ++i)
2696       V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2697     NumElems >>= 1;
2698     while (NumElems != 0) {
2699       for (unsigned i = 0; i < NumElems; ++i)
2700         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2701                            UnpckMask);
2702       NumElems >>= 1;
2703     }
2704     return V[0];
2705   }
2706
2707   return SDOperand();
2708 }
2709
2710 SDOperand
2711 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2712   SDOperand V1 = Op.getOperand(0);
2713   SDOperand V2 = Op.getOperand(1);
2714   SDOperand PermMask = Op.getOperand(2);
2715   MVT::ValueType VT = Op.getValueType();
2716   unsigned NumElems = PermMask.getNumOperands();
2717   bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2718   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2719   bool V1IsSplat = false;
2720   bool V2IsSplat = false;
2721
2722   if (isUndefShuffle(Op.Val))
2723     return DAG.getNode(ISD::UNDEF, VT);
2724
2725   if (isZeroShuffle(Op.Val))
2726     return getZeroVector(VT, DAG);
2727
2728   if (isIdentityMask(PermMask.Val))
2729     return V1;
2730   else if (isIdentityMask(PermMask.Val, true))
2731     return V2;
2732
2733   if (isSplatMask(PermMask.Val)) {
2734     if (NumElems <= 4) return Op;
2735     // Promote it to a v4i32 splat.
2736     return PromoteSplat(Op, DAG);
2737   }
2738
2739   if (X86::isMOVLMask(PermMask.Val))
2740     return (V1IsUndef) ? V2 : Op;
2741
2742   if (X86::isMOVSHDUPMask(PermMask.Val) ||
2743       X86::isMOVSLDUPMask(PermMask.Val) ||
2744       X86::isMOVHLPSMask(PermMask.Val) ||
2745       X86::isMOVHPMask(PermMask.Val) ||
2746       X86::isMOVLPMask(PermMask.Val))
2747     return Op;
2748
2749   if (ShouldXformToMOVHLPS(PermMask.Val) ||
2750       ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
2751     return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2752
2753   bool Commuted = false;
2754   V1IsSplat = isSplatVector(V1.Val);
2755   V2IsSplat = isSplatVector(V2.Val);
2756   if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
2757     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2758     std::swap(V1IsSplat, V2IsSplat);
2759     std::swap(V1IsUndef, V2IsUndef);
2760     Commuted = true;
2761   }
2762
2763   if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
2764     if (V2IsUndef) return V1;
2765     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2766     if (V2IsSplat) {
2767       // V2 is a splat, so the mask may be malformed. That is, it may point
2768       // to any V2 element. The instruction selector won't like this. Get
2769       // a corrected mask and commute to form a proper MOVS{S|D}.
2770       SDOperand NewMask = getMOVLMask(NumElems, DAG);
2771       if (NewMask.Val != PermMask.Val)
2772         Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2773     }
2774     return Op;
2775   }
2776
2777   if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2778       X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
2779       X86::isUNPCKLMask(PermMask.Val) ||
2780       X86::isUNPCKHMask(PermMask.Val))
2781     return Op;
2782
2783   if (V2IsSplat) {
2784     // Normalize the mask so all entries that point to V2 point to its first
2785     // element, then try to match unpck{h|l} again. If they match, return a
2786     // new vector_shuffle with the corrected mask.
2787     SDOperand NewMask = NormalizeMask(PermMask, DAG);
2788     if (NewMask.Val != PermMask.Val) {
2789       if (X86::isUNPCKLMask(PermMask.Val, true)) {
2790         SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2791         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2792       } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2793         SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2794         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2795       }
2796     }
2797   }
2798
2799   // Normalize the node to match x86 shuffle ops if needed.
2800   if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
2801     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2802
2803   if (Commuted) {
2804     // Commute it back and try unpck* again.
2805     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2806     if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2807         X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
2808         X86::isUNPCKLMask(PermMask.Val) ||
2809         X86::isUNPCKHMask(PermMask.Val))
2810       return Op;
2811   }
2812
2813   // If VT is integer, try PSHUF* first, then SHUFP*.
2814   if (MVT::isInteger(VT)) {
2815     // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
2816     // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
2817     if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
2818          X86::isPSHUFDMask(PermMask.Val)) ||
2819         X86::isPSHUFHWMask(PermMask.Val) ||
2820         X86::isPSHUFLWMask(PermMask.Val)) {
2821       if (V2.getOpcode() != ISD::UNDEF)
2822         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2823                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2824       return Op;
2825     }
2826
2827     if (X86::isSHUFPMask(PermMask.Val) &&
2828         MVT::getSizeInBits(VT) != 64)    // Don't do this for MMX.
2829       return Op;
2830
2831     // Handle v8i16 shuffle high / low shuffle node pair.
2832     if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
2833       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2834       MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
2835       SmallVector<SDOperand, 8> MaskVec;
2836       for (unsigned i = 0; i != 4; ++i)
2837         MaskVec.push_back(PermMask.getOperand(i));
2838       for (unsigned i = 4; i != 8; ++i)
2839         MaskVec.push_back(DAG.getConstant(i, BaseVT));
2840       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2841                                    &MaskVec[0], MaskVec.size());
2842       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2843       MaskVec.clear();
2844       for (unsigned i = 0; i != 4; ++i)
2845         MaskVec.push_back(DAG.getConstant(i, BaseVT));
2846       for (unsigned i = 4; i != 8; ++i)
2847         MaskVec.push_back(PermMask.getOperand(i));
2848       Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
2849       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2850     }
2851   } else {
2852     // Floating point cases in the other order.
2853     if (X86::isSHUFPMask(PermMask.Val))
2854       return Op;
2855     if (X86::isPSHUFDMask(PermMask.Val) ||
2856         X86::isPSHUFHWMask(PermMask.Val) ||
2857         X86::isPSHUFLWMask(PermMask.Val)) {
2858       if (V2.getOpcode() != ISD::UNDEF)
2859         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2860                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2861       return Op;
2862     }
2863   }
2864
2865   if (NumElems == 4 &&
2866       // Don't do this for MMX.
2867       MVT::getSizeInBits(VT) != 64) {
2868     MVT::ValueType MaskVT = PermMask.getValueType();
2869     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
2870     SmallVector<std::pair<int, int>, 8> Locs;
2871     Locs.resize(NumElems);   // resize, not reserve: Locs is indexed below.
2872     SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2873     SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2874     unsigned NumHi = 0;
2875     unsigned NumLo = 0;
2876     // If no more than two elements come from either vector, this can be
2877     // implemented with two shuffles. The first shuffle gathers the elements;
2878     // the second shuffle, which takes the first shuffle as both of its
2879     // vector operands, puts the elements into the right order.
2880     for (unsigned i = 0; i != NumElems; ++i) {
2881       SDOperand Elt = PermMask.getOperand(i);
2882       if (Elt.getOpcode() == ISD::UNDEF) {
2883         Locs[i] = std::make_pair(-1, -1);
2884       } else {
2885         unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2886         if (Val < NumElems) {
2887           Locs[i] = std::make_pair(0, NumLo);
2888           Mask1[NumLo] = Elt;
2889           NumLo++;
2890         } else {
2891           Locs[i] = std::make_pair(1, NumHi);
2892           if (2+NumHi < NumElems)
2893             Mask1[2+NumHi] = Elt;
2894           NumHi++;
2895         }
2896       }
2897     }
2898     if (NumLo <= 2 && NumHi <= 2) {
2899       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2900                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2901                                    &Mask1[0], Mask1.size()));
2902       for (unsigned i = 0; i != NumElems; ++i) {
2903         if (Locs[i].first == -1)
2904           continue;
2905         else {
2906           unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
2907           Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
2908           Mask2[i] = DAG.getConstant(Idx, MaskEVT);
2909         }
2910       }
2911
2912       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
2913                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2914                                      &Mask2[0], Mask2.size()));
2915     }
2916
2917     // Break it into (shuffle shuffle_hi, shuffle_lo).
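    // Sketch of the fallback: elements destined for the low half of the
    // result are gathered into one shuffle (shuffle_lo) and elements destined
    // for the high half into another (shuffle_hi); a final shuffle of the two
    // partial results moves every element into place.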
2918     Locs.clear();  Locs.resize(NumElems);  // clear() resets the size; restore it before indexing.
2919     SmallVector<SDOperand, 8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2920     SmallVector<SDOperand, 8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2921     SmallVector<SDOperand, 8> *MaskPtr = &LoMask;
2922     unsigned MaskIdx = 0;
2923     unsigned LoIdx = 0;
2924     unsigned HiIdx = NumElems/2;
2925     for (unsigned i = 0; i != NumElems; ++i) {
2926       if (i == NumElems/2) {
2927         MaskPtr = &HiMask;
2928         MaskIdx = 1;
2929         LoIdx = 0;
2930         HiIdx = NumElems/2;
2931       }
2932       SDOperand Elt = PermMask.getOperand(i);
2933       if (Elt.getOpcode() == ISD::UNDEF) {
2934         Locs[i] = std::make_pair(-1, -1);
2935       } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
2936         Locs[i] = std::make_pair(MaskIdx, LoIdx);
2937         (*MaskPtr)[LoIdx] = Elt;
2938         LoIdx++;
2939       } else {
2940         Locs[i] = std::make_pair(MaskIdx, HiIdx);
2941         (*MaskPtr)[HiIdx] = Elt;
2942         HiIdx++;
2943       }
2944     }
2945
2946     SDOperand LoShuffle =
2947       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2948                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2949                               &LoMask[0], LoMask.size()));
2950     SDOperand HiShuffle =
2951       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2952                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2953                               &HiMask[0], HiMask.size()));
2954     SmallVector<SDOperand, 8> MaskOps;
2955     for (unsigned i = 0; i != NumElems; ++i) {
2956       if (Locs[i].first == -1) {
2957         MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
2958       } else {
2959         unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
2960         MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
2961       }
2962     }
2963     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
2964                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2965                                    &MaskOps[0], MaskOps.size()));
2966   }
2967
2968   return SDOperand();
2969 }
2970
2971 SDOperand
2972 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2973   if (!isa<ConstantSDNode>(Op.getOperand(1)))
2974     return SDOperand();
2975
2976   MVT::ValueType VT = Op.getValueType();
2977   // TODO: handle v16i8.
2978   if (MVT::getSizeInBits(VT) == 16) {
2979     // Transform it so it matches pextrw, which produces a 32-bit result.
2980     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
2981     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
2982                                     Op.getOperand(0), Op.getOperand(1));
2983     SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
2984                                    DAG.getValueType(VT));
2985     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
2986   } else if (MVT::getSizeInBits(VT) == 32) {
2987     SDOperand Vec = Op.getOperand(0);
2988     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2989     if (Idx == 0)
2990       return Op;
2991     // SHUFPS the element to the lowest double word, then movss.
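    // For example, extracting element 2 of a v4f32 shuffles with the mask
    // <2, undef, undef, undef> so the desired value lands in element 0, then
    // extracts element 0.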
2992 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2993 SmallVector<SDOperand, 8> IdxVec; 2994 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); 2995 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2996 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2997 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 2998 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2999 &IdxVec[0], IdxVec.size()); 3000 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3001 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3002 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3003 DAG.getConstant(0, getPointerTy())); 3004 } else if (MVT::getSizeInBits(VT) == 64) { 3005 SDOperand Vec = Op.getOperand(0); 3006 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3007 if (Idx == 0) 3008 return Op; 3009 3010 // UNPCKHPD the element to the lowest double word, then movsd. 3011 // Note that if the lower 64 bits of the result of the UNPCKHPD are then 3012 // stored to an f64mem, the whole operation is folded into a single MOVHPDmr. 3013 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3014 SmallVector<SDOperand, 8> IdxVec; 3015 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); 3016 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); 3017 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3018 &IdxVec[0], IdxVec.size()); 3019 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 3020 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 3021 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 3022 DAG.getConstant(0, getPointerTy())); 3023 } 3024 3025 return SDOperand(); 3026} 3027 3028SDOperand 3029X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 3030 // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32 3031 // as its second argument. 3032 MVT::ValueType VT = Op.getValueType(); 3033 MVT::ValueType BaseVT = MVT::getVectorElementType(VT); 3034 SDOperand N0 = Op.getOperand(0); 3035 SDOperand N1 = Op.getOperand(1); 3036 SDOperand N2 = Op.getOperand(2); 3037 if (MVT::getSizeInBits(BaseVT) == 16) { 3038 if (N1.getValueType() != MVT::i32) 3039 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 3040 if (N2.getValueType() != MVT::i32) 3041 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), getPointerTy()); 3042 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 3043 } else if (MVT::getSizeInBits(BaseVT) == 32) { 3044 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 3045 if (Idx == 0) { 3046 // Use a movss. 3047 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 3048 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 3049 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 3050 SmallVector<SDOperand, 8> MaskVec; 3051 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 3052 for (unsigned i = 1; i <= 3; ++i) 3053 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 3054 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 3055 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 3056 &MaskVec[0], MaskVec.size())); 3057 } else { 3058 // Use two pinsrw instructions to insert a 32-bit value.
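      // Sketch of the expansion below: element index Idx becomes word index
      // 2*Idx in a v8i16 view of the vector; the low 16 bits of the value go
      // in at word 2*Idx and, after a 16-bit right shift, the high 16 bits
      // at word 2*Idx+1.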
3059 Idx <<= 1; 3060 if (MVT::isFloatingPoint(N1.getValueType())) { 3061 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 3062 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 3063 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 3064 DAG.getConstant(0, getPointerTy())); 3065 } 3066 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 3067 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3068 DAG.getConstant(Idx, getPointerTy())); 3069 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 3070 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 3071 DAG.getConstant(Idx+1, getPointerTy())); 3072 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 3073 } 3074 } 3075 3076 return SDOperand(); 3077} 3078 3079SDOperand 3080X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 3081 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 3082 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 3083} 3084 3085// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 3086// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is 3087// one of the above-mentioned nodes. It has to be wrapped because otherwise 3088// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 3089// be used to form addressing modes. These wrapped nodes will be selected 3090// into MOV32ri. 3091SDOperand 3092X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 3093 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 3094 SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(), 3095 getPointerTy(), 3096 CP->getAlignment()); 3097 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3098 // With PIC, the address is actually $g + Offset. 3099 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3100 !Subtarget->isPICStyleRIPRel()) { 3101 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3102 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3103 Result); 3104 } 3105 3106 return Result; 3107} 3108 3109SDOperand 3110X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 3111 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 3112 SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy()); 3113 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3114 // With PIC, the address is actually $g + Offset. 3115 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3116 !Subtarget->isPICStyleRIPRel()) { 3117 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3118 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3119 Result); 3120 } 3121 3122 // For Darwin & Mingw32, external and weak symbols are indirect, so we want to 3123 // load the value at address GV, not the value of GV itself. This means that 3124 // the GlobalAddress must be in the base or index register of the address, not 3125 // the GV offset field.
Platform check is inside GVRequiresExtraLoad() call 3126 // The same applies for external symbols during PIC codegen 3127 if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)) 3128 Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0); 3129 3130 return Result; 3131} 3132 3133// Lower ISD::GlobalTLSAddress using the "general dynamic" model 3134static SDOperand 3135LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3136 const MVT::ValueType PtrVT) { 3137 SDOperand InFlag; 3138 SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, 3139 DAG.getNode(X86ISD::GlobalBaseReg, 3140 PtrVT), InFlag); 3141 InFlag = Chain.getValue(1); 3142 3143 // emit leal symbol@TLSGD(,%ebx,1), %eax 3144 SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); 3145 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3146 GA->getValueType(0), 3147 GA->getOffset()); 3148 SDOperand Ops[] = { Chain, TGA, InFlag }; 3149 SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); 3150 InFlag = Result.getValue(2); 3151 Chain = Result.getValue(1); 3152 3153 // call ___tls_get_addr. This function receives its argument in 3154 // the register EAX. 3155 Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); 3156 InFlag = Chain.getValue(1); 3157 3158 NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3159 SDOperand Ops1[] = { Chain, 3160 DAG.getTargetExternalSymbol("___tls_get_addr", 3161 PtrVT), 3162 DAG.getRegister(X86::EAX, PtrVT), 3163 DAG.getRegister(X86::EBX, PtrVT), 3164 InFlag }; 3165 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); 3166 InFlag = Chain.getValue(1); 3167 3168 return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); 3169} 3170 3171// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or 3172// "local exec" model. 3173static SDOperand 3174LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, 3175 const MVT::ValueType PtrVT) { 3176 // Get the Thread Pointer 3177 SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT); 3178 // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial 3179 // exec) 3180 SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), 3181 GA->getValueType(0), 3182 GA->getOffset()); 3183 SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA); 3184 3185 if (GA->getGlobal()->isDeclaration()) // initial exec TLS model 3186 Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0); 3187 3188 // The address of the thread local variable is the add of the thread 3189 // pointer with the offset of the variable. 
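  // That is, for local exec the offset is the link-time constant x@ntpoff,
  // while for initial exec it is first loaded from x@indntpoff (the load
  // guarded by isDeclaration() above); either way the address computed is
  // thread pointer + offset.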
3190 return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); 3191} 3192 3193SDOperand 3194X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { 3195 // TODO: implement the "local dynamic" model 3196 // TODO: implement the "initial exec" model for PIC executables 3197 assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() && 3198 "TLS not implemented for non-ELF and 64-bit targets"); 3199 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 3200 // If the relocation model is PIC, use the "General Dynamic" TLS model, 3201 // otherwise use the "Local Exec" TLS model 3202 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 3203 return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy()); 3204 else 3205 return LowerToTLSExecModel(GA, DAG, getPointerTy()); 3206} 3207 3208SDOperand 3209X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 3210 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 3211 SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 3212 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3213 // With PIC, the address is actually $g + Offset. 3214 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3215 !Subtarget->isPICStyleRIPRel()) { 3216 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3217 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3218 Result); 3219 } 3220 3221 return Result; 3222} 3223 3224SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 3225 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 3226 SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); 3227 Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); 3228 // With PIC, the address is actually $g + Offset. 3229 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && 3230 !Subtarget->isPICStyleRIPRel()) { 3231 Result = DAG.getNode(ISD::ADD, getPointerTy(), 3232 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 3233 Result); 3234 } 3235 3236 return Result; 3237} 3238 3239SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 3240 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 3241 "Not an i64 shift!"); 3242 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 3243 SDOperand ShOpLo = Op.getOperand(0); 3244 SDOperand ShOpHi = Op.getOperand(1); 3245 SDOperand ShAmt = Op.getOperand(2); 3246 SDOperand Tmp1 = isSRA ? 3247 DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : 3248 DAG.getConstant(0, MVT::i32); 3249 3250 SDOperand Tmp2, Tmp3; 3251 if (Op.getOpcode() == ISD::SHL_PARTS) { 3252 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 3253 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 3254 } else { 3255 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 3256 Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3257 } 3258 3259 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3260 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3261 DAG.getConstant(32, MVT::i8)); 3262 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3263 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3264 3265 SDOperand Hi, Lo; 3266 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3267 3268 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3269 SmallVector<SDOperand, 4> Ops; 3270 if (Op.getOpcode() == ISD::SHL_PARTS) { 3271 Ops.push_back(Tmp2); 3272 Ops.push_back(Tmp3); 3273 Ops.push_back(CC); 3274 Ops.push_back(InFlag); 3275 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3276 InFlag = Hi.getValue(1); 3277 3278 Ops.clear(); 3279 Ops.push_back(Tmp3); 3280 Ops.push_back(Tmp1); 3281 Ops.push_back(CC); 3282 Ops.push_back(InFlag); 3283 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3284 } else { 3285 Ops.push_back(Tmp2); 3286 Ops.push_back(Tmp3); 3287 Ops.push_back(CC); 3288 Ops.push_back(InFlag); 3289 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3290 InFlag = Lo.getValue(1); 3291 3292 Ops.clear(); 3293 Ops.push_back(Tmp3); 3294 Ops.push_back(Tmp1); 3295 Ops.push_back(CC); 3296 Ops.push_back(InFlag); 3297 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3298 } 3299 3300 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3301 Ops.clear(); 3302 Ops.push_back(Lo); 3303 Ops.push_back(Hi); 3304 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3305} 3306 3307SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3308 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3309 Op.getOperand(0).getValueType() >= MVT::i16 && 3310 "Unknown SINT_TO_FP to lower!"); 3311 3312 SDOperand Result; 3313 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3314 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3315 MachineFunction &MF = DAG.getMachineFunction(); 3316 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3317 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3318 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3319 StackSlot, NULL, 0); 3320 3321 // Build the FILD 3322 SDVTList Tys; 3323 if (X86ScalarSSE) 3324 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3325 else 3326 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3327 SmallVector<SDOperand, 8> Ops; 3328 Ops.push_back(Chain); 3329 Ops.push_back(StackSlot); 3330 Ops.push_back(DAG.getValueType(SrcVT)); 3331 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3332 Tys, &Ops[0], Ops.size()); 3333 3334 if (X86ScalarSSE) { 3335 Chain = Result.getValue(1); 3336 SDOperand InFlag = Result.getValue(2); 3337 3338 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3339 // shouldn't be necessary except that RFP cannot be live across 3340 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
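    // Concretely, the SSE path below is: FILD (x87) -> FST to a fresh 8-byte
    // stack slot -> load of the result as an SSE value; the store/load round
    // trip is how the value migrates from the x87 stack into an XMM register.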
3341 MachineFunction &MF = DAG.getMachineFunction(); 3342 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3343 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3344 Tys = DAG.getVTList(MVT::Other); 3345 SmallVector<SDOperand, 8> Ops; 3346 Ops.push_back(Chain); 3347 Ops.push_back(Result); 3348 Ops.push_back(StackSlot); 3349 Ops.push_back(DAG.getValueType(Op.getValueType())); 3350 Ops.push_back(InFlag); 3351 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3352 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3353 } 3354 3355 return Result; 3356} 3357 3358SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3359 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3360 "Unknown FP_TO_SINT to lower!"); 3361 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3362 // stack slot. 3363 MachineFunction &MF = DAG.getMachineFunction(); 3364 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3365 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3366 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3367 3368 unsigned Opc; 3369 switch (Op.getValueType()) { 3370 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3371 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3372 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3373 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3374 } 3375 3376 SDOperand Chain = DAG.getEntryNode(); 3377 SDOperand Value = Op.getOperand(0); 3378 if (X86ScalarSSE) { 3379 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3380 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3381 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3382 SDOperand Ops[] = { 3383 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3384 }; 3385 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3386 Chain = Value.getValue(1); 3387 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3388 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3389 } 3390 3391 // Build the FP_TO_INT*_IN_MEM 3392 SDOperand Ops[] = { Chain, Value, StackSlot }; 3393 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3394 3395 // Load the result. 
3396 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3397} 3398 3399SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3400 MVT::ValueType VT = Op.getValueType(); 3401 MVT::ValueType EltVT = VT; 3402 if (MVT::isVector(VT)) 3403 EltVT = MVT::getVectorElementType(VT); 3404 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3405 std::vector<Constant*> CV; 3406 if (EltVT == MVT::f64) { 3407 Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))); 3408 CV.push_back(C); 3409 CV.push_back(C); 3410 } else { 3411 Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))); 3412 CV.push_back(C); 3413 CV.push_back(C); 3414 CV.push_back(C); 3415 CV.push_back(C); 3416 } 3417 Constant *C = ConstantVector::get(CV); 3418 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3419 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3420 false, 16); 3421 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3422} 3423 3424SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3425 MVT::ValueType VT = Op.getValueType(); 3426 MVT::ValueType EltVT = VT; 3427 unsigned EltNum = 1; 3428 if (MVT::isVector(VT)) { 3429 EltVT = MVT::getVectorElementType(VT); 3430 EltNum = MVT::getVectorNumElements(VT); 3431 } 3432 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3433 std::vector<Constant*> CV; 3434 if (EltVT == MVT::f64) { 3435 Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)); 3436 CV.push_back(C); 3437 CV.push_back(C); 3438 } else { 3439 Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31)); 3440 CV.push_back(C); 3441 CV.push_back(C); 3442 CV.push_back(C); 3443 CV.push_back(C); 3444 } 3445 Constant *C = ConstantVector::get(CV); 3446 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3447 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3448 false, 16); 3449 if (MVT::isVector(VT)) { 3450 return DAG.getNode(ISD::BIT_CONVERT, VT, 3451 DAG.getNode(ISD::XOR, MVT::v2i64, 3452 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 3453 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 3454 } else { 3455 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3456 } 3457} 3458 3459SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3460 SDOperand Op0 = Op.getOperand(0); 3461 SDOperand Op1 = Op.getOperand(1); 3462 MVT::ValueType VT = Op.getValueType(); 3463 MVT::ValueType SrcVT = Op1.getValueType(); 3464 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3465 3466 // If second operand is smaller, extend it first. 3467 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3468 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3469 SrcVT = VT; 3470 } 3471 3472 // First get the sign bit of second operand. 
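  // The constant built below is <0x8000000000000000,0> for f64 (and
  // <0x80000000,0,0,0> for f32), so the FAND keeps only Op1's sign bit.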
3473 std::vector<Constant*> CV; 3474 if (SrcVT == MVT::f64) { 3475 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63))); 3476 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3477 } else { 3478 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31))); 3479 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3480 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3481 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3482 } 3483 Constant *C = ConstantVector::get(CV); 3484 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3485 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 3486 false, 16); 3487 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3488 3489 // Shift sign bit right or left if the two operands have different types. 3490 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3491 // Op0 is MVT::f32, Op1 is MVT::f64. 3492 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3493 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3494 DAG.getConstant(32, MVT::i32)); 3495 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3496 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3497 DAG.getConstant(0, getPointerTy())); 3498 } 3499 3500 // Clear first operand sign bit. 3501 CV.clear(); 3502 if (VT == MVT::f64) { 3503 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63)))); 3504 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3505 } else { 3506 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31)))); 3507 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3508 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3509 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3510 } 3511 C = ConstantVector::get(CV); 3512 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3513 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3514 false, 16); 3515 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3516 3517 // Or the value with the sign bit. 
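  // Net effect: copysign(Op0, Op1) == (Op0 & ~sign_mask) | (Op1 & sign_mask),
  // computed with SSE logical ops on the two constant-pool masks above (with
  // an extra shift of the sign bit when the two types differ).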
3518 return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); 3519} 3520 3521SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, 3522 SDOperand Chain) { 3523 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 3524 SDOperand Cond; 3525 SDOperand Op0 = Op.getOperand(0); 3526 SDOperand Op1 = Op.getOperand(1); 3527 SDOperand CC = Op.getOperand(2); 3528 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3529 const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3530 const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 3531 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 3532 unsigned X86CC; 3533 3534 if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, 3535 Op0, Op1, DAG)) { 3536 SDOperand Ops1[] = { Chain, Op0, Op1 }; 3537 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1); 3538 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 3539 return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3540 } 3541 3542 assert(isFP && "Illegal integer SetCC!"); 3543 3544 SDOperand COps[] = { Chain, Op0, Op1 }; 3545 Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1); 3546 3547 switch (SetCCOpcode) { 3548 default: assert(false && "Illegal floating point SetCC!"); 3549 case ISD::SETOEQ: { // !PF & ZF 3550 SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond }; 3551 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3552 SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8), 3553 Tmp1.getValue(1) }; 3554 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3555 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 3556 } 3557 case ISD::SETUNE: { // PF | !ZF 3558 SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond }; 3559 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); 3560 SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8), 3561 Tmp1.getValue(1) }; 3562 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); 3563 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 3564 } 3565 } 3566} 3567 3568SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 3569 bool addTest = true; 3570 SDOperand Chain = DAG.getEntryNode(); 3571 SDOperand Cond = Op.getOperand(0); 3572 SDOperand CC; 3573 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3574 3575 if (Cond.getOpcode() == ISD::SETCC) 3576 Cond = LowerSETCC(Cond, DAG, Chain); 3577 3578 if (Cond.getOpcode() == X86ISD::SETCC) { 3579 CC = Cond.getOperand(0); 3580 3581 // If the condition flag is set by an X86ISD::CMP, then make a copy of it 3582 // (since the flag operand cannot be shared). Use it as the condition-setting 3583 // operand in place of the X86ISD::SETCC. 3584 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3585 // to use a test instead of duplicating the X86ISD::CMP (for register 3586 // pressure reasons)?
3587 SDOperand Cmp = Cond.getOperand(1); 3588 unsigned Opc = Cmp.getOpcode(); 3589 bool IllegalFPCMov = !X86ScalarSSE && 3590 MVT::isFloatingPoint(Op.getValueType()) && 3591 !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 3592 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) && 3593 !IllegalFPCMov) { 3594 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3595 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3596 addTest = false; 3597 } 3598 } 3599 3600 if (addTest) { 3601 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3602 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3603 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3604 } 3605 3606 VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag); 3607 SmallVector<SDOperand, 4> Ops; 3608 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 3609 // condition is true. 3610 Ops.push_back(Op.getOperand(2)); 3611 Ops.push_back(Op.getOperand(1)); 3612 Ops.push_back(CC); 3613 Ops.push_back(Cond.getValue(1)); 3614 return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3615} 3616 3617SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 3618 bool addTest = true; 3619 SDOperand Chain = Op.getOperand(0); 3620 SDOperand Cond = Op.getOperand(1); 3621 SDOperand Dest = Op.getOperand(2); 3622 SDOperand CC; 3623 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3624 3625 if (Cond.getOpcode() == ISD::SETCC) 3626 Cond = LowerSETCC(Cond, DAG, Chain); 3627 3628 if (Cond.getOpcode() == X86ISD::SETCC) { 3629 CC = Cond.getOperand(0); 3630 3631 // If the condition flag is set by an X86ISD::CMP, then make a copy of it 3632 // (since the flag operand cannot be shared). Use it as the condition-setting 3633 // operand in place of the X86ISD::SETCC. 3634 // If the X86ISD::SETCC has more than one use, then perhaps it's better 3635 // to use a test instead of duplicating the X86ISD::CMP (for register 3636 // pressure reasons)? 3637 SDOperand Cmp = Cond.getOperand(1); 3638 unsigned Opc = Cmp.getOpcode(); 3639 if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) { 3640 SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; 3641 Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); 3642 addTest = false; 3643 } 3644 } 3645 3646 if (addTest) { 3647 CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3648 SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; 3649 Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); 3650 } 3651 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 3652 Cond, Op.getOperand(2), CC, Cond.getValue(1)); 3653} 3654 3655SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 3656 unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3657 3658 if (Subtarget->is64Bit()) 3659 return LowerX86_64CCCCallTo(Op, DAG, CallingConv); 3660 else 3661 switch (CallingConv) { 3662 default: 3663 assert(0 && "Unsupported calling convention"); 3664 case CallingConv::Fast: 3665 // TODO: Implement fastcc 3666 // Falls through 3667 case CallingConv::C: 3668 case CallingConv::X86_StdCall: 3669 return LowerCCCCallTo(Op, DAG, CallingConv); 3670 case CallingConv::X86_FastCall: 3671 return LowerFastCCCallTo(Op, DAG, CallingConv); 3672 } 3673} 3674 3675 3676// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. 3677// Calls to _alloca are needed to probe the stack when allocating more than 4k 3678// bytes in one go.
Touching the stack at 4K increments is necessary to ensure 3679// that the guard pages used by the OS virtual memory manager are allocated in 3680// correct sequence. 3681SDOperand 3682X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, 3683 SelectionDAG &DAG) { 3684 assert(Subtarget->isTargetCygMing() && 3685 "This should be used only on Cygwin/Mingw targets"); 3686 3687 // Get the inputs. 3688 SDOperand Chain = Op.getOperand(0); 3689 SDOperand Size = Op.getOperand(1); 3690 // FIXME: Ensure alignment here 3691 3692 SDOperand Flag; 3693 3694 MVT::ValueType IntPtr = getPointerTy(); 3695 MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32); 3696 3697 Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); 3698 Flag = Chain.getValue(1); 3699 3700 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 3701 SDOperand Ops[] = { Chain, 3702 DAG.getTargetExternalSymbol("_alloca", IntPtr), 3703 DAG.getRegister(X86::EAX, IntPtr), 3704 Flag }; 3705 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4); 3706 Flag = Chain.getValue(1); 3707 3708 Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); 3709 3710 std::vector<MVT::ValueType> Tys; 3711 Tys.push_back(SPTy); 3712 Tys.push_back(MVT::Other); 3713 SDOperand Ops1[2] = { Chain.getValue(0), Chain }; 3714 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); 3715} 3716 3717SDOperand 3718X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 3719 MachineFunction &MF = DAG.getMachineFunction(); 3720 const Function* Fn = MF.getFunction(); 3721 if (Fn->hasExternalLinkage() && 3722 Subtarget->isTargetCygMing() && 3723 Fn->getName() == "main") 3724 MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true); 3725 3726 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 3727 if (Subtarget->is64Bit()) 3728 return LowerX86_64CCCArguments(Op, DAG); 3729 else 3730 switch(CC) { 3731 default: 3732 assert(0 && "Unsupported calling convention"); 3733 case CallingConv::Fast: 3734 // TODO: implement fastcc. 3735 3736 // Falls through 3737 case CallingConv::C: 3738 return LowerCCCArguments(Op, DAG); 3739 case CallingConv::X86_StdCall: 3740 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall); 3741 return LowerCCCArguments(Op, DAG, true); 3742 case CallingConv::X86_FastCall: 3743 MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall); 3744 return LowerFastCCArguments(Op, DAG); 3745 } 3746} 3747 3748SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 3749 SDOperand InFlag(0, 0); 3750 SDOperand Chain = Op.getOperand(0); 3751 unsigned Align = 3752 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3753 if (Align == 0) Align = 1; 3754 3755 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3756 // If not DWORD aligned or size is more than the threshold, call memset. 3757 // The libc version is likely to be faster for these cases. It can use the 3758 // address value and run time information about the CPU. 3759 if ((Align & 3) != 0 || 3760 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 3761 MVT::ValueType IntPtr = getPointerTy(); 3762 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 3763 TargetLowering::ArgListTy Args; 3764 TargetLowering::ArgListEntry Entry; 3765 Entry.Node = Op.getOperand(1); 3766 Entry.Ty = IntPtrTy; 3767 Args.push_back(Entry); 3768 // Extend the unsigned i8 argument to be an int value for the call. 
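    // (memset's C prototype is memset(void *s, int c, size_t n), so the i8
    // fill value is zero-extended to i32 before being passed.)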
3769 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3770 Entry.Ty = IntPtrTy; 3771 Args.push_back(Entry); 3772 Entry.Node = Op.getOperand(3); 3773 Args.push_back(Entry); 3774 std::pair<SDOperand,SDOperand> CallResult = 3775 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3776 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3777 return CallResult.second; 3778 } 3779 3780 MVT::ValueType AVT; 3781 SDOperand Count; 3782 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3783 unsigned BytesLeft = 0; 3784 bool TwoRepStos = false; 3785 if (ValC) { 3786 unsigned ValReg; 3787 uint64_t Val = ValC->getValue() & 255; 3788 3789 // If the value is a constant, then we can potentially use larger sets. 3790 switch (Align & 3) { 3791 case 2: // WORD aligned 3792 AVT = MVT::i16; 3793 ValReg = X86::AX; 3794 Val = (Val << 8) | Val; 3795 break; 3796 case 0: // DWORD aligned 3797 AVT = MVT::i32; 3798 ValReg = X86::EAX; 3799 Val = (Val << 8) | Val; 3800 Val = (Val << 16) | Val; 3801 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3802 AVT = MVT::i64; 3803 ValReg = X86::RAX; 3804 Val = (Val << 32) | Val; 3805 } 3806 break; 3807 default: // Byte aligned 3808 AVT = MVT::i8; 3809 ValReg = X86::AL; 3810 Count = Op.getOperand(3); 3811 break; 3812 } 3813 3814 if (AVT > MVT::i8) { 3815 if (I) { 3816 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3817 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3818 BytesLeft = I->getValue() % UBytes; 3819 } else { 3820 assert(AVT >= MVT::i32 && 3821 "Do not use rep;stos if not at least DWORD aligned"); 3822 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3823 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3824 TwoRepStos = true; 3825 } 3826 } 3827 3828 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3829 InFlag); 3830 InFlag = Chain.getValue(1); 3831 } else { 3832 AVT = MVT::i8; 3833 Count = Op.getOperand(3); 3834 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3835 InFlag = Chain.getValue(1); 3836 } 3837 3838 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3839 Count, InFlag); 3840 InFlag = Chain.getValue(1); 3841 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3842 Op.getOperand(1), InFlag); 3843 InFlag = Chain.getValue(1); 3844 3845 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3846 SmallVector<SDOperand, 8> Ops; 3847 Ops.push_back(Chain); 3848 Ops.push_back(DAG.getValueType(AVT)); 3849 Ops.push_back(InFlag); 3850 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3851 3852 if (TwoRepStos) { 3853 InFlag = Chain.getValue(1); 3854 Count = Op.getOperand(3); 3855 MVT::ValueType CVT = Count.getValueType(); 3856 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3857 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3858 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3859 Left, InFlag); 3860 InFlag = Chain.getValue(1); 3861 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3862 Ops.clear(); 3863 Ops.push_back(Chain); 3864 Ops.push_back(DAG.getValueType(MVT::i8)); 3865 Ops.push_back(InFlag); 3866 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3867 } else if (BytesLeft) { 3868 // Issue stores for the last 1 - 7 bytes. 
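    // E.g. with 7 bytes left over, this emits a 4-byte store, then a 2-byte
    // store, then a 1-byte store, each of the same replicated byte value at
    // the running Offset.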
3869 SDOperand Value; 3870 unsigned Val = ValC->getValue() & 255; 3871 unsigned Offset = I->getValue() - BytesLeft; 3872 SDOperand DstAddr = Op.getOperand(1); 3873 MVT::ValueType AddrVT = DstAddr.getValueType(); 3874 if (BytesLeft >= 4) { 3875 Val = (Val << 8) | Val; 3876 Val = (Val << 16) | Val; 3877 Value = DAG.getConstant(Val, MVT::i32); 3878 Chain = DAG.getStore(Chain, Value, 3879 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3880 DAG.getConstant(Offset, AddrVT)), 3881 NULL, 0); 3882 BytesLeft -= 4; 3883 Offset += 4; 3884 } 3885 if (BytesLeft >= 2) { 3886 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 3887 Chain = DAG.getStore(Chain, Value, 3888 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3889 DAG.getConstant(Offset, AddrVT)), 3890 NULL, 0); 3891 BytesLeft -= 2; 3892 Offset += 2; 3893 } 3894 if (BytesLeft == 1) { 3895 Value = DAG.getConstant(Val, MVT::i8); 3896 Chain = DAG.getStore(Chain, Value, 3897 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3898 DAG.getConstant(Offset, AddrVT)), 3899 NULL, 0); 3900 } 3901 } 3902 3903 return Chain; 3904} 3905 3906SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 3907 SDOperand Chain = Op.getOperand(0); 3908 unsigned Align = 3909 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3910 if (Align == 0) Align = 1; 3911 3912 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3913 // If not DWORD aligned or size is more than the threshold, call memcpy. 3914 // The libc version is likely to be faster for these cases. It can use the 3915 // address value and run time information about the CPU. 3916 // With glibc 2.6.1 on a Core 2, copying an array of 100M longs was 30% faster 3917 if ((Align & 3) != 0 || 3918 (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { 3919 MVT::ValueType IntPtr = getPointerTy(); 3920 TargetLowering::ArgListTy Args; 3921 TargetLowering::ArgListEntry Entry; 3922 Entry.Ty = getTargetData()->getIntPtrType(); 3923 Entry.Node = Op.getOperand(1); Args.push_back(Entry); 3924 Entry.Node = Op.getOperand(2); Args.push_back(Entry); 3925 Entry.Node = Op.getOperand(3); Args.push_back(Entry); 3926 std::pair<SDOperand,SDOperand> CallResult = 3927 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3928 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 3929 return CallResult.second; 3930 } 3931 3932 MVT::ValueType AVT; 3933 SDOperand Count; 3934 unsigned BytesLeft = 0; 3935 bool TwoRepMovs = false; 3936 switch (Align & 3) { 3937 case 2: // WORD aligned 3938 AVT = MVT::i16; 3939 break; 3940 case 0: // DWORD aligned 3941 AVT = MVT::i32; 3942 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 3943 AVT = MVT::i64; 3944 break; 3945 default: // Byte aligned 3946 AVT = MVT::i8; 3947 Count = Op.getOperand(3); 3948 break; 3949 } 3950 3951 if (AVT > MVT::i8) { 3952 if (I) { 3953 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3954 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3955 BytesLeft = I->getValue() % UBytes; 3956 } else { 3957 assert(AVT >= MVT::i32 && 3958 "Do not use rep;movs if not at least DWORD aligned"); 3959 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3960 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3961 TwoRepMovs = true; 3962 } 3963 } 3964 3965 SDOperand InFlag(0, 0); 3966 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3967 Count, InFlag); 3968 InFlag = Chain.getValue(1); 3969 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ?
X86::RDI : X86::EDI, 3970 Op.getOperand(1), InFlag); 3971 InFlag = Chain.getValue(1); 3972 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, 3973 Op.getOperand(2), InFlag); 3974 InFlag = Chain.getValue(1); 3975 3976 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3977 SmallVector<SDOperand, 8> Ops; 3978 Ops.push_back(Chain); 3979 Ops.push_back(DAG.getValueType(AVT)); 3980 Ops.push_back(InFlag); 3981 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3982 3983 if (TwoRepMovs) { 3984 InFlag = Chain.getValue(1); 3985 Count = Op.getOperand(3); 3986 MVT::ValueType CVT = Count.getValueType(); 3987 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3988 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3989 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3990 Left, InFlag); 3991 InFlag = Chain.getValue(1); 3992 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3993 Ops.clear(); 3994 Ops.push_back(Chain); 3995 Ops.push_back(DAG.getValueType(MVT::i8)); 3996 Ops.push_back(InFlag); 3997 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3998 } else if (BytesLeft) { 3999 // Issue loads and stores for the last 1 - 7 bytes. 4000 unsigned Offset = I->getValue() - BytesLeft; 4001 SDOperand DstAddr = Op.getOperand(1); 4002 MVT::ValueType DstVT = DstAddr.getValueType(); 4003 SDOperand SrcAddr = Op.getOperand(2); 4004 MVT::ValueType SrcVT = SrcAddr.getValueType(); 4005 SDOperand Value; 4006 if (BytesLeft >= 4) { 4007 Value = DAG.getLoad(MVT::i32, Chain, 4008 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4009 DAG.getConstant(Offset, SrcVT)), 4010 NULL, 0); 4011 Chain = Value.getValue(1); 4012 Chain = DAG.getStore(Chain, Value, 4013 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4014 DAG.getConstant(Offset, DstVT)), 4015 NULL, 0); 4016 BytesLeft -= 4; 4017 Offset += 4; 4018 } 4019 if (BytesLeft >= 2) { 4020 Value = DAG.getLoad(MVT::i16, Chain, 4021 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4022 DAG.getConstant(Offset, SrcVT)), 4023 NULL, 0); 4024 Chain = Value.getValue(1); 4025 Chain = DAG.getStore(Chain, Value, 4026 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4027 DAG.getConstant(Offset, DstVT)), 4028 NULL, 0); 4029 BytesLeft -= 2; 4030 Offset += 2; 4031 } 4032 4033 if (BytesLeft == 1) { 4034 Value = DAG.getLoad(MVT::i8, Chain, 4035 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 4036 DAG.getConstant(Offset, SrcVT)), 4037 NULL, 0); 4038 Chain = Value.getValue(1); 4039 Chain = DAG.getStore(Chain, Value, 4040 DAG.getNode(ISD::ADD, DstVT, DstAddr, 4041 DAG.getConstant(Offset, DstVT)), 4042 NULL, 0); 4043 } 4044 } 4045 4046 return Chain; 4047} 4048 4049SDOperand 4050X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4051 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4052 SDOperand TheOp = Op.getOperand(0); 4053 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 4054 if (Subtarget->is64Bit()) { 4055 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4056 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 4057 MVT::i64, Copy1.getValue(2)); 4058 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 4059 DAG.getConstant(32, MVT::i8)); 4060 SDOperand Ops[] = { 4061 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 4062 }; 4063 4064 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4065 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 4066 } 4067 4068 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4069 SDOperand Copy2 = 
DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 4070 MVT::i32, Copy1.getValue(2)); 4071 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 4072 Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 4073 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 4074} 4075 4076SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4077 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4078 4079 if (!Subtarget->is64Bit()) { 4080 // vastart just stores the address of the VarArgsFrameIndex slot into the 4081 // memory location argument. 4082 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4083 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4084 SV->getOffset()); 4085 } 4086 4087 // __va_list_tag: 4088 // gp_offset (0 - 6 * 8) 4089 // fp_offset (48 - 48 + 8 * 16) 4090 // overflow_arg_area (point to parameters coming in memory). 4091 // reg_save_area 4092 SmallVector<SDOperand, 8> MemOps; 4093 SDOperand FIN = Op.getOperand(1); 4094 // Store gp_offset 4095 SDOperand Store = DAG.getStore(Op.getOperand(0), 4096 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4097 FIN, SV->getValue(), SV->getOffset()); 4098 MemOps.push_back(Store); 4099 4100 // Store fp_offset 4101 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4102 DAG.getConstant(4, getPointerTy())); 4103 Store = DAG.getStore(Op.getOperand(0), 4104 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4105 FIN, SV->getValue(), SV->getOffset()); 4106 MemOps.push_back(Store); 4107 4108 // Store ptr to overflow_arg_area 4109 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4110 DAG.getConstant(4, getPointerTy())); 4111 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4112 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4113 SV->getOffset()); 4114 MemOps.push_back(Store); 4115 4116 // Store ptr to reg_save_area. 4117 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4118 DAG.getConstant(8, getPointerTy())); 4119 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4120 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4121 SV->getOffset()); 4122 MemOps.push_back(Store); 4123 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4124} 4125 4126SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4127 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 
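  // (Fields in order: gp_offset, fp_offset, overflow_arg_area, reg_save_area,
  // i.e. the same slots LowerVASTART initializes above.)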
4128 SDOperand Chain = Op.getOperand(0); 4129 SDOperand DstPtr = Op.getOperand(1); 4130 SDOperand SrcPtr = Op.getOperand(2); 4131 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4132 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4133 4134 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4135 SrcSV->getValue(), SrcSV->getOffset()); 4136 Chain = SrcPtr.getValue(1); 4137 for (unsigned i = 0; i < 3; ++i) { 4138 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4139 SrcSV->getValue(), SrcSV->getOffset()); 4140 Chain = Val.getValue(1); 4141 Chain = DAG.getStore(Chain, Val, DstPtr, 4142 DstSV->getValue(), DstSV->getOffset()); 4143 if (i == 2) 4144 break; 4145 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4146 DAG.getConstant(8, getPointerTy())); 4147 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4148 DAG.getConstant(8, getPointerTy())); 4149 } 4150 return Chain; 4151} 4152 4153SDOperand 4154X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4155 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4156 switch (IntNo) { 4157 default: return SDOperand(); // Don't custom lower most intrinsics. 4158 // Comparison intrinsics. 4159 case Intrinsic::x86_sse_comieq_ss: 4160 case Intrinsic::x86_sse_comilt_ss: 4161 case Intrinsic::x86_sse_comile_ss: 4162 case Intrinsic::x86_sse_comigt_ss: 4163 case Intrinsic::x86_sse_comige_ss: 4164 case Intrinsic::x86_sse_comineq_ss: 4165 case Intrinsic::x86_sse_ucomieq_ss: 4166 case Intrinsic::x86_sse_ucomilt_ss: 4167 case Intrinsic::x86_sse_ucomile_ss: 4168 case Intrinsic::x86_sse_ucomigt_ss: 4169 case Intrinsic::x86_sse_ucomige_ss: 4170 case Intrinsic::x86_sse_ucomineq_ss: 4171 case Intrinsic::x86_sse2_comieq_sd: 4172 case Intrinsic::x86_sse2_comilt_sd: 4173 case Intrinsic::x86_sse2_comile_sd: 4174 case Intrinsic::x86_sse2_comigt_sd: 4175 case Intrinsic::x86_sse2_comige_sd: 4176 case Intrinsic::x86_sse2_comineq_sd: 4177 case Intrinsic::x86_sse2_ucomieq_sd: 4178 case Intrinsic::x86_sse2_ucomilt_sd: 4179 case Intrinsic::x86_sse2_ucomile_sd: 4180 case Intrinsic::x86_sse2_ucomigt_sd: 4181 case Intrinsic::x86_sse2_ucomige_sd: 4182 case Intrinsic::x86_sse2_ucomineq_sd: { 4183 unsigned Opc = 0; 4184 ISD::CondCode CC = ISD::SETCC_INVALID; 4185 switch (IntNo) { 4186 default: break; 4187 case Intrinsic::x86_sse_comieq_ss: 4188 case Intrinsic::x86_sse2_comieq_sd: 4189 Opc = X86ISD::COMI; 4190 CC = ISD::SETEQ; 4191 break; 4192 case Intrinsic::x86_sse_comilt_ss: 4193 case Intrinsic::x86_sse2_comilt_sd: 4194 Opc = X86ISD::COMI; 4195 CC = ISD::SETLT; 4196 break; 4197 case Intrinsic::x86_sse_comile_ss: 4198 case Intrinsic::x86_sse2_comile_sd: 4199 Opc = X86ISD::COMI; 4200 CC = ISD::SETLE; 4201 break; 4202 case Intrinsic::x86_sse_comigt_ss: 4203 case Intrinsic::x86_sse2_comigt_sd: 4204 Opc = X86ISD::COMI; 4205 CC = ISD::SETGT; 4206 break; 4207 case Intrinsic::x86_sse_comige_ss: 4208 case Intrinsic::x86_sse2_comige_sd: 4209 Opc = X86ISD::COMI; 4210 CC = ISD::SETGE; 4211 break; 4212 case Intrinsic::x86_sse_comineq_ss: 4213 case Intrinsic::x86_sse2_comineq_sd: 4214 Opc = X86ISD::COMI; 4215 CC = ISD::SETNE; 4216 break; 4217 case Intrinsic::x86_sse_ucomieq_ss: 4218 case Intrinsic::x86_sse2_ucomieq_sd: 4219 Opc = X86ISD::UCOMI; 4220 CC = ISD::SETEQ; 4221 break; 4222 case Intrinsic::x86_sse_ucomilt_ss: 4223 case Intrinsic::x86_sse2_ucomilt_sd: 4224 Opc = X86ISD::UCOMI; 4225 CC = ISD::SETLT; 4226 break; 4227 case Intrinsic::x86_sse_ucomile_ss: 4228 case Intrinsic::x86_sse2_ucomile_sd: 4229 Opc = 
X86ISD::UCOMI; 4230 CC = ISD::SETLE; 4231 break; 4232 case Intrinsic::x86_sse_ucomigt_ss: 4233 case Intrinsic::x86_sse2_ucomigt_sd: 4234 Opc = X86ISD::UCOMI; 4235 CC = ISD::SETGT; 4236 break; 4237 case Intrinsic::x86_sse_ucomige_ss: 4238 case Intrinsic::x86_sse2_ucomige_sd: 4239 Opc = X86ISD::UCOMI; 4240 CC = ISD::SETGE; 4241 break; 4242 case Intrinsic::x86_sse_ucomineq_ss: 4243 case Intrinsic::x86_sse2_ucomineq_sd: 4244 Opc = X86ISD::UCOMI; 4245 CC = ISD::SETNE; 4246 break; 4247 } 4248 4249 unsigned X86CC; 4250 SDOperand LHS = Op.getOperand(1); 4251 SDOperand RHS = Op.getOperand(2); 4252 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4253 4254 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4255 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4256 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4257 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4258 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4259 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4260 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4261 } 4262 } 4263} 4264 4265SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4266 // Depths > 0 not supported yet! 4267 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4268 return SDOperand(); 4269 4270 // Just load the return address 4271 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4272 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4273} 4274 4275SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4276 // Depths > 0 not supported yet! 4277 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4278 return SDOperand(); 4279 4280 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4281 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4282 DAG.getConstant(4, getPointerTy())); 4283} 4284 4285SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4286 SelectionDAG &DAG) { 4287 // Is not yet supported on x86-64 4288 if (Subtarget->is64Bit()) 4289 return SDOperand(); 4290 4291 return DAG.getConstant(8, getPointerTy()); 4292} 4293 4294SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4295{ 4296 assert(!Subtarget->is64Bit() && 4297 "Lowering of eh_return builtin is not supported yet on x86-64"); 4298 4299 MachineFunction &MF = DAG.getMachineFunction(); 4300 SDOperand Chain = Op.getOperand(0); 4301 SDOperand Offset = Op.getOperand(1); 4302 SDOperand Handler = Op.getOperand(2); 4303 4304 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4305 getPointerTy()); 4306 4307 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4308 DAG.getConstant(-4UL, getPointerTy())); 4309 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4310 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4311 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4312 MF.addLiveOut(X86::ECX); 4313 4314 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4315 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4316} 4317 4318SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4319 SelectionDAG &DAG) { 4320 SDOperand Root = Op.getOperand(0); 4321 SDOperand Trmp = Op.getOperand(1); // trampoline 4322 SDOperand FPtr = Op.getOperand(2); // nested function 4323 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4324 4325 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4326 4327 if (Subtarget->is64Bit()) { 4328 return 
SDOperand(); // not yet supported 4329 } else { 4330 Function *Func = (Function *) 4331 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4332 unsigned CC = Func->getCallingConv(); 4333 unsigned char NestReg; 4334 4335 switch (CC) { 4336 default: 4337 assert(0 && "Unsupported calling convention"); 4338 case CallingConv::C: 4339 case CallingConv::Fast: 4340 case CallingConv::X86_StdCall: { 4341 // Pass 'nest' parameter in ECX. 4342 // Must be kept in sync with X86CallingConv.td 4343 NestReg = N86::ECX; 4344 4345 // Check that ECX wasn't needed by an 'inreg' parameter. 4346 const FunctionType *FTy = Func->getFunctionType(); 4347 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4348 4349 if (Attrs && !Func->isVarArg()) { 4350 unsigned InRegCount = 0; 4351 unsigned Idx = 1; 4352 4353 for (FunctionType::param_iterator I = FTy->param_begin(), 4354 E = FTy->param_end(); I != E; ++I, ++Idx) 4355 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4356 // FIXME: should only count parameters that are lowered to integers. 4357 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 4358 4359 if (InRegCount > 2) { 4360 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 4361 abort(); 4362 } 4363 } 4364 break; 4365 } 4366 case CallingConv::X86_FastCall: 4367 // Pass 'nest' parameter in EAX. 4368 // Must be kept in sync with X86CallingConv.td 4369 NestReg = N86::EAX; 4370 break; 4371 } 4372 4373 SDOperand OutChains[4]; 4374 SDOperand Addr, Disp; 4375 4376 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); 4377 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); 4378 4379 const unsigned char MOV32ri = 0xB8; 4380 const unsigned char JMP = 0xE9; 4381 4382 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|NestReg, MVT::i8), 4383 Trmp, TrmpSV->getValue(), TrmpSV->getOffset()); 4384 4385 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); 4386 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(), 4387 TrmpSV->getOffset() + 1, false, 1); 4388 4389 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); 4390 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, 4391 TrmpSV->getValue() + 5, TrmpSV->getOffset()); 4392 4393 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); 4394 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(), 4395 TrmpSV->getOffset() + 6, false, 1); 4396 4397 return DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4); 4398 } 4399} 4400 4401/// LowerOperation - Provide custom lowering hooks for some operations. 
4402/// 4403SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4404 switch (Op.getOpcode()) { 4405 default: assert(0 && "Should not custom lower this!"); 4406 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4407 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4408 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4409 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4410 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4411 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4412 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4413 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4414 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4415 case ISD::SHL_PARTS: 4416 case ISD::SRA_PARTS: 4417 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4418 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4419 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4420 case ISD::FABS: return LowerFABS(Op, DAG); 4421 case ISD::FNEG: return LowerFNEG(Op, DAG); 4422 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4423 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4424 case ISD::SELECT: return LowerSELECT(Op, DAG); 4425 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4426 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4427 case ISD::CALL: return LowerCALL(Op, DAG); 4428 case ISD::RET: return LowerRET(Op, DAG); 4429 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4430 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4431 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4432 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4433 case ISD::VASTART: return LowerVASTART(Op, DAG); 4434 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4435 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4436 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4437 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4438 case ISD::FRAME_TO_ARGS_OFFSET: 4439 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 4440 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4441 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 4442 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 4443 } 4444 return SDOperand(); 4445} 4446 4447const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4448 switch (Opcode) { 4449 default: return NULL; 4450 case X86ISD::SHLD: return "X86ISD::SHLD"; 4451 case X86ISD::SHRD: return "X86ISD::SHRD"; 4452 case X86ISD::FAND: return "X86ISD::FAND"; 4453 case X86ISD::FOR: return "X86ISD::FOR"; 4454 case X86ISD::FXOR: return "X86ISD::FXOR"; 4455 case X86ISD::FSRL: return "X86ISD::FSRL"; 4456 case X86ISD::FILD: return "X86ISD::FILD"; 4457 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4458 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4459 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4460 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4461 case X86ISD::FLD: return "X86ISD::FLD"; 4462 case X86ISD::FST: return "X86ISD::FST"; 4463 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4464 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4465 case X86ISD::CALL: return "X86ISD::CALL"; 4466 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4467 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4468 case X86ISD::CMP: return "X86ISD::CMP"; 4469 case 
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FOR:                return "X86ISD::FOR";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FSRL:               return "X86ISD::FSRL";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
  case X86ISD::FRCP:               return "X86ISD::FRCP";
  case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
  case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
  case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
  }
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement, so
  // reject offsets outside [-2^31, 2^31).
  if (AM.BaseOffs < -(1LL << 31) || AM.BaseOffs >= (1LL << 31))
    return false;

  if (AM.BaseGV) {
    // We can only fold this if we don't need an extra load.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;

    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit()) {
      if (getTargetMachine().getCodeModel() != CodeModel::Small)
        return false;
      // If lower 4G is not available, then we must use rip-relative
      // addressing.
      if (AM.BaseOffs || AM.Scale > 1)
        return false;
    }
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}
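
// Editor's illustration: the fully general IA-32 addressing form is
//
//   [BaseReg + Scale*IndexReg + Disp32],  e.g.  movl 1234(%eax,%ebx,4), %ecx
//
// Scales of 3, 5 and 9 pass above because x*3, x*5 and x*9 can be formed by
// reusing the index register as the base, e.g.
//
//   leal (%ebx,%ebx,2), %eax    # eax = ebx*3
//
// which is also why they are rejected once a base register is already taken.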
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isIdentityMask(Mask.Val) ||
          isIdentityMask(Mask.Val, true) ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKH_v_undef_Mask(Mask.Val));
}

bool
X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                          MVT::ValueType EVT,
                                          SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4) ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}
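
// Editor's illustration of masks isShuffleMaskLegal accepts for v4f32:
// <0,1,2,3> is the identity, <4,5,6,7> the swapped-operand identity,
// <0,0,0,0> a splat, and <0,4,1,5> matches unpcklps.  A wider mask with no
// such structure, e.g. a v8i16 permutation like <7,0,3,1,6,2,5,4>, is
// reported illegal, telling the legalizer not to form such a shuffle.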
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Save the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Store a control word that selects round-toward-zero (0xC7F sets the
    // RC bits to 11)...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}
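
// The block built above corresponds to an instruction sequence along these
// lines (editor's sketch; slot and register names are illustrative only):
//
//   fnstcw  <cw-slot>           # save the current FP control word
//   movw    <cw-slot>, %ax      # remember the old value
//   movw    $0xC7F, <cw-slot>   # select round-toward-zero
//   fldcw   <cw-slot>           # activate it
//   movw    %ax, <cw-slot>      # put the old value back in the slot
//   fistpl  <dest>              # store the (now truncating) conversion
//   fldcw   <cw-slot>           # restore the original rounding mode
//
// This dance is needed because x87 fist/fistp honor the current rounding
// mode, while C-style FP-to-int conversion must truncate.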
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // SETCC produces 0 or 1, so every bit except bit 0 is known zero.
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0)
      : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(),
                               DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::Wrapper) {
    if (GlobalAddressSDNode *GASD =
          dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = GASD->getGlobal();
      return true;
    }
  } else if (Opc == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}
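
// Editor's illustration: given a DAG of the shape
//
//   (add (X86ISD::Wrapper (TargetGlobalAddress @gv)), (Constant 8))
//
// isGAPlusOffset returns true with GA = @gv and Offset increased by 8;
// nested adds accumulate their constant operands into Offset recursively.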
/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}


/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isAlign16) {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  } else {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile(),
                       LD->getAlignment());
  }
}
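
// Concretely (editor's sketch): a v4f32 node of the shape
//
//   (vector_shuffle (build_vector (load p), (load p+4), (load p+8),
//                                 (load p+12)), undef, <0,1,2,3>)
//
// is replaced by a single 16-byte load of p; when p is known 16-byte
// aligned the load keeps its natural alignment (movaps), otherwise it
// carries the original, smaller alignment (movups).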
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE: // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}
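
// Editor's sketch of the effect: with SSE2, a DAG such as
//
//   (select (setolt f32 %x, %y), %x, %y)
//
// becomes (X86ISD::FMIN %x, %y), i.e. a single minss.  The less-or-equal
// and strict-greater forms are only rewritten under UnsafeFPMath because
// minss/maxss do not reproduce their semantics exactly when the operands
// compare equal or unordered.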
SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                     char Constraint,
                                                     std::vector<SDOperand>&Ops,
                                                     SelectionDAG &DAG) {
  SDOperand Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255) {
        Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C)
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the operands the other way around.
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we
      // can't match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return;

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}
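
// Editor's illustration: 'I' accepts shift counts (0..31) and 'N' accepts
// I/O port numbers (0..255), so GCC-style inline assembly such as
//
//   asm("outb %0, %1" : : "a"(val), "N"(0x80));
//
// takes the 'N' path above; an out-of-range or non-constant operand adds
// nothing to Ops, which makes the asm operand fail to match.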
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RSTRegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}
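
// Editor's note on the remapping above: an operand constrained as "{ax}"
// with an i32 value first resolves to AX in GR16 via the generic lookup;
// the code then rewrites it to EAX in GR32 so the operand is treated as a
// single 32-bit register rather than turning into the {ax},{dx} pair.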