X86ISelLowering.cpp revision b116fac90f9b54142ac511a30b4d45b54d3508ba
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CodeEmitter.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
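
  // For example, a u16 -> f64 conversion is handled by zero-extending the
  // operand to i32 and then using the signed i32 -> f64 path (cvtsi2sd or
  // fild); this is safe because every u16 value is a non-negative i32.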

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  }
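
  // For example, f64 -> u32 can be done by converting to a signed i64
  // (cvttsd2si with a 64-bit destination, or SSE3 fisttpll) and truncating
  // the result, since every u32 value fits in the non-negative i64 range.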

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSE) {
    setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
    setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8, Expand);
  setOperationAction(ISD::CTLZ, MVT::i8, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTLZ, MVT::i16, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
    setOperationAction(ISD::CTLZ, MVT::i64, Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
  }
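
  // As an illustration of why these need custom lowering: in Darwin PIC code
  // a reference to a non-local global is lowered to a load of an indirection
  // stub relative to the pic base, roughly
  //   movl L_foo$non_lazy_ptr-"L1$pb"(%reg), %eax
  // (sketch only; the exact stub names are emitted by the asm printer),
  // rather than an absolute address.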

  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET, MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY, MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME: use subtarget debug flags.
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify.
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }

  setOperationAction(ISD::ADJUST_TRAMP, MVT::i32, Expand);
  setOperationAction(ISD::ADJUST_TRAMP, MVT::i64, Expand);
  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);
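
    // Concretely, FABS is an ANDPS/ANDPD with a constant that clears the
    // sign bit (0x7FFFFFFF for f32), and FNEG is an XORPS/XORPD with a
    // constant that flips it (0x80000000 for f32); the f64 masks are the
    // analogous 64-bit patterns.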

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set the operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics.

    setOperationAction(ISD::ADD, MVT::v8i8, Legal);
    setOperationAction(ISD::ADD, MVT::v4i16, Legal);
    setOperationAction(ISD::ADD, MVT::v2i32, Legal);
    setOperationAction(ISD::ADD, MVT::v1i64, Legal);

    setOperationAction(ISD::SUB, MVT::v8i8, Legal);
    setOperationAction(ISD::SUB, MVT::v4i16, Legal);
    setOperationAction(ISD::SUB, MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
    setOperationAction(ISD::MUL, MVT::v4i16, Legal);

    setOperationAction(ISD::AND, MVT::v8i8, Promote);
    AddPromotedToType(ISD::AND, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v4i16, Promote);
    AddPromotedToType(ISD::AND, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v2i32, Promote);
    AddPromotedToType(ISD::AND, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND, MVT::v1i64, Legal);

    setOperationAction(ISD::OR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::OR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::OR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::OR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR, MVT::v1i64, Legal);

    setOperationAction(ISD::XOR, MVT::v8i8, Promote);
    AddPromotedToType(ISD::XOR, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v4i16, Promote);
    AddPromotedToType(ISD::XOR, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v2i32, Promote);
    AddPromotedToType(ISD::XOR, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR, MVT::v1i64, Legal);
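
    // Bitwise operations don't care about element boundaries, so promoting
    // v8i8/v4i16/v2i32 AND/OR/XOR to v1i64 lets a single PAND/POR/PXOR on
    // the whole 64-bit MMX register serve all element widths; e.g.
    // (and v8i8 a, b) is bitcast to v1i64, ANDed, and bitcast back.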

    setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v8i8, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when optimizing for size.
  maxStoresPerMemset = 16;  // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16;  // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
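
  // These limits cap how many scalar stores the legalizer will emit when it
  // inlines a small memset/memcpy/memmove instead of calling the library;
  // e.g. memset(p, 0, 64) can become 16 dword stores (fewer with SSE).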
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't
    // really a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if (X86ScalarSSE) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded
      // value back to the stack, only to reload it: just replace the
      // scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}
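
// Note: in the ST0 path above, an f32/f64 value computed in an XMM register
// under X86ScalarSSE is stored to a stack slot and reloaded with X86ISD::FLD
// so that it ends up in ST(0), which is where the C ABI expects FP return
// values on x86-32.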

/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When the stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention only slightly: the callee cleans
//  up the stack instead of the caller, and symbols are decorated in a fancy
//  way :) It doesn't support any vector arguments.
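//
//  For example, a stdcall function foo taking 12 bytes of arguments is
//  exported as _foo@12 and returns with "ret $12" so that the callee pops
//  its own arguments.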

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live-in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn = StackSize;  // Callee pops everything.
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn = 0;          // Callee pops nothing.

    // If this is an sret function, the return should pop the hidden pointer.
    if (NumArgs &&
        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
         ISD::ParamFlags::StructReturn))
      BytesToPopOnReturn = 4;

    BytesCallerReserves = StackSize;
  }
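
  // For example, for "struct S f(void)" lowered with a hidden sret pointer,
  // the callee still pops those 4 bytes on return ("ret $4") even under the
  // caller-cleanup C convention; this matches Darwin/x86, Linux, and Mingw32.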

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.

  MF.getInfo<X86MachineFunctionInfo>()
    ->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
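
  // Sketch of what the GOT setup below amounts to in emitted code: the
  // GlobalBaseReg node materializes the PC (e.g. "call .Lpc; .Lpc: pop %ebx")
  // plus the offset of _GLOBAL_OFFSET_TABLE_, so EBX holds the GOT address
  // that PLT stubs expect at the call.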

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes the remaining
// arguments on the stack in C order, requires that the callee pop its
// arguments off the stack (allowing proper tail calls), and has the same
// return value conventions as the C calling convention.
//
// This calling convention always arranges for the callee-pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and stack
// alignment reasons.
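//
// For example, for "void __fastcall f(int a, int b, int c)", a is passed in
// ECX, b in EDX, and c on the stack, and f returns with "ret $4" to pop c.
// The 8n+4 rule means the argument area plus the 4-byte return address is
// always a multiple of 8.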
937// 938// This calling convention always arranges for the callee pop value to be 8n+4 939// bytes, which is needed for tail recursion elimination and stack alignment 940// reasons. 941SDOperand 942X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 943 MachineFunction &MF = DAG.getMachineFunction(); 944 MachineFrameInfo *MFI = MF.getFrameInfo(); 945 SDOperand Root = Op.getOperand(0); 946 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 947 948 // Assign locations to all of the incoming arguments. 949 SmallVector<CCValAssign, 16> ArgLocs; 950 CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, 951 getTargetMachine(), ArgLocs); 952 CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall); 953 954 SmallVector<SDOperand, 8> ArgValues; 955 unsigned LastVal = ~0U; 956 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 957 CCValAssign &VA = ArgLocs[i]; 958 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 959 // places. 960 assert(VA.getValNo() != LastVal && 961 "Don't support value assigned to multiple locs yet"); 962 LastVal = VA.getValNo(); 963 964 if (VA.isRegLoc()) { 965 MVT::ValueType RegVT = VA.getLocVT(); 966 TargetRegisterClass *RC; 967 if (RegVT == MVT::i32) 968 RC = X86::GR32RegisterClass; 969 else { 970 assert(MVT::isVector(RegVT)); 971 RC = X86::VR128RegisterClass; 972 } 973 974 unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); 975 SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); 976 977 // If this is an 8 or 16-bit value, it is really passed promoted to 32 978 // bits. Insert an assert[sz]ext to capture this, then truncate to the 979 // right size. 980 if (VA.getLocInfo() == CCValAssign::SExt) 981 ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, 982 DAG.getValueType(VA.getValVT())); 983 else if (VA.getLocInfo() == CCValAssign::ZExt) 984 ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, 985 DAG.getValueType(VA.getValVT())); 986 987 if (VA.getLocInfo() != CCValAssign::Full) 988 ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); 989 990 ArgValues.push_back(ArgValue); 991 } else { 992 assert(VA.isMemLoc()); 993 994 // Create the nodes corresponding to a load from this parameter slot. 995 int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, 996 VA.getLocMemOffset()); 997 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 998 ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); 999 } 1000 } 1001 1002 ArgValues.push_back(Root); 1003 1004 unsigned StackSize = CCInfo.getNextStackOffset(); 1005 1006 if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { 1007 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 1008 // arguments and the arguments after the retaddr has been pushed are aligned. 1009 if ((StackSize & 7) == 0) 1010 StackSize += 4; 1011 } 1012 1013 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 1014 RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. 1015 ReturnAddrIndex = 0; // No return address slot generated yet. 1016 BytesToPopOnReturn = StackSize; // Callee pops all stack arguments. 1017 BytesCallerReserves = 0; 1018 1019 MF.getInfo<X86MachineFunctionInfo>() 1020 ->setBytesToPopOnReturn(BytesToPopOnReturn); 1021 1022 // Return the new list of results. 
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Make sure the stack size is 8n+4 bytes so that the start of the
    // arguments stays aligned after the return address has been pushed.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use an extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };
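
  // These are the AMD64 ABI argument registers in allocation order: e.g. for
  // "long f(long a, long b, double x)", a goes in RDI, b in RSI, and x in
  // XMM0.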

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
                 getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        if (MVT::getSizeInBits(RegVT) == 64) {
          RC = X86::GR64RegisterClass;  // MMX values are passed in GPRs.
          RegVT = MVT::i64;
        } else
          RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      // Handle MMX values passed in GPRs.
      if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
          MVT::getSizeInBits(RegVT) == 64)
        ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
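
    // The register save area is laid out as 6*8 = 48 bytes of GPR argument
    // registers followed by 8*16 = 128 bytes of XMM argument registers (176
    // bytes total); VarArgsGPOffset/VarArgsFPOffset correspond to the va_list
    // gp_offset and fp_offset fields, pointing at the first unused slot of
    // each group.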

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                                DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = StackSize;

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }
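
  // For example, a call like printf("%f %f\n", x, y) passes x and y in
  // XMM0/XMM1, so the sequence above emits "movb $2, %al" immediately before
  // the call instruction.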
1420   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1421     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1422                                   RegsToPass[i].second.getValueType()));
1423
1424   if (InFlag.Val)
1425     Ops.push_back(InFlag);
1426
1427   // FIXME: Do not generate X86ISD::TAILCALL for now.
1428   Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1429                       NodeTys, &Ops[0], Ops.size());
1430   InFlag = Chain.getValue(1);
1431
1432   // Returns a flag for retval copy to use.
1433   NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1434   Ops.clear();
1435   Ops.push_back(Chain);
1436   Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1437   Ops.push_back(DAG.getConstant(0, getPointerTy()));
1438   Ops.push_back(InFlag);
1439   Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1440   InFlag = Chain.getValue(1);
1441
1442   // Handle result values, copying them out of physregs into vregs that we
1443   // return.
1444   return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1445 }
1446
1447
1448 //===----------------------------------------------------------------------===//
1449 //                           Other Lowering Hooks
1450 //===----------------------------------------------------------------------===//
1451
1452
1453 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1454   if (ReturnAddrIndex == 0) {
1455     // Set up a frame object for the return address.
1456     MachineFunction &MF = DAG.getMachineFunction();
1457     if (Subtarget->is64Bit())
1458       ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1459     else
1460       ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1461   }
1462
1463   return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
1464 }
1465
1466
1467
1468 /// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
1469 /// X86-specific condition code. It returns false if it cannot do a direct
1470 /// translation. X86CC is the translated CondCode. LHS/RHS are modified as
1471 /// needed.
1472 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1473                            unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
1474                            SelectionDAG &DAG) {
1475   X86CC = X86::COND_INVALID;
1476   if (!isFP) {
1477     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1478       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
1479         // X > -1   -> X == 0, jump !sign.
1480         RHS = DAG.getConstant(0, RHS.getValueType());
1481         X86CC = X86::COND_NS;
1482         return true;
1483       } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
1484         // X < 0   -> X == 0, jump on sign.
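        // (RHS is already the zero constant here, so only the condition
        // code needs to change: we simply test the sign flag.)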
1485         X86CC = X86::COND_S;
1486         return true;
1487       }
1488     }
1489
1490     switch (SetCCOpcode) {
1491     default: break;
1492     case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1493     case ISD::SETGT:  X86CC = X86::COND_G;  break;
1494     case ISD::SETGE:  X86CC = X86::COND_GE; break;
1495     case ISD::SETLT:  X86CC = X86::COND_L;  break;
1496     case ISD::SETLE:  X86CC = X86::COND_LE; break;
1497     case ISD::SETNE:  X86CC = X86::COND_NE; break;
1498     case ISD::SETULT: X86CC = X86::COND_B;  break;
1499     case ISD::SETUGT: X86CC = X86::COND_A;  break;
1500     case ISD::SETULE: X86CC = X86::COND_BE; break;
1501     case ISD::SETUGE: X86CC = X86::COND_AE; break;
1502     }
1503   } else {
1504     // On a floating point condition, the flags are set as follows:
1505     //  ZF  PF  CF   op
1506     //   0 | 0 | 0 | X > Y
1507     //   0 | 0 | 1 | X < Y
1508     //   1 | 0 | 0 | X == Y
1509     //   1 | 1 | 1 | unordered
1510     bool Flip = false;
1511     switch (SetCCOpcode) {
1512     default: break;
1513     case ISD::SETUEQ:
1514     case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1515     case ISD::SETOLT: Flip = true; // Fallthrough
1516     case ISD::SETOGT:
1517     case ISD::SETGT:  X86CC = X86::COND_A;  break;
1518     case ISD::SETOLE: Flip = true; // Fallthrough
1519     case ISD::SETOGE:
1520     case ISD::SETGE:  X86CC = X86::COND_AE; break;
1521     case ISD::SETUGT: Flip = true; // Fallthrough
1522     case ISD::SETULT:
1523     case ISD::SETLT:  X86CC = X86::COND_B;  break;
1524     case ISD::SETUGE: Flip = true; // Fallthrough
1525     case ISD::SETULE:
1526     case ISD::SETLE:  X86CC = X86::COND_BE; break;
1527     case ISD::SETONE:
1528     case ISD::SETNE:  X86CC = X86::COND_NE; break;
1529     case ISD::SETUO:  X86CC = X86::COND_P;  break;
1530     case ISD::SETO:   X86CC = X86::COND_NP; break;
1531     }
1532     if (Flip)
1533       std::swap(LHS, RHS);
1534   }
1535
1536   return X86CC != X86::COND_INVALID;
1537 }
1538
1539 /// hasFPCMov - is there a floating point cmov for the specific X86 condition
1540 /// code. The current x86 ISA includes the following FP cmov instructions:
1541 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1542 static bool hasFPCMov(unsigned X86CC) {
1543   switch (X86CC) {
1544   default:
1545     return false;
1546   case X86::COND_B:
1547   case X86::COND_BE:
1548   case X86::COND_E:
1549   case X86::COND_P:
1550   case X86::COND_A:
1551   case X86::COND_AE:
1552   case X86::COND_NE:
1553   case X86::COND_NP:
1554     return true;
1555   }
1556 }
1557
1558 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
1559 /// true if Op is undef or if its value falls within the range [Low, Hi).
1560 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1561   if (Op.getOpcode() == ISD::UNDEF)
1562     return true;
1563
1564   unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1565   return (Val >= Low && Val < Hi);
1566 }
1567
1568 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
1569 /// true if Op is undef or if its value is equal to the specified value.
1570 static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1571   if (Op.getOpcode() == ISD::UNDEF)
1572     return true;
1573   return cast<ConstantSDNode>(Op)->getValue() == Val;
1574 }
1575
1576 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1577 /// specifies a shuffle of elements that is suitable for input to PSHUFD.
1578 bool X86::isPSHUFDMask(SDNode *N) {
1579   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1580
1581   if (N->getNumOperands() != 4)
1582     return false;
1583
1584   // Check if the value doesn't reference the second vector.
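  // e.g. <2, 1, undef, 0> is a valid PSHUFD mask, while <0, 1, 4, 5> is not,
  // since elements 4 and 5 would come from the second vector.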
1585 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1586 SDOperand Arg = N->getOperand(i); 1587 if (Arg.getOpcode() == ISD::UNDEF) continue; 1588 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1589 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 1590 return false; 1591 } 1592 1593 return true; 1594} 1595 1596/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 1597/// specifies a shuffle of elements that is suitable for input to PSHUFHW. 1598bool X86::isPSHUFHWMask(SDNode *N) { 1599 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1600 1601 if (N->getNumOperands() != 8) 1602 return false; 1603 1604 // Lower quadword copied in order. 1605 for (unsigned i = 0; i != 4; ++i) { 1606 SDOperand Arg = N->getOperand(i); 1607 if (Arg.getOpcode() == ISD::UNDEF) continue; 1608 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1609 if (cast<ConstantSDNode>(Arg)->getValue() != i) 1610 return false; 1611 } 1612 1613 // Upper quadword shuffled. 1614 for (unsigned i = 4; i != 8; ++i) { 1615 SDOperand Arg = N->getOperand(i); 1616 if (Arg.getOpcode() == ISD::UNDEF) continue; 1617 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 1618 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 1619 if (Val < 4 || Val > 7) 1620 return false; 1621 } 1622 1623 return true; 1624} 1625 1626/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 1627/// specifies a shuffle of elements that is suitable for input to PSHUFLW. 1628bool X86::isPSHUFLWMask(SDNode *N) { 1629 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1630 1631 if (N->getNumOperands() != 8) 1632 return false; 1633 1634 // Upper quadword copied in order. 1635 for (unsigned i = 4; i != 8; ++i) 1636 if (!isUndefOrEqual(N->getOperand(i), i)) 1637 return false; 1638 1639 // Lower quadword shuffled. 1640 for (unsigned i = 0; i != 4; ++i) 1641 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 1642 return false; 1643 1644 return true; 1645} 1646 1647/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 1648/// specifies a shuffle of elements that is suitable for input to SHUFP*. 1649static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) { 1650 if (NumElems != 2 && NumElems != 4) return false; 1651 1652 unsigned Half = NumElems / 2; 1653 for (unsigned i = 0; i < Half; ++i) 1654 if (!isUndefOrInRange(Elems[i], 0, NumElems)) 1655 return false; 1656 for (unsigned i = Half; i < NumElems; ++i) 1657 if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2)) 1658 return false; 1659 1660 return true; 1661} 1662 1663bool X86::isSHUFPMask(SDNode *N) { 1664 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1665 return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); 1666} 1667 1668/// isCommutedSHUFP - Returns true if the shuffle mask is exactly 1669/// the reverse of what x86 shuffles want. x86 shuffles requires the lower 1670/// half elements to come from vector 1 (which would equal the dest.) and 1671/// the upper half to come from vector 2. 
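/// e.g. for 4 elements, <4, 5, 0, 1> is a commuted SHUFP mask.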
1672static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) { 1673 if (NumOps != 2 && NumOps != 4) return false; 1674 1675 unsigned Half = NumOps / 2; 1676 for (unsigned i = 0; i < Half; ++i) 1677 if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2)) 1678 return false; 1679 for (unsigned i = Half; i < NumOps; ++i) 1680 if (!isUndefOrInRange(Ops[i], 0, NumOps)) 1681 return false; 1682 return true; 1683} 1684 1685static bool isCommutedSHUFP(SDNode *N) { 1686 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1687 return isCommutedSHUFP(N->op_begin(), N->getNumOperands()); 1688} 1689 1690/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 1691/// specifies a shuffle of elements that is suitable for input to MOVHLPS. 1692bool X86::isMOVHLPSMask(SDNode *N) { 1693 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1694 1695 if (N->getNumOperands() != 4) 1696 return false; 1697 1698 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 1699 return isUndefOrEqual(N->getOperand(0), 6) && 1700 isUndefOrEqual(N->getOperand(1), 7) && 1701 isUndefOrEqual(N->getOperand(2), 2) && 1702 isUndefOrEqual(N->getOperand(3), 3); 1703} 1704 1705/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form 1706/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, 1707/// <2, 3, 2, 3> 1708bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { 1709 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1710 1711 if (N->getNumOperands() != 4) 1712 return false; 1713 1714 // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 1715 return isUndefOrEqual(N->getOperand(0), 2) && 1716 isUndefOrEqual(N->getOperand(1), 3) && 1717 isUndefOrEqual(N->getOperand(2), 2) && 1718 isUndefOrEqual(N->getOperand(3), 3); 1719} 1720 1721/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 1722/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 1723bool X86::isMOVLPMask(SDNode *N) { 1724 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1725 1726 unsigned NumElems = N->getNumOperands(); 1727 if (NumElems != 2 && NumElems != 4) 1728 return false; 1729 1730 for (unsigned i = 0; i < NumElems/2; ++i) 1731 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 1732 return false; 1733 1734 for (unsigned i = NumElems/2; i < NumElems; ++i) 1735 if (!isUndefOrEqual(N->getOperand(i), i)) 1736 return false; 1737 1738 return true; 1739} 1740 1741/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 1742/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 1743/// and MOVLHPS. 1744bool X86::isMOVHPMask(SDNode *N) { 1745 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1746 1747 unsigned NumElems = N->getNumOperands(); 1748 if (NumElems != 2 && NumElems != 4) 1749 return false; 1750 1751 for (unsigned i = 0; i < NumElems/2; ++i) 1752 if (!isUndefOrEqual(N->getOperand(i), i)) 1753 return false; 1754 1755 for (unsigned i = 0; i < NumElems/2; ++i) { 1756 SDOperand Arg = N->getOperand(i + NumElems/2); 1757 if (!isUndefOrEqual(Arg, i + NumElems)) 1758 return false; 1759 } 1760 1761 return true; 1762} 1763 1764/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 1765/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
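/// e.g. the canonical 4-element unpackl mask is <0, 4, 1, 5>.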
1766bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts, 1767 bool V2IsSplat = false) { 1768 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1769 return false; 1770 1771 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1772 SDOperand BitI = Elts[i]; 1773 SDOperand BitI1 = Elts[i+1]; 1774 if (!isUndefOrEqual(BitI, j)) 1775 return false; 1776 if (V2IsSplat) { 1777 if (isUndefOrEqual(BitI1, NumElts)) 1778 return false; 1779 } else { 1780 if (!isUndefOrEqual(BitI1, j + NumElts)) 1781 return false; 1782 } 1783 } 1784 1785 return true; 1786} 1787 1788bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 1789 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1790 return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1791} 1792 1793/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 1794/// specifies a shuffle of elements that is suitable for input to UNPCKH. 1795bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts, 1796 bool V2IsSplat = false) { 1797 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) 1798 return false; 1799 1800 for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) { 1801 SDOperand BitI = Elts[i]; 1802 SDOperand BitI1 = Elts[i+1]; 1803 if (!isUndefOrEqual(BitI, j + NumElts/2)) 1804 return false; 1805 if (V2IsSplat) { 1806 if (isUndefOrEqual(BitI1, NumElts)) 1807 return false; 1808 } else { 1809 if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) 1810 return false; 1811 } 1812 } 1813 1814 return true; 1815} 1816 1817bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 1818 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1819 return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat); 1820} 1821 1822/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 1823/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 1824/// <0, 0, 1, 1> 1825bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 1826 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1827 1828 unsigned NumElems = N->getNumOperands(); 1829 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1830 return false; 1831 1832 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 1833 SDOperand BitI = N->getOperand(i); 1834 SDOperand BitI1 = N->getOperand(i+1); 1835 1836 if (!isUndefOrEqual(BitI, j)) 1837 return false; 1838 if (!isUndefOrEqual(BitI1, j)) 1839 return false; 1840 } 1841 1842 return true; 1843} 1844 1845/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form 1846/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, 1847/// <2, 2, 3, 3> 1848bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { 1849 assert(N->getOpcode() == ISD::BUILD_VECTOR); 1850 1851 unsigned NumElems = N->getNumOperands(); 1852 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 1853 return false; 1854 1855 for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { 1856 SDOperand BitI = N->getOperand(i); 1857 SDOperand BitI1 = N->getOperand(i + 1); 1858 1859 if (!isUndefOrEqual(BitI, j)) 1860 return false; 1861 if (!isUndefOrEqual(BitI1, j)) 1862 return false; 1863 } 1864 1865 return true; 1866} 1867 1868/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 1869/// specifies a shuffle of elements that is suitable for input to MOVSS, 1870/// MOVSD, and MOVD, i.e. setting the lowest element. 
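/// e.g. <4, 1, 2, 3> for 4 elements: element 0 is taken from V2 and the
/// remaining elements come from V1 in order.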
1871 static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
1872   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
1873     return false;
1874
1875   if (!isUndefOrEqual(Elts[0], NumElts))
1876     return false;
1877
1878   for (unsigned i = 1; i < NumElts; ++i) {
1879     if (!isUndefOrEqual(Elts[i], i))
1880       return false;
1881   }
1882
1883   return true;
1884 }
1885
1886 bool X86::isMOVLMask(SDNode *N) {
1887   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1888   return ::isMOVLMask(N->op_begin(), N->getNumOperands());
1889 }
1890
1891 /// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
1892 /// of what x86 movss wants: the lowest element must be the lowest element of
1893 /// vector 2, and the other elements must come from vector 1 in order.
1894 static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
1895                            bool V2IsSplat = false,
1896                            bool V2IsUndef = false) {
1897   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
1898     return false;
1899
1900   if (!isUndefOrEqual(Ops[0], 0))
1901     return false;
1902
1903   for (unsigned i = 1; i < NumOps; ++i) {
1904     SDOperand Arg = Ops[i];
1905     if (!(isUndefOrEqual(Arg, i+NumOps) ||
1906           (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
1907           (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
1908       return false;
1909   }
1910
1911   return true;
1912 }
1913
1914 static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
1915                            bool V2IsUndef = false) {
1916   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1917   return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
1918                         V2IsSplat, V2IsUndef);
1919 }
1920
1921 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1922 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1923 bool X86::isMOVSHDUPMask(SDNode *N) {
1924   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1925
1926   if (N->getNumOperands() != 4)
1927     return false;
1928
1929   // Expect 1, 1, 3, 3
1930   for (unsigned i = 0; i < 2; ++i) {
1931     SDOperand Arg = N->getOperand(i);
1932     if (Arg.getOpcode() == ISD::UNDEF) continue;
1933     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1934     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1935     if (Val != 1) return false;
1936   }
1937
1938   bool HasHi = false;
1939   for (unsigned i = 2; i < 4; ++i) {
1940     SDOperand Arg = N->getOperand(i);
1941     if (Arg.getOpcode() == ISD::UNDEF) continue;
1942     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1943     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1944     if (Val != 3) return false;
1945     HasHi = true;
1946   }
1947
1948   // Don't use movshdup if it can be done with a shufps.
1949   return HasHi;
1950 }
1951
1952 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1953 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
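/// i.e. the 4-element mask <0, 0, 2, 2>, which duplicates the even elements.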
1954 bool X86::isMOVSLDUPMask(SDNode *N) {
1955   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1956
1957   if (N->getNumOperands() != 4)
1958     return false;
1959
1960   // Expect 0, 0, 2, 2
1961   for (unsigned i = 0; i < 2; ++i) {
1962     SDOperand Arg = N->getOperand(i);
1963     if (Arg.getOpcode() == ISD::UNDEF) continue;
1964     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1965     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1966     if (Val != 0) return false;
1967   }
1968
1969   bool HasHi = false;
1970   for (unsigned i = 2; i < 4; ++i) {
1971     SDOperand Arg = N->getOperand(i);
1972     if (Arg.getOpcode() == ISD::UNDEF) continue;
1973     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1974     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1975     if (Val != 2) return false;
1976     HasHi = true;
1977   }
1978
1979   // Don't use movsldup if it can be done with a shufps.
1980   return HasHi;
1981 }
1982
1983 /// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
1984 /// specifies an identity operation on the LHS or RHS.
1985 static bool isIdentityMask(SDNode *N, bool RHS = false) {
1986   unsigned NumElems = N->getNumOperands();
1987   for (unsigned i = 0; i < NumElems; ++i)
1988     if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
1989       return false;
1990   return true;
1991 }
1992
1993 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1994 /// a splat of a single element.
1995 static bool isSplatMask(SDNode *N) {
1996   assert(N->getOpcode() == ISD::BUILD_VECTOR);
1997
1998   // This is a splat operation if each element of the permute is the same, and
1999   // if the value doesn't reference the second vector.
2000   unsigned NumElems = N->getNumOperands();
2001   SDOperand ElementBase;
2002   unsigned i = 0;
2003   for (; i != NumElems; ++i) {
2004     SDOperand Elt = N->getOperand(i);
2005     if (isa<ConstantSDNode>(Elt)) {
2006       ElementBase = Elt;
2007       break;
2008     }
2009   }
2010
2011   if (!ElementBase.Val)
2012     return false;
2013
2014   for (; i != NumElems; ++i) {
2015     SDOperand Arg = N->getOperand(i);
2016     if (Arg.getOpcode() == ISD::UNDEF) continue;
2017     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2018     if (Arg != ElementBase) return false;
2019   }
2020
2021   // Make sure it is a splat of the first vector operand.
2022   return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2023 }
2024
2025 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2026 /// a splat of a single element and it's a 2 or 4 element mask.
2027 bool X86::isSplatMask(SDNode *N) {
2028   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2029
2030   // We can only splat 64-bit and 32-bit quantities with a single instruction.
2031   if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2032     return false;
2033   return ::isSplatMask(N);
2034 }
2035
2036 /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
2037 /// specifies a splat of element zero.
2038 bool X86::isSplatLoMask(SDNode *N) {
2039   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2040
2041   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
2042     if (!isUndefOrEqual(N->getOperand(i), 0))
2043       return false;
2044   return true;
2045 }
2046
2047 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2048 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2049 /// instructions. For example, the 4-element mask <3, 2, 1, 0> encodes as 0x1B.
2050 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2051   unsigned NumOperands = N->getNumOperands();
2052   unsigned Shift = (NumOperands == 4) ? 2 : 1;
2053   unsigned Mask = 0;
2054   for (unsigned i = 0; i < NumOperands; ++i) {
2055     unsigned Val = 0;
2056     SDOperand Arg = N->getOperand(NumOperands-i-1);
2057     if (Arg.getOpcode() != ISD::UNDEF)
2058       Val = cast<ConstantSDNode>(Arg)->getValue();
2059     if (Val >= NumOperands) Val -= NumOperands;
2060     Mask |= Val;
2061     if (i != NumOperands - 1)
2062       Mask <<= Shift;
2063   }
2064
2065   return Mask;
2066 }
2067
2068 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2069 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2070 /// instructions.
2071 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2072   unsigned Mask = 0;
2073   // 8 nodes, but we only care about the last 4.
2074   for (unsigned i = 7; i >= 4; --i) {
2075     unsigned Val = 0;
2076     SDOperand Arg = N->getOperand(i);
2077     if (Arg.getOpcode() != ISD::UNDEF)
2078       Val = cast<ConstantSDNode>(Arg)->getValue();
2079     Mask |= (Val - 4);
2080     if (i != 4)
2081       Mask <<= 2;
2082   }
2083
2084   return Mask;
2085 }
2086
2087 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2088 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2089 /// instructions.
2090 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2091   unsigned Mask = 0;
2092   // 8 nodes, but we only care about the first 4.
2093   for (int i = 3; i >= 0; --i) {
2094     unsigned Val = 0;
2095     SDOperand Arg = N->getOperand(i);
2096     if (Arg.getOpcode() != ISD::UNDEF)
2097       Val = cast<ConstantSDNode>(Arg)->getValue();
2098     Mask |= Val;
2099     if (i != 0)
2100       Mask <<= 2;
2101   }
2102
2103   return Mask;
2104 }
2105
2106 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2107 /// specifies an 8-element shuffle that can be broken into a pair of
2108 /// PSHUFHW and PSHUFLW.
2109 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2110   assert(N->getOpcode() == ISD::BUILD_VECTOR);
2111
2112   if (N->getNumOperands() != 8)
2113     return false;
2114
2115   // Lower quadword shuffled.
2116   for (unsigned i = 0; i != 4; ++i) {
2117     SDOperand Arg = N->getOperand(i);
2118     if (Arg.getOpcode() == ISD::UNDEF) continue;
2119     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2120     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2121     if (Val >= 4)
2122       return false;
2123   }
2124
2125   // Upper quadword shuffled.
2126   for (unsigned i = 4; i != 8; ++i) {
2127     SDOperand Arg = N->getOperand(i);
2128     if (Arg.getOpcode() == ISD::UNDEF) continue;
2129     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2130     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2131     if (Val < 4 || Val > 7)
2132       return false;
2133   }
2134
2135   return true;
2136 }
2137
2138 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as
2139 /// values in their permute mask.
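/// e.g. shuffle(V1, V2, <0, 5, 2, 7>) becomes shuffle(V2, V1, <4, 1, 6, 3>).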
2140static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, 2141 SDOperand &V2, SDOperand &Mask, 2142 SelectionDAG &DAG) { 2143 MVT::ValueType VT = Op.getValueType(); 2144 MVT::ValueType MaskVT = Mask.getValueType(); 2145 MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); 2146 unsigned NumElems = Mask.getNumOperands(); 2147 SmallVector<SDOperand, 8> MaskVec; 2148 2149 for (unsigned i = 0; i != NumElems; ++i) { 2150 SDOperand Arg = Mask.getOperand(i); 2151 if (Arg.getOpcode() == ISD::UNDEF) { 2152 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 2153 continue; 2154 } 2155 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 2156 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2157 if (Val < NumElems) 2158 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 2159 else 2160 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 2161 } 2162 2163 std::swap(V1, V2); 2164 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2165 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2166} 2167 2168/// ShouldXformToMOVHLPS - Return true if the node should be transformed to 2169/// match movhlps. The lower half elements should come from upper half of 2170/// V1 (and in order), and the upper half elements should come from the upper 2171/// half of V2 (and in order). 2172static bool ShouldXformToMOVHLPS(SDNode *Mask) { 2173 unsigned NumElems = Mask->getNumOperands(); 2174 if (NumElems != 4) 2175 return false; 2176 for (unsigned i = 0, e = 2; i != e; ++i) 2177 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 2178 return false; 2179 for (unsigned i = 2; i != 4; ++i) 2180 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 2181 return false; 2182 return true; 2183} 2184 2185/// isScalarLoadToVector - Returns true if the node is a scalar load that 2186/// is promoted to a vector. 2187static inline bool isScalarLoadToVector(SDNode *N) { 2188 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2189 N = N->getOperand(0).Val; 2190 return ISD::isNON_EXTLoad(N); 2191 } 2192 return false; 2193} 2194 2195/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 2196/// match movlp{s|d}. The lower half elements should come from lower half of 2197/// V1 (and in order), and the upper half elements should come from the upper 2198/// half of V2 (and in order). And since V1 will become the source of the 2199/// MOVLP, it must be either a vector load or a scalar load to vector. 2200static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) { 2201 if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) 2202 return false; 2203 // Is V2 is a vector load, don't do this transformation. We will try to use 2204 // load folding shufps op. 2205 if (ISD::isNON_EXTLoad(V2)) 2206 return false; 2207 2208 unsigned NumElems = Mask->getNumOperands(); 2209 if (NumElems != 2 && NumElems != 4) 2210 return false; 2211 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 2212 if (!isUndefOrEqual(Mask->getOperand(i), i)) 2213 return false; 2214 for (unsigned i = NumElems/2; i != NumElems; ++i) 2215 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 2216 return false; 2217 return true; 2218} 2219 2220/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 2221/// all the same. 
2222static bool isSplatVector(SDNode *N) { 2223 if (N->getOpcode() != ISD::BUILD_VECTOR) 2224 return false; 2225 2226 SDOperand SplatValue = N->getOperand(0); 2227 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 2228 if (N->getOperand(i) != SplatValue) 2229 return false; 2230 return true; 2231} 2232 2233/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2234/// to an undef. 2235static bool isUndefShuffle(SDNode *N) { 2236 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2237 return false; 2238 2239 SDOperand V1 = N->getOperand(0); 2240 SDOperand V2 = N->getOperand(1); 2241 SDOperand Mask = N->getOperand(2); 2242 unsigned NumElems = Mask.getNumOperands(); 2243 for (unsigned i = 0; i != NumElems; ++i) { 2244 SDOperand Arg = Mask.getOperand(i); 2245 if (Arg.getOpcode() != ISD::UNDEF) { 2246 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2247 if (Val < NumElems && V1.getOpcode() != ISD::UNDEF) 2248 return false; 2249 else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF) 2250 return false; 2251 } 2252 } 2253 return true; 2254} 2255 2256/// isZeroNode - Returns true if Elt is a constant zero or a floating point 2257/// constant +0.0. 2258static inline bool isZeroNode(SDOperand Elt) { 2259 return ((isa<ConstantSDNode>(Elt) && 2260 cast<ConstantSDNode>(Elt)->getValue() == 0) || 2261 (isa<ConstantFPSDNode>(Elt) && 2262 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 2263} 2264 2265/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved 2266/// to an zero vector. 2267static bool isZeroShuffle(SDNode *N) { 2268 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) 2269 return false; 2270 2271 SDOperand V1 = N->getOperand(0); 2272 SDOperand V2 = N->getOperand(1); 2273 SDOperand Mask = N->getOperand(2); 2274 unsigned NumElems = Mask.getNumOperands(); 2275 for (unsigned i = 0; i != NumElems; ++i) { 2276 SDOperand Arg = Mask.getOperand(i); 2277 if (Arg.getOpcode() != ISD::UNDEF) { 2278 unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); 2279 if (Idx < NumElems) { 2280 unsigned Opc = V1.Val->getOpcode(); 2281 if (Opc == ISD::UNDEF) 2282 continue; 2283 if (Opc != ISD::BUILD_VECTOR || 2284 !isZeroNode(V1.Val->getOperand(Idx))) 2285 return false; 2286 } else if (Idx >= NumElems) { 2287 unsigned Opc = V2.Val->getOpcode(); 2288 if (Opc == ISD::UNDEF) 2289 continue; 2290 if (Opc != ISD::BUILD_VECTOR || 2291 !isZeroNode(V2.Val->getOperand(Idx - NumElems))) 2292 return false; 2293 } 2294 } 2295 } 2296 return true; 2297} 2298 2299/// getZeroVector - Returns a vector of specified type with all zero elements. 2300/// 2301static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 2302 assert(MVT::isVector(VT) && "Expected a vector type"); 2303 unsigned NumElems = MVT::getVectorNumElements(VT); 2304 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2305 bool isFP = MVT::isFloatingPoint(EVT); 2306 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 2307 SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero); 2308 return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); 2309} 2310 2311/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 2312/// that point to V2 points to its first element. 
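/// e.g. with 4 elements, the mask <0, 5, 2, 7> is rewritten to <0, 4, 2, 4>.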
2313static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 2314 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 2315 2316 bool Changed = false; 2317 SmallVector<SDOperand, 8> MaskVec; 2318 unsigned NumElems = Mask.getNumOperands(); 2319 for (unsigned i = 0; i != NumElems; ++i) { 2320 SDOperand Arg = Mask.getOperand(i); 2321 if (Arg.getOpcode() != ISD::UNDEF) { 2322 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 2323 if (Val > NumElems) { 2324 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 2325 Changed = true; 2326 } 2327 } 2328 MaskVec.push_back(Arg); 2329 } 2330 2331 if (Changed) 2332 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), 2333 &MaskVec[0], MaskVec.size()); 2334 return Mask; 2335} 2336 2337/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 2338/// operation of specified width. 2339static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 2340 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2341 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2342 2343 SmallVector<SDOperand, 8> MaskVec; 2344 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 2345 for (unsigned i = 1; i != NumElems; ++i) 2346 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2347 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2348} 2349 2350/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 2351/// of specified width. 2352static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 2353 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2354 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2355 SmallVector<SDOperand, 8> MaskVec; 2356 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 2357 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2358 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 2359 } 2360 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2361} 2362 2363/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 2364/// of specified width. 2365static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 2366 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2367 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2368 unsigned Half = NumElems/2; 2369 SmallVector<SDOperand, 8> MaskVec; 2370 for (unsigned i = 0; i != Half; ++i) { 2371 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 2372 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 2373 } 2374 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); 2375} 2376 2377/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 
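/// This is done by repeatedly unpacking the vector with itself until four
/// elements remain, then splatting the first 32-bit element with a zero
/// shuffle mask.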
2378/// 2379static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 2380 SDOperand V1 = Op.getOperand(0); 2381 SDOperand Mask = Op.getOperand(2); 2382 MVT::ValueType VT = Op.getValueType(); 2383 unsigned NumElems = Mask.getNumOperands(); 2384 Mask = getUnpacklMask(NumElems, DAG); 2385 while (NumElems != 4) { 2386 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 2387 NumElems >>= 1; 2388 } 2389 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 2390 2391 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 2392 Mask = getZeroVector(MaskVT, DAG); 2393 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 2394 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 2395 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 2396} 2397 2398/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 2399/// vector of zero or undef vector. 2400static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 2401 unsigned NumElems, unsigned Idx, 2402 bool isZero, SelectionDAG &DAG) { 2403 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 2404 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2405 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2406 SDOperand Zero = DAG.getConstant(0, EVT); 2407 SmallVector<SDOperand, 8> MaskVec(NumElems, Zero); 2408 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 2409 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2410 &MaskVec[0], MaskVec.size()); 2411 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2412} 2413 2414/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 2415/// 2416static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 2417 unsigned NumNonZero, unsigned NumZero, 2418 SelectionDAG &DAG, TargetLowering &TLI) { 2419 if (NumNonZero > 8) 2420 return SDOperand(); 2421 2422 SDOperand V(0, 0); 2423 bool First = true; 2424 for (unsigned i = 0; i < 16; ++i) { 2425 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 2426 if (ThisIsNonZero && First) { 2427 if (NumZero) 2428 V = getZeroVector(MVT::v8i16, DAG); 2429 else 2430 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2431 First = false; 2432 } 2433 2434 if ((i & 1) != 0) { 2435 SDOperand ThisElt(0, 0), LastElt(0, 0); 2436 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 2437 if (LastIsNonZero) { 2438 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 2439 } 2440 if (ThisIsNonZero) { 2441 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 2442 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 2443 ThisElt, DAG.getConstant(8, MVT::i8)); 2444 if (LastIsNonZero) 2445 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 2446 } else 2447 ThisElt = LastElt; 2448 2449 if (ThisElt.Val) 2450 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 2451 DAG.getConstant(i/2, TLI.getPointerTy())); 2452 } 2453 } 2454 2455 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 2456} 2457 2458/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
2459/// 2460static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 2461 unsigned NumNonZero, unsigned NumZero, 2462 SelectionDAG &DAG, TargetLowering &TLI) { 2463 if (NumNonZero > 4) 2464 return SDOperand(); 2465 2466 SDOperand V(0, 0); 2467 bool First = true; 2468 for (unsigned i = 0; i < 8; ++i) { 2469 bool isNonZero = (NonZeros & (1 << i)) != 0; 2470 if (isNonZero) { 2471 if (First) { 2472 if (NumZero) 2473 V = getZeroVector(MVT::v8i16, DAG); 2474 else 2475 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 2476 First = false; 2477 } 2478 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 2479 DAG.getConstant(i, TLI.getPointerTy())); 2480 } 2481 } 2482 2483 return V; 2484} 2485 2486SDOperand 2487X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2488 // All zero's are handled with pxor. 2489 if (ISD::isBuildVectorAllZeros(Op.Val)) 2490 return Op; 2491 2492 // All one's are handled with pcmpeqd. 2493 if (ISD::isBuildVectorAllOnes(Op.Val)) 2494 return Op; 2495 2496 MVT::ValueType VT = Op.getValueType(); 2497 MVT::ValueType EVT = MVT::getVectorElementType(VT); 2498 unsigned EVTBits = MVT::getSizeInBits(EVT); 2499 2500 unsigned NumElems = Op.getNumOperands(); 2501 unsigned NumZero = 0; 2502 unsigned NumNonZero = 0; 2503 unsigned NonZeros = 0; 2504 unsigned NumNonZeroImms = 0; 2505 std::set<SDOperand> Values; 2506 for (unsigned i = 0; i < NumElems; ++i) { 2507 SDOperand Elt = Op.getOperand(i); 2508 if (Elt.getOpcode() != ISD::UNDEF) { 2509 Values.insert(Elt); 2510 if (isZeroNode(Elt)) 2511 NumZero++; 2512 else { 2513 NonZeros |= (1 << i); 2514 NumNonZero++; 2515 if (Elt.getOpcode() == ISD::Constant || 2516 Elt.getOpcode() == ISD::ConstantFP) 2517 NumNonZeroImms++; 2518 } 2519 } 2520 } 2521 2522 if (NumNonZero == 0) { 2523 if (NumZero == 0) 2524 // All undef vector. Return an UNDEF. 2525 return DAG.getNode(ISD::UNDEF, VT); 2526 else 2527 // A mix of zero and undef. Return a zero vector. 2528 return getZeroVector(VT, DAG); 2529 } 2530 2531 // Splat is obviously ok. Let legalizer expand it to a shuffle. 2532 if (Values.size() == 1) 2533 return SDOperand(); 2534 2535 // Special case for single non-zero element. 2536 if (NumNonZero == 1) { 2537 unsigned Idx = CountTrailingZeros_32(NonZeros); 2538 SDOperand Item = Op.getOperand(Idx); 2539 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 2540 if (Idx == 0) 2541 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 2542 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 2543 NumZero > 0, DAG); 2544 2545 if (EVTBits == 32) { 2546 // Turn it into a shuffle of zero and zero-extended scalar to vector. 2547 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 2548 DAG); 2549 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2550 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2551 SmallVector<SDOperand, 8> MaskVec; 2552 for (unsigned i = 0; i < NumElems; i++) 2553 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 2554 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2555 &MaskVec[0], MaskVec.size()); 2556 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 2557 DAG.getNode(ISD::UNDEF, VT), Mask); 2558 } 2559 } 2560 2561 // A vector full of immediates; various special cases are already 2562 // handled, so this is best done with a single constant-pool load. 2563 if (NumNonZero == NumNonZeroImms) 2564 return SDOperand(); 2565 2566 // Let legalizer expand 2-wide build_vectors. 
2567 if (EVTBits == 64) 2568 return SDOperand(); 2569 2570 // If element VT is < 32 bits, convert it to inserts into a zero vector. 2571 if (EVTBits == 8 && NumElems == 16) { 2572 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, 2573 *this); 2574 if (V.Val) return V; 2575 } 2576 2577 if (EVTBits == 16 && NumElems == 8) { 2578 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, 2579 *this); 2580 if (V.Val) return V; 2581 } 2582 2583 // If element VT is == 32 bits, turn it into a number of shuffles. 2584 SmallVector<SDOperand, 8> V; 2585 V.resize(NumElems); 2586 if (NumElems == 4 && NumZero > 0) { 2587 for (unsigned i = 0; i < 4; ++i) { 2588 bool isZero = !(NonZeros & (1 << i)); 2589 if (isZero) 2590 V[i] = getZeroVector(VT, DAG); 2591 else 2592 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2593 } 2594 2595 for (unsigned i = 0; i < 2; ++i) { 2596 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 2597 default: break; 2598 case 0: 2599 V[i] = V[i*2]; // Must be a zero vector. 2600 break; 2601 case 1: 2602 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 2603 getMOVLMask(NumElems, DAG)); 2604 break; 2605 case 2: 2606 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2607 getMOVLMask(NumElems, DAG)); 2608 break; 2609 case 3: 2610 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 2611 getUnpacklMask(NumElems, DAG)); 2612 break; 2613 } 2614 } 2615 2616 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 2617 // clears the upper bits. 2618 // FIXME: we can do the same for v4f32 case when we know both parts of 2619 // the lower half come from scalar_to_vector (loadf32). We should do 2620 // that in post legalizer dag combiner with target specific hooks. 2621 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 2622 return V[0]; 2623 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2624 MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); 2625 SmallVector<SDOperand, 8> MaskVec; 2626 bool Reverse = (NonZeros & 0x3) == 2; 2627 for (unsigned i = 0; i < 2; ++i) 2628 if (Reverse) 2629 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 2630 else 2631 MaskVec.push_back(DAG.getConstant(i, EVT)); 2632 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 2633 for (unsigned i = 0; i < 2; ++i) 2634 if (Reverse) 2635 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 2636 else 2637 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 2638 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2639 &MaskVec[0], MaskVec.size()); 2640 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 2641 } 2642 2643 if (Values.size() > 2) { 2644 // Expand into a number of unpckl*. 2645 // e.g. 
for v4f32 2646 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 2647 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 2648 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 2649 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 2650 for (unsigned i = 0; i < NumElems; ++i) 2651 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 2652 NumElems >>= 1; 2653 while (NumElems != 0) { 2654 for (unsigned i = 0; i < NumElems; ++i) 2655 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 2656 UnpckMask); 2657 NumElems >>= 1; 2658 } 2659 return V[0]; 2660 } 2661 2662 return SDOperand(); 2663} 2664 2665SDOperand 2666X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2667 SDOperand V1 = Op.getOperand(0); 2668 SDOperand V2 = Op.getOperand(1); 2669 SDOperand PermMask = Op.getOperand(2); 2670 MVT::ValueType VT = Op.getValueType(); 2671 unsigned NumElems = PermMask.getNumOperands(); 2672 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 2673 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 2674 bool V1IsSplat = false; 2675 bool V2IsSplat = false; 2676 2677 if (isUndefShuffle(Op.Val)) 2678 return DAG.getNode(ISD::UNDEF, VT); 2679 2680 if (isZeroShuffle(Op.Val)) 2681 return getZeroVector(VT, DAG); 2682 2683 if (isIdentityMask(PermMask.Val)) 2684 return V1; 2685 else if (isIdentityMask(PermMask.Val, true)) 2686 return V2; 2687 2688 if (isSplatMask(PermMask.Val)) { 2689 if (NumElems <= 4) return Op; 2690 // Promote it to a v4i32 splat. 2691 return PromoteSplat(Op, DAG); 2692 } 2693 2694 if (X86::isMOVLMask(PermMask.Val)) 2695 return (V1IsUndef) ? V2 : Op; 2696 2697 if (X86::isMOVSHDUPMask(PermMask.Val) || 2698 X86::isMOVSLDUPMask(PermMask.Val) || 2699 X86::isMOVHLPSMask(PermMask.Val) || 2700 X86::isMOVHPMask(PermMask.Val) || 2701 X86::isMOVLPMask(PermMask.Val)) 2702 return Op; 2703 2704 if (ShouldXformToMOVHLPS(PermMask.Val) || 2705 ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val)) 2706 return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2707 2708 bool Commuted = false; 2709 V1IsSplat = isSplatVector(V1.Val); 2710 V2IsSplat = isSplatVector(V2.Val); 2711 if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { 2712 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2713 std::swap(V1IsSplat, V2IsSplat); 2714 std::swap(V1IsUndef, V2IsUndef); 2715 Commuted = true; 2716 } 2717 2718 if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) { 2719 if (V2IsUndef) return V1; 2720 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2721 if (V2IsSplat) { 2722 // V2 is a splat, so the mask may be malformed. That is, it may point 2723 // to any V2 element. The instruction selectior won't like this. Get 2724 // a corrected mask and commute to form a proper MOVS{S|D}. 2725 SDOperand NewMask = getMOVLMask(NumElems, DAG); 2726 if (NewMask.Val != PermMask.Val) 2727 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2728 } 2729 return Op; 2730 } 2731 2732 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2733 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2734 X86::isUNPCKLMask(PermMask.Val) || 2735 X86::isUNPCKHMask(PermMask.Val)) 2736 return Op; 2737 2738 if (V2IsSplat) { 2739 // Normalize mask so all entries that point to V2 points to its first 2740 // element then try to match unpck{h|l} again. If match, return a 2741 // new vector_shuffle with the corrected mask. 
2742 SDOperand NewMask = NormalizeMask(PermMask, DAG); 2743 if (NewMask.Val != PermMask.Val) { 2744 if (X86::isUNPCKLMask(PermMask.Val, true)) { 2745 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 2746 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2747 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 2748 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 2749 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 2750 } 2751 } 2752 } 2753 2754 // Normalize the node to match x86 shuffle ops if needed 2755 if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val)) 2756 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2757 2758 if (Commuted) { 2759 // Commute is back and try unpck* again. 2760 Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); 2761 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 2762 X86::isUNPCKH_v_undef_Mask(PermMask.Val) || 2763 X86::isUNPCKLMask(PermMask.Val) || 2764 X86::isUNPCKHMask(PermMask.Val)) 2765 return Op; 2766 } 2767 2768 // If VT is integer, try PSHUF* first, then SHUFP*. 2769 if (MVT::isInteger(VT)) { 2770 if (X86::isPSHUFDMask(PermMask.Val) || 2771 X86::isPSHUFHWMask(PermMask.Val) || 2772 X86::isPSHUFLWMask(PermMask.Val)) { 2773 if (V2.getOpcode() != ISD::UNDEF) 2774 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2775 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2776 return Op; 2777 } 2778 2779 if (X86::isSHUFPMask(PermMask.Val) && 2780 MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. 2781 return Op; 2782 2783 // Handle v8i16 shuffle high / low shuffle node pair. 2784 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 2785 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 2786 MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); 2787 SmallVector<SDOperand, 8> MaskVec; 2788 for (unsigned i = 0; i != 4; ++i) 2789 MaskVec.push_back(PermMask.getOperand(i)); 2790 for (unsigned i = 4; i != 8; ++i) 2791 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2792 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, 2793 &MaskVec[0], MaskVec.size()); 2794 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2795 MaskVec.clear(); 2796 for (unsigned i = 0; i != 4; ++i) 2797 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 2798 for (unsigned i = 4; i != 8; ++i) 2799 MaskVec.push_back(PermMask.getOperand(i)); 2800 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size()); 2801 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 2802 } 2803 } else { 2804 // Floating point cases in the other order. 2805 if (X86::isSHUFPMask(PermMask.Val)) 2806 return Op; 2807 if (X86::isPSHUFDMask(PermMask.Val) || 2808 X86::isPSHUFHWMask(PermMask.Val) || 2809 X86::isPSHUFLWMask(PermMask.Val)) { 2810 if (V2.getOpcode() != ISD::UNDEF) 2811 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 2812 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 2813 return Op; 2814 } 2815 } 2816 2817 if (NumElems == 4 && 2818 // Don't do this for MMX. 2819 MVT::getSizeInBits(VT) != 64) { 2820 MVT::ValueType MaskVT = PermMask.getValueType(); 2821 MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); 2822 SmallVector<std::pair<int, int>, 8> Locs; 2823 Locs.reserve(NumElems); 2824 SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2825 SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 2826 unsigned NumHi = 0; 2827 unsigned NumLo = 0; 2828 // If no more than two elements come from either vector. 
This can be
2829   // implemented with two shuffles. The first shuffle gathers the elements.
2830   // The second shuffle, which takes the first shuffle as both of its
2831   // vector operands, puts the elements into the right order.
2832   for (unsigned i = 0; i != NumElems; ++i) {
2833     SDOperand Elt = PermMask.getOperand(i);
2834     if (Elt.getOpcode() == ISD::UNDEF) {
2835       Locs[i] = std::make_pair(-1, -1);
2836     } else {
2837       unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2838       if (Val < NumElems) {
2839         Locs[i] = std::make_pair(0, NumLo);
2840         Mask1[NumLo] = Elt;
2841         NumLo++;
2842       } else {
2843         Locs[i] = std::make_pair(1, NumHi);
2844         if (2+NumHi < NumElems)
2845           Mask1[2+NumHi] = Elt;
2846         NumHi++;
2847       }
2848     }
2849   }
2850   if (NumLo <= 2 && NumHi <= 2) {
2851     V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2852                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2853                                  &Mask1[0], Mask1.size()));
2854     for (unsigned i = 0; i != NumElems; ++i) {
2855       if (Locs[i].first == -1)
2856         continue;
2857       else {
2858         unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
2859         Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
2860         Mask2[i] = DAG.getConstant(Idx, MaskEVT);
2861       }
2862     }
2863
2864     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
2865                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2866                                    &Mask2[0], Mask2.size()));
2867   }
2868
2869   // Break it into (shuffle shuffle_hi, shuffle_lo).
2870   Locs.clear();
2871   SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2872   SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2873   SmallVector<SDOperand,8> *MaskPtr = &LoMask;
2874   unsigned MaskIdx = 0;
2875   unsigned LoIdx = 0;
2876   unsigned HiIdx = NumElems/2;
2877   for (unsigned i = 0; i != NumElems; ++i) {
2878     if (i == NumElems/2) {
2879       MaskPtr = &HiMask;
2880       MaskIdx = 1;
2881       LoIdx = 0;
2882       HiIdx = NumElems/2;
2883     }
2884     SDOperand Elt = PermMask.getOperand(i);
2885     if (Elt.getOpcode() == ISD::UNDEF) {
2886       Locs[i] = std::make_pair(-1, -1);
2887     } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
2888       Locs[i] = std::make_pair(MaskIdx, LoIdx);
2889       (*MaskPtr)[LoIdx] = Elt;
2890       LoIdx++;
2891     } else {
2892       Locs[i] = std::make_pair(MaskIdx, HiIdx);
2893       (*MaskPtr)[HiIdx] = Elt;
2894       HiIdx++;
2895     }
2896   }
2897
2898   SDOperand LoShuffle =
2899     DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2900                 DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2901                             &LoMask[0], LoMask.size()));
2902   SDOperand HiShuffle =
2903     DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2904                 DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2905                             &HiMask[0], HiMask.size()));
2906   SmallVector<SDOperand, 8> MaskOps;
2907   for (unsigned i = 0; i != NumElems; ++i) {
2908     if (Locs[i].first == -1) {
2909       MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
2910     } else {
2911       unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
2912       MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
2913     }
2914   }
2915   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
2916                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2917                                  &MaskOps[0], MaskOps.size()));
2918   }
2919
2920   return SDOperand();
2921 }
2922
2923 SDOperand
2924 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2925   if (!isa<ConstantSDNode>(Op.getOperand(1)))
2926     return SDOperand();
2927
2928   MVT::ValueType VT = Op.getValueType();
2929   // TODO: handle v16i8.
2930   if (MVT::getSizeInBits(VT) == 16) {
2931     // Transform it so it matches pextrw, which produces a 32-bit result.
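    // e.g. extracting element 3 of a v8i16 value becomes
    // (i16 (trunc (assertzext (X86ISD::PEXTRW V, 3)))).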
2932     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
2933     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
2934                                     Op.getOperand(0), Op.getOperand(1));
2935     SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
2936                                    DAG.getValueType(VT));
2937     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
2938   } else if (MVT::getSizeInBits(VT) == 32) {
2939     SDOperand Vec = Op.getOperand(0);
2940     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2941     if (Idx == 0)
2942       return Op;
2943     // SHUFPS the element to the lowest double word, then movss.
2944     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2945     SmallVector<SDOperand, 8> IdxVec;
2946     IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
2947     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
2948     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
2949     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
2950     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2951                                  &IdxVec[0], IdxVec.size());
2952     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2953                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
2954     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2955                        DAG.getConstant(0, getPointerTy()));
2956   } else if (MVT::getSizeInBits(VT) == 64) {
2957     SDOperand Vec = Op.getOperand(0);
2958     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2959     if (Idx == 0)
2960       return Op;
2961
2962     // UNPCKHPD the element to the lowest double word, then movsd.
2963     // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
2964     // to a f64mem, the whole operation is folded into a single MOVHPDmr.
2965     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2966     SmallVector<SDOperand, 8> IdxVec;
2967     IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
2968     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
2969     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2970                                  &IdxVec[0], IdxVec.size());
2971     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2972                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
2973     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2974                        DAG.getConstant(0, getPointerTy()));
2975   }
2976
2977   return SDOperand();
2978 }
2979
2980 SDOperand
2981 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2982   // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
2983   // as its second argument.
2984   MVT::ValueType VT = Op.getValueType();
2985   MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
2986   SDOperand N0 = Op.getOperand(0);
2987   SDOperand N1 = Op.getOperand(1);
2988   SDOperand N2 = Op.getOperand(2);
2989   if (MVT::getSizeInBits(BaseVT) == 16) {
2990     if (N1.getValueType() != MVT::i32)
2991       N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
2992     if (N2.getValueType() != MVT::i32)
2993       N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), getPointerTy());
2994     return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
2995   } else if (MVT::getSizeInBits(BaseVT) == 32) {
2996     unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
2997     if (Idx == 0) {
2998       // Use a movss.
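      // i.e. shuffle(N0, scalar_to_vector(N1), <4, 1, 2, 3>).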
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
      SmallVector<SDOperand, 8> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                     &MaskVec[0], MaskVec.size()));
    } else {
      // Use two pinsrw instructions to insert a 32-bit value.
      Idx <<= 1;
      if (MVT::isFloatingPoint(N1.getValueType())) {
        if (ISD::isNON_EXTLoad(N1.Val)) {
          // Just load directly from f32mem to GR32.
          LoadSDNode *LD = cast<LoadSDNode>(N1);
          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
                           LD->getSrcValue(), LD->getSrcValueOffset());
        } else {
          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                           DAG.getConstant(0, getPointerTy()));
        }
      }
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, getPointerTy()));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, getPointerTy()));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing modes. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                               getPointerTy(),
                                               CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
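  // For example, under 32-bit PIC the node built below has the shape
  //   (add (X86ISD::GlobalBaseReg), (X86ISD::Wrapper tglobaladdr:@g))
  // i.e. the wrapped target address is an offset from the PIC base register
  // (the exact relocation used depends on the subtarget's PIC style).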
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
  // to load the value at address GV, not the value of GV itself. This means
  // that the GlobalAddress must be in the base or index register of the
  // address, not the GV offset field. The platform check is inside the
  // GVRequiresExtraLoad() call. The same applies to external symbols during
  // PIC codegen.
  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);

  return Result;
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
static SDOperand
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                              const MVT::ValueType PtrVT) {
  SDOperand InFlag;
  SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
                                     DAG.getNode(X86ISD::GlobalBaseReg,
                                                 PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  // emit leal symbol@TLSGD(,%ebx,1), %eax
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Ops[] = { Chain, TGA, InFlag };
  SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
  InFlag = Result.getValue(2);
  Chain = Result.getValue(1);

  // call ___tls_get_addr. This function receives its argument in
  // the register EAX.
  Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
  InFlag = Chain.getValue(1);

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops1[] = { Chain,
                       DAG.getTargetExternalSymbol("___tls_get_addr",
                                                   PtrVT),
                       DAG.getRegister(X86::EAX, PtrVT),
                       DAG.getRegister(X86::EBX, PtrVT),
                       InFlag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
  InFlag = Chain.getValue(1);

  return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}

// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDOperand
LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                    const MVT::ValueType PtrVT) {
  // Get the Thread Pointer
  SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
  // (initial exec)
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);

  if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
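  // Sketch of the two variants handled here: for local exec the offset is
  // folded in directly, (add THREAD_POINTER, Wrapper(x@ntpoff)); for initial
  // exec the offset itself is first loaded from memory, giving
  // (add THREAD_POINTER, (load Wrapper(x@indntpoff))).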
  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}

SDOperand
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  // TODO: implement the "initial exec" model for pic executables
  assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF and 64-bit targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS model,
  // otherwise use the "Local Exec" TLS model.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
  else
    return LowerToTLSExecModel(GA, DAG, getPointerTy());
}

SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
         "Not an i64 shift!");
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDOperand ShOpLo = Op.getOperand(0);
  SDOperand ShOpHi = Op.getOperand(1);
  SDOperand ShAmt = Op.getOperand(2);
  SDOperand Tmp1 = isSRA ?
    DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
    DAG.getConstant(0, MVT::i32);

  SDOperand Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ?
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 3216 } 3217 3218 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 3219 SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, 3220 DAG.getConstant(32, MVT::i8)); 3221 SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; 3222 SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); 3223 3224 SDOperand Hi, Lo; 3225 SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); 3226 3227 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); 3228 SmallVector<SDOperand, 4> Ops; 3229 if (Op.getOpcode() == ISD::SHL_PARTS) { 3230 Ops.push_back(Tmp2); 3231 Ops.push_back(Tmp3); 3232 Ops.push_back(CC); 3233 Ops.push_back(InFlag); 3234 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3235 InFlag = Hi.getValue(1); 3236 3237 Ops.clear(); 3238 Ops.push_back(Tmp3); 3239 Ops.push_back(Tmp1); 3240 Ops.push_back(CC); 3241 Ops.push_back(InFlag); 3242 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3243 } else { 3244 Ops.push_back(Tmp2); 3245 Ops.push_back(Tmp3); 3246 Ops.push_back(CC); 3247 Ops.push_back(InFlag); 3248 Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3249 InFlag = Lo.getValue(1); 3250 3251 Ops.clear(); 3252 Ops.push_back(Tmp3); 3253 Ops.push_back(Tmp1); 3254 Ops.push_back(CC); 3255 Ops.push_back(InFlag); 3256 Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); 3257 } 3258 3259 VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); 3260 Ops.clear(); 3261 Ops.push_back(Lo); 3262 Ops.push_back(Hi); 3263 return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); 3264} 3265 3266SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3267 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 3268 Op.getOperand(0).getValueType() >= MVT::i16 && 3269 "Unknown SINT_TO_FP to lower!"); 3270 3271 SDOperand Result; 3272 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 3273 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 3274 MachineFunction &MF = DAG.getMachineFunction(); 3275 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 3276 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3277 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0), 3278 StackSlot, NULL, 0); 3279 3280 // Build the FILD 3281 SDVTList Tys; 3282 if (X86ScalarSSE) 3283 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); 3284 else 3285 Tys = DAG.getVTList(Op.getValueType(), MVT::Other); 3286 SmallVector<SDOperand, 8> Ops; 3287 Ops.push_back(Chain); 3288 Ops.push_back(StackSlot); 3289 Ops.push_back(DAG.getValueType(SrcVT)); 3290 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 3291 Tys, &Ops[0], Ops.size()); 3292 3293 if (X86ScalarSSE) { 3294 Chain = Result.getValue(1); 3295 SDOperand InFlag = Result.getValue(2); 3296 3297 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 3298 // shouldn't be necessary except that RFP cannot be live across 3299 // multiple blocks. When stackifier is fixed, they can be uncoupled. 
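    // The round trip below is: the FILD result lives on the x87 stack, so it
    // is stored back to a fresh stack slot with X86ISD::FST and then reloaded
    // into an SSE register with a plain load (a sketch of why two stack slots
    // show up in the generated code for SSE sint_to_fp).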
3300 MachineFunction &MF = DAG.getMachineFunction(); 3301 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 3302 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3303 Tys = DAG.getVTList(MVT::Other); 3304 SmallVector<SDOperand, 8> Ops; 3305 Ops.push_back(Chain); 3306 Ops.push_back(Result); 3307 Ops.push_back(StackSlot); 3308 Ops.push_back(DAG.getValueType(Op.getValueType())); 3309 Ops.push_back(InFlag); 3310 Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size()); 3311 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0); 3312 } 3313 3314 return Result; 3315} 3316 3317SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 3318 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 3319 "Unknown FP_TO_SINT to lower!"); 3320 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 3321 // stack slot. 3322 MachineFunction &MF = DAG.getMachineFunction(); 3323 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 3324 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3325 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3326 3327 unsigned Opc; 3328 switch (Op.getValueType()) { 3329 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 3330 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 3331 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 3332 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 3333 } 3334 3335 SDOperand Chain = DAG.getEntryNode(); 3336 SDOperand Value = Op.getOperand(0); 3337 if (X86ScalarSSE) { 3338 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 3339 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0); 3340 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); 3341 SDOperand Ops[] = { 3342 Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) 3343 }; 3344 Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); 3345 Chain = Value.getValue(1); 3346 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 3347 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 3348 } 3349 3350 // Build the FP_TO_INT*_IN_MEM 3351 SDOperand Ops[] = { Chain, Value, StackSlot }; 3352 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); 3353 3354 // Load the result. 
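  // For SSE f64 -> i64 the full sequence built above is, roughly:
  //   store f64; fld f64; fistp64 -> mem; load i64
  // The x87 detour exists because cvttsd2si cannot produce a 64-bit result
  // in 32-bit mode (illustrative summary, not additional lowering logic).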
3355 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); 3356} 3357 3358SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 3359 MVT::ValueType VT = Op.getValueType(); 3360 MVT::ValueType EltVT = VT; 3361 if (MVT::isVector(VT)) 3362 EltVT = MVT::getVectorElementType(VT); 3363 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3364 std::vector<Constant*> CV; 3365 if (EltVT == MVT::f64) { 3366 Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))); 3367 CV.push_back(C); 3368 CV.push_back(C); 3369 } else { 3370 Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))); 3371 CV.push_back(C); 3372 CV.push_back(C); 3373 CV.push_back(C); 3374 CV.push_back(C); 3375 } 3376 Constant *C = ConstantVector::get(CV); 3377 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3378 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3379 false, 16); 3380 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 3381} 3382 3383SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 3384 MVT::ValueType VT = Op.getValueType(); 3385 MVT::ValueType EltVT = VT; 3386 unsigned EltNum = 1; 3387 if (MVT::isVector(VT)) { 3388 EltVT = MVT::getVectorElementType(VT); 3389 EltNum = MVT::getVectorNumElements(VT); 3390 } 3391 const Type *OpNTy = MVT::getTypeForValueType(EltVT); 3392 std::vector<Constant*> CV; 3393 if (EltVT == MVT::f64) { 3394 Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)); 3395 CV.push_back(C); 3396 CV.push_back(C); 3397 } else { 3398 Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31)); 3399 CV.push_back(C); 3400 CV.push_back(C); 3401 CV.push_back(C); 3402 CV.push_back(C); 3403 } 3404 Constant *C = ConstantVector::get(CV); 3405 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3406 SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3407 false, 16); 3408 if (MVT::isVector(VT)) { 3409 return DAG.getNode(ISD::BIT_CONVERT, VT, 3410 DAG.getNode(ISD::XOR, MVT::v2i64, 3411 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)), 3412 DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask))); 3413 } else { 3414 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 3415 } 3416} 3417 3418SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { 3419 SDOperand Op0 = Op.getOperand(0); 3420 SDOperand Op1 = Op.getOperand(1); 3421 MVT::ValueType VT = Op.getValueType(); 3422 MVT::ValueType SrcVT = Op1.getValueType(); 3423 const Type *SrcTy = MVT::getTypeForValueType(SrcVT); 3424 3425 // If second operand is smaller, extend it first. 3426 if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { 3427 Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1); 3428 SrcVT = VT; 3429 } 3430 3431 // First get the sign bit of second operand. 
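  // The constant built below keeps only the sign bit of the low element; for
  // f64, for example, the vector is <0x8000000000000000, 0.0>, so the FAND
  // that follows leaves just Op1's sign bit.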
3432 std::vector<Constant*> CV; 3433 if (SrcVT == MVT::f64) { 3434 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63))); 3435 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3436 } else { 3437 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31))); 3438 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3439 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3440 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3441 } 3442 Constant *C = ConstantVector::get(CV); 3443 SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3444 SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0, 3445 false, 16); 3446 SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1); 3447 3448 // Shift sign bit right or left if the two operands have different types. 3449 if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { 3450 // Op0 is MVT::f32, Op1 is MVT::f64. 3451 SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); 3452 SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, 3453 DAG.getConstant(32, MVT::i32)); 3454 SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); 3455 SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, 3456 DAG.getConstant(0, getPointerTy())); 3457 } 3458 3459 // Clear first operand sign bit. 3460 CV.clear(); 3461 if (VT == MVT::f64) { 3462 CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63)))); 3463 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3464 } else { 3465 CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31)))); 3466 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3467 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3468 CV.push_back(ConstantFP::get(SrcTy, 0.0)); 3469 } 3470 C = ConstantVector::get(CV); 3471 CPIdx = DAG.getConstantPool(C, getPointerTy(), 4); 3472 SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0, 3473 false, 16); 3474 SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2); 3475 3476 // Or the value with the sign bit. 
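  // Net effect, written out: result = (Op0 & ~sign_mask) | (sign bit of Op1),
  // the usual bit-level formulation of copysign.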
  return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
}

SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
                                        SDOperand Chain) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  unsigned X86CC;

  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
                     Op0, Op1, DAG)) {
    SDOperand Ops1[] = { Chain, Op0, Op1 };
    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
  }

  assert(isFP && "Illegal integer SetCC!");

  SDOperand COps[] = { Chain, Op0, Op1 };
  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);

  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}

SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = DAG.getEntryNode();
  SDOperand Cond = Op.getOperand(0);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). Use it as the condition
    // setting operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    bool IllegalFPCMov = !X86ScalarSSE &&
      MVT::isFloatingPoint(Op.getValueType()) &&
      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
        !IllegalFPCMov) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }

  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond.getValue(1));
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond = Op.getOperand(1);
  SDOperand Dest = Op.getOperand(2);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). Use it as the condition
    // setting operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Cond, Op.getOperand(2), CC, Cond.getValue(1));
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit())
    return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
  else
    switch (CallingConv) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      // TODO: Implement fastcc
      // Falls through
    case CallingConv::C:
    case CallingConv::X86_StdCall:
      return LowerCCCCallTo(Op, DAG, CallingConv);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, CallingConv);
    }
}


// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go.
// Touching the stack at 4K increments is necessary to ensure that the guard
// pages used by the OS virtual memory manager are allocated in the correct
// sequence.
SDOperand
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                           SelectionDAG &DAG) {
  assert(Subtarget->isTargetCygMing() &&
         "This should be used only on Cygwin/Mingw targets");

  // Get the inputs.
  SDOperand Chain = Op.getOperand(0);
  SDOperand Size = Op.getOperand(1);
  // FIXME: Ensure alignment here

  SDOperand Flag;

  MVT::ValueType IntPtr = getPointerTy();
  MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);

  Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
  Flag = Chain.getValue(1);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops[] = { Chain,
                      DAG.getTargetExternalSymbol("_alloca", IntPtr),
                      DAG.getRegister(X86::EAX, IntPtr),
                      Flag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
  Flag = Chain.getValue(1);

  Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(SPTy);
  Tys.push_back(MVT::Other);
  SDOperand Ops1[2] = { Chain.getValue(0), Chain };
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);

  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (Subtarget->is64Bit())
    return LowerX86_64CCCArguments(Op, DAG);
  else
    switch(CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      // TODO: implement fastcc.

      // Falls through
    case CallingConv::C:
      return LowerCCCArguments(Op, DAG);
    case CallingConv::X86_StdCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
      return LowerCCCArguments(Op, DAG, true);
    case CallingConv::X86_FastCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
      return LowerFastCCArguments(Op, DAG);
    }
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, call memset if size is less than the threshold.
  // It knows how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Op.getOperand(1);
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    // Extend the unsigned i8 argument to be an int value for the call.
3727 Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 3728 Entry.Ty = IntPtrTy; 3729 Args.push_back(Entry); 3730 Entry.Node = Op.getOperand(3); 3731 Args.push_back(Entry); 3732 std::pair<SDOperand,SDOperand> CallResult = 3733 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3734 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 3735 return CallResult.second; 3736 } 3737 3738 MVT::ValueType AVT; 3739 SDOperand Count; 3740 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3741 unsigned BytesLeft = 0; 3742 bool TwoRepStos = false; 3743 if (ValC) { 3744 unsigned ValReg; 3745 uint64_t Val = ValC->getValue() & 255; 3746 3747 // If the value is a constant, then we can potentially use larger sets. 3748 switch (Align & 3) { 3749 case 2: // WORD aligned 3750 AVT = MVT::i16; 3751 ValReg = X86::AX; 3752 Val = (Val << 8) | Val; 3753 break; 3754 case 0: // DWORD aligned 3755 AVT = MVT::i32; 3756 ValReg = X86::EAX; 3757 Val = (Val << 8) | Val; 3758 Val = (Val << 16) | Val; 3759 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned 3760 AVT = MVT::i64; 3761 ValReg = X86::RAX; 3762 Val = (Val << 32) | Val; 3763 } 3764 break; 3765 default: // Byte aligned 3766 AVT = MVT::i8; 3767 ValReg = X86::AL; 3768 Count = Op.getOperand(3); 3769 break; 3770 } 3771 3772 if (AVT > MVT::i8) { 3773 if (I) { 3774 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3775 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3776 BytesLeft = I->getValue() % UBytes; 3777 } else { 3778 assert(AVT >= MVT::i32 && 3779 "Do not use rep;stos if not at least DWORD aligned"); 3780 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3781 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3782 TwoRepStos = true; 3783 } 3784 } 3785 3786 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 3787 InFlag); 3788 InFlag = Chain.getValue(1); 3789 } else { 3790 AVT = MVT::i8; 3791 Count = Op.getOperand(3); 3792 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 3793 InFlag = Chain.getValue(1); 3794 } 3795 3796 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3797 Count, InFlag); 3798 InFlag = Chain.getValue(1); 3799 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3800 Op.getOperand(1), InFlag); 3801 InFlag = Chain.getValue(1); 3802 3803 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3804 SmallVector<SDOperand, 8> Ops; 3805 Ops.push_back(Chain); 3806 Ops.push_back(DAG.getValueType(AVT)); 3807 Ops.push_back(InFlag); 3808 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3809 3810 if (TwoRepStos) { 3811 InFlag = Chain.getValue(1); 3812 Count = Op.getOperand(3); 3813 MVT::ValueType CVT = Count.getValueType(); 3814 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3815 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3816 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3817 Left, InFlag); 3818 InFlag = Chain.getValue(1); 3819 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3820 Ops.clear(); 3821 Ops.push_back(Chain); 3822 Ops.push_back(DAG.getValueType(MVT::i8)); 3823 Ops.push_back(InFlag); 3824 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); 3825 } else if (BytesLeft) { 3826 // Issue stores for the last 1 - 7 bytes. 
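    // Worked example (assuming the size passes the rep;stos threshold above):
    // a constant 26-byte DWORD-aligned memset of 0xAB gets Count = 26/4 = 6
    // (covered by rep;stos of 0xABABABAB) and BytesLeft = 26%4 = 2, which is
    // finished off here with a single i16 store of 0xABAB.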
3827 SDOperand Value; 3828 unsigned Val = ValC->getValue() & 255; 3829 unsigned Offset = I->getValue() - BytesLeft; 3830 SDOperand DstAddr = Op.getOperand(1); 3831 MVT::ValueType AddrVT = DstAddr.getValueType(); 3832 if (BytesLeft >= 4) { 3833 Val = (Val << 8) | Val; 3834 Val = (Val << 16) | Val; 3835 Value = DAG.getConstant(Val, MVT::i32); 3836 Chain = DAG.getStore(Chain, Value, 3837 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3838 DAG.getConstant(Offset, AddrVT)), 3839 NULL, 0); 3840 BytesLeft -= 4; 3841 Offset += 4; 3842 } 3843 if (BytesLeft >= 2) { 3844 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 3845 Chain = DAG.getStore(Chain, Value, 3846 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3847 DAG.getConstant(Offset, AddrVT)), 3848 NULL, 0); 3849 BytesLeft -= 2; 3850 Offset += 2; 3851 } 3852 if (BytesLeft == 1) { 3853 Value = DAG.getConstant(Val, MVT::i8); 3854 Chain = DAG.getStore(Chain, Value, 3855 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 3856 DAG.getConstant(Offset, AddrVT)), 3857 NULL, 0); 3858 } 3859 } 3860 3861 return Chain; 3862} 3863 3864SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 3865 SDOperand Chain = Op.getOperand(0); 3866 unsigned Align = 3867 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 3868 if (Align == 0) Align = 1; 3869 3870 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 3871 // If not DWORD aligned, call memcpy if size is less than the threshold. 3872 // It knows how to align to the right boundary first. 3873 if ((Align & 3) != 0 || 3874 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 3875 MVT::ValueType IntPtr = getPointerTy(); 3876 TargetLowering::ArgListTy Args; 3877 TargetLowering::ArgListEntry Entry; 3878 Entry.Ty = getTargetData()->getIntPtrType(); 3879 Entry.Node = Op.getOperand(1); Args.push_back(Entry); 3880 Entry.Node = Op.getOperand(2); Args.push_back(Entry); 3881 Entry.Node = Op.getOperand(3); Args.push_back(Entry); 3882 std::pair<SDOperand,SDOperand> CallResult = 3883 LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, 3884 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 3885 return CallResult.second; 3886 } 3887 3888 MVT::ValueType AVT; 3889 SDOperand Count; 3890 unsigned BytesLeft = 0; 3891 bool TwoRepMovs = false; 3892 switch (Align & 3) { 3893 case 2: // WORD aligned 3894 AVT = MVT::i16; 3895 break; 3896 case 0: // DWORD aligned 3897 AVT = MVT::i32; 3898 if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned 3899 AVT = MVT::i64; 3900 break; 3901 default: // Byte aligned 3902 AVT = MVT::i8; 3903 Count = Op.getOperand(3); 3904 break; 3905 } 3906 3907 if (AVT > MVT::i8) { 3908 if (I) { 3909 unsigned UBytes = MVT::getSizeInBits(AVT) / 8; 3910 Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); 3911 BytesLeft = I->getValue() % UBytes; 3912 } else { 3913 assert(AVT >= MVT::i32 && 3914 "Do not use rep;movs if not at least DWORD aligned"); 3915 Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), 3916 Op.getOperand(3), DAG.getConstant(2, MVT::i8)); 3917 TwoRepMovs = true; 3918 } 3919 } 3920 3921 SDOperand InFlag(0, 0); 3922 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, 3923 Count, InFlag); 3924 InFlag = Chain.getValue(1); 3925 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, 3926 Op.getOperand(1), InFlag); 3927 InFlag = Chain.getValue(1); 3928 Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RSI : X86::ESI, 3929 Op.getOperand(2), InFlag); 3930 InFlag = Chain.getValue(1); 3931 3932 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3933 SmallVector<SDOperand, 8> Ops; 3934 Ops.push_back(Chain); 3935 Ops.push_back(DAG.getValueType(AVT)); 3936 Ops.push_back(InFlag); 3937 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3938 3939 if (TwoRepMovs) { 3940 InFlag = Chain.getValue(1); 3941 Count = Op.getOperand(3); 3942 MVT::ValueType CVT = Count.getValueType(); 3943 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 3944 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 3945 Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, 3946 Left, InFlag); 3947 InFlag = Chain.getValue(1); 3948 Tys = DAG.getVTList(MVT::Other, MVT::Flag); 3949 Ops.clear(); 3950 Ops.push_back(Chain); 3951 Ops.push_back(DAG.getValueType(MVT::i8)); 3952 Ops.push_back(InFlag); 3953 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); 3954 } else if (BytesLeft) { 3955 // Issue loads and stores for the last 1 - 7 bytes. 3956 unsigned Offset = I->getValue() - BytesLeft; 3957 SDOperand DstAddr = Op.getOperand(1); 3958 MVT::ValueType DstVT = DstAddr.getValueType(); 3959 SDOperand SrcAddr = Op.getOperand(2); 3960 MVT::ValueType SrcVT = SrcAddr.getValueType(); 3961 SDOperand Value; 3962 if (BytesLeft >= 4) { 3963 Value = DAG.getLoad(MVT::i32, Chain, 3964 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3965 DAG.getConstant(Offset, SrcVT)), 3966 NULL, 0); 3967 Chain = Value.getValue(1); 3968 Chain = DAG.getStore(Chain, Value, 3969 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3970 DAG.getConstant(Offset, DstVT)), 3971 NULL, 0); 3972 BytesLeft -= 4; 3973 Offset += 4; 3974 } 3975 if (BytesLeft >= 2) { 3976 Value = DAG.getLoad(MVT::i16, Chain, 3977 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3978 DAG.getConstant(Offset, SrcVT)), 3979 NULL, 0); 3980 Chain = Value.getValue(1); 3981 Chain = DAG.getStore(Chain, Value, 3982 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3983 DAG.getConstant(Offset, DstVT)), 3984 NULL, 0); 3985 BytesLeft -= 2; 3986 Offset += 2; 3987 } 3988 3989 if (BytesLeft == 1) { 3990 Value = DAG.getLoad(MVT::i8, Chain, 3991 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 3992 DAG.getConstant(Offset, SrcVT)), 3993 NULL, 0); 3994 Chain = Value.getValue(1); 3995 Chain = DAG.getStore(Chain, Value, 3996 DAG.getNode(ISD::ADD, DstVT, DstAddr, 3997 DAG.getConstant(Offset, DstVT)), 3998 NULL, 0); 3999 } 4000 } 4001 4002 return Chain; 4003} 4004 4005SDOperand 4006X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 4007 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); 4008 SDOperand TheOp = Op.getOperand(0); 4009 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); 4010 if (Subtarget->is64Bit()) { 4011 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); 4012 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, 4013 MVT::i64, Copy1.getValue(2)); 4014 SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, 4015 DAG.getConstant(32, MVT::i8)); 4016 SDOperand Ops[] = { 4017 DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1) 4018 }; 4019 4020 Tys = DAG.getVTList(MVT::i64, MVT::Other); 4021 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); 4022 } 4023 4024 SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); 4025 SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX, 4026 MVT::i32, Copy1.getValue(2)); 4027 SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) }; 4028 Tys = DAG.getVTList(MVT::i32, 
MVT::i32, MVT::Other); 4029 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3); 4030} 4031 4032SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 4033 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 4034 4035 if (!Subtarget->is64Bit()) { 4036 // vastart just stores the address of the VarArgsFrameIndex slot into the 4037 // memory location argument. 4038 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4039 return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(), 4040 SV->getOffset()); 4041 } 4042 4043 // __va_list_tag: 4044 // gp_offset (0 - 6 * 8) 4045 // fp_offset (48 - 48 + 8 * 16) 4046 // overflow_arg_area (point to parameters coming in memory). 4047 // reg_save_area 4048 SmallVector<SDOperand, 8> MemOps; 4049 SDOperand FIN = Op.getOperand(1); 4050 // Store gp_offset 4051 SDOperand Store = DAG.getStore(Op.getOperand(0), 4052 DAG.getConstant(VarArgsGPOffset, MVT::i32), 4053 FIN, SV->getValue(), SV->getOffset()); 4054 MemOps.push_back(Store); 4055 4056 // Store fp_offset 4057 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4058 DAG.getConstant(4, getPointerTy())); 4059 Store = DAG.getStore(Op.getOperand(0), 4060 DAG.getConstant(VarArgsFPOffset, MVT::i32), 4061 FIN, SV->getValue(), SV->getOffset()); 4062 MemOps.push_back(Store); 4063 4064 // Store ptr to overflow_arg_area 4065 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4066 DAG.getConstant(4, getPointerTy())); 4067 SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 4068 Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(), 4069 SV->getOffset()); 4070 MemOps.push_back(Store); 4071 4072 // Store ptr to reg_save_area. 4073 FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, 4074 DAG.getConstant(8, getPointerTy())); 4075 SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); 4076 Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(), 4077 SV->getOffset()); 4078 MemOps.push_back(Store); 4079 return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 4080} 4081 4082SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { 4083 // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 4084 SDOperand Chain = Op.getOperand(0); 4085 SDOperand DstPtr = Op.getOperand(1); 4086 SDOperand SrcPtr = Op.getOperand(2); 4087 SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3)); 4088 SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4089 4090 SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, 4091 SrcSV->getValue(), SrcSV->getOffset()); 4092 Chain = SrcPtr.getValue(1); 4093 for (unsigned i = 0; i < 3; ++i) { 4094 SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, 4095 SrcSV->getValue(), SrcSV->getOffset()); 4096 Chain = Val.getValue(1); 4097 Chain = DAG.getStore(Chain, Val, DstPtr, 4098 DstSV->getValue(), DstSV->getOffset()); 4099 if (i == 2) 4100 break; 4101 SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, 4102 DAG.getConstant(8, getPointerTy())); 4103 DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, 4104 DAG.getConstant(8, getPointerTy())); 4105 } 4106 return Chain; 4107} 4108 4109SDOperand 4110X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 4111 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 4112 switch (IntNo) { 4113 default: return SDOperand(); // Don't custom lower most intrinsics. 4114 // Comparison intrinsics. 
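  // Each comparison intrinsic below becomes a flag-producing COMI or UCOMI
  // node plus an X86ISD::SETCC that materializes the bit, roughly
  //   (i32 (any_extend (SETCC cc, (COMI %lhs, %rhs))))
  // with cc chosen by translateX86CC (a sketch of the code at the end of
  // this case).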
4115 case Intrinsic::x86_sse_comieq_ss: 4116 case Intrinsic::x86_sse_comilt_ss: 4117 case Intrinsic::x86_sse_comile_ss: 4118 case Intrinsic::x86_sse_comigt_ss: 4119 case Intrinsic::x86_sse_comige_ss: 4120 case Intrinsic::x86_sse_comineq_ss: 4121 case Intrinsic::x86_sse_ucomieq_ss: 4122 case Intrinsic::x86_sse_ucomilt_ss: 4123 case Intrinsic::x86_sse_ucomile_ss: 4124 case Intrinsic::x86_sse_ucomigt_ss: 4125 case Intrinsic::x86_sse_ucomige_ss: 4126 case Intrinsic::x86_sse_ucomineq_ss: 4127 case Intrinsic::x86_sse2_comieq_sd: 4128 case Intrinsic::x86_sse2_comilt_sd: 4129 case Intrinsic::x86_sse2_comile_sd: 4130 case Intrinsic::x86_sse2_comigt_sd: 4131 case Intrinsic::x86_sse2_comige_sd: 4132 case Intrinsic::x86_sse2_comineq_sd: 4133 case Intrinsic::x86_sse2_ucomieq_sd: 4134 case Intrinsic::x86_sse2_ucomilt_sd: 4135 case Intrinsic::x86_sse2_ucomile_sd: 4136 case Intrinsic::x86_sse2_ucomigt_sd: 4137 case Intrinsic::x86_sse2_ucomige_sd: 4138 case Intrinsic::x86_sse2_ucomineq_sd: { 4139 unsigned Opc = 0; 4140 ISD::CondCode CC = ISD::SETCC_INVALID; 4141 switch (IntNo) { 4142 default: break; 4143 case Intrinsic::x86_sse_comieq_ss: 4144 case Intrinsic::x86_sse2_comieq_sd: 4145 Opc = X86ISD::COMI; 4146 CC = ISD::SETEQ; 4147 break; 4148 case Intrinsic::x86_sse_comilt_ss: 4149 case Intrinsic::x86_sse2_comilt_sd: 4150 Opc = X86ISD::COMI; 4151 CC = ISD::SETLT; 4152 break; 4153 case Intrinsic::x86_sse_comile_ss: 4154 case Intrinsic::x86_sse2_comile_sd: 4155 Opc = X86ISD::COMI; 4156 CC = ISD::SETLE; 4157 break; 4158 case Intrinsic::x86_sse_comigt_ss: 4159 case Intrinsic::x86_sse2_comigt_sd: 4160 Opc = X86ISD::COMI; 4161 CC = ISD::SETGT; 4162 break; 4163 case Intrinsic::x86_sse_comige_ss: 4164 case Intrinsic::x86_sse2_comige_sd: 4165 Opc = X86ISD::COMI; 4166 CC = ISD::SETGE; 4167 break; 4168 case Intrinsic::x86_sse_comineq_ss: 4169 case Intrinsic::x86_sse2_comineq_sd: 4170 Opc = X86ISD::COMI; 4171 CC = ISD::SETNE; 4172 break; 4173 case Intrinsic::x86_sse_ucomieq_ss: 4174 case Intrinsic::x86_sse2_ucomieq_sd: 4175 Opc = X86ISD::UCOMI; 4176 CC = ISD::SETEQ; 4177 break; 4178 case Intrinsic::x86_sse_ucomilt_ss: 4179 case Intrinsic::x86_sse2_ucomilt_sd: 4180 Opc = X86ISD::UCOMI; 4181 CC = ISD::SETLT; 4182 break; 4183 case Intrinsic::x86_sse_ucomile_ss: 4184 case Intrinsic::x86_sse2_ucomile_sd: 4185 Opc = X86ISD::UCOMI; 4186 CC = ISD::SETLE; 4187 break; 4188 case Intrinsic::x86_sse_ucomigt_ss: 4189 case Intrinsic::x86_sse2_ucomigt_sd: 4190 Opc = X86ISD::UCOMI; 4191 CC = ISD::SETGT; 4192 break; 4193 case Intrinsic::x86_sse_ucomige_ss: 4194 case Intrinsic::x86_sse2_ucomige_sd: 4195 Opc = X86ISD::UCOMI; 4196 CC = ISD::SETGE; 4197 break; 4198 case Intrinsic::x86_sse_ucomineq_ss: 4199 case Intrinsic::x86_sse2_ucomineq_sd: 4200 Opc = X86ISD::UCOMI; 4201 CC = ISD::SETNE; 4202 break; 4203 } 4204 4205 unsigned X86CC; 4206 SDOperand LHS = Op.getOperand(1); 4207 SDOperand RHS = Op.getOperand(2); 4208 translateX86CC(CC, true, X86CC, LHS, RHS, DAG); 4209 4210 const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); 4211 SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS }; 4212 SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3); 4213 VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); 4214 SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; 4215 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2); 4216 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 4217 } 4218 } 4219} 4220 4221SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 4222 // Depths > 0 not supported 
yet! 4223 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4224 return SDOperand(); 4225 4226 // Just load the return address 4227 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4228 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4229} 4230 4231SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4232 // Depths > 0 not supported yet! 4233 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4234 return SDOperand(); 4235 4236 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 4237 return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, 4238 DAG.getConstant(4, getPointerTy())); 4239} 4240 4241SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, 4242 SelectionDAG &DAG) { 4243 // Is not yet supported on x86-64 4244 if (Subtarget->is64Bit()) 4245 return SDOperand(); 4246 4247 return DAG.getConstant(8, getPointerTy()); 4248} 4249 4250SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) 4251{ 4252 assert(!Subtarget->is64Bit() && 4253 "Lowering of eh_return builtin is not supported yet on x86-64"); 4254 4255 MachineFunction &MF = DAG.getMachineFunction(); 4256 SDOperand Chain = Op.getOperand(0); 4257 SDOperand Offset = Op.getOperand(1); 4258 SDOperand Handler = Op.getOperand(2); 4259 4260 SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), 4261 getPointerTy()); 4262 4263 SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, 4264 DAG.getConstant(-4UL, getPointerTy())); 4265 StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); 4266 Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); 4267 Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); 4268 MF.addLiveOut(X86::ECX); 4269 4270 return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, 4271 Chain, DAG.getRegister(X86::ECX, getPointerTy())); 4272} 4273 4274SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op, 4275 SelectionDAG &DAG) { 4276 SDOperand Root = Op.getOperand(0); 4277 SDOperand Trmp = Op.getOperand(1); // trampoline 4278 SDOperand FPtr = Op.getOperand(2); // nested function 4279 SDOperand Nest = Op.getOperand(3); // 'nest' parameter value 4280 4281 SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4)); 4282 4283 if (Subtarget->is64Bit()) { 4284 return SDOperand(); // not yet supported 4285 } else { 4286 Function *Func = (Function *) 4287 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); 4288 unsigned CC = Func->getCallingConv(); 4289 unsigned char NestReg; 4290 4291 switch (CC) { 4292 default: 4293 assert(0 && "Unsupported calling convention"); 4294 case CallingConv::C: 4295 case CallingConv::Fast: 4296 case CallingConv::X86_StdCall: { 4297 // Pass 'nest' parameter in ECX. 4298 // Must be kept in sync with X86CallingConv.td 4299 NestReg = N86::ECX; 4300 4301 // Check that ECX wasn't needed by an 'inreg' parameter. 4302 const FunctionType *FTy = Func->getFunctionType(); 4303 const ParamAttrsList *Attrs = FTy->getParamAttrs(); 4304 4305 if (Attrs && !Func->isVarArg()) { 4306 unsigned InRegCount = 0; 4307 unsigned Idx = 1; 4308 4309 for (FunctionType::param_iterator I = FTy->param_begin(), 4310 E = FTy->param_end(); I != E; ++I, ++Idx) 4311 if (Attrs->paramHasAttr(Idx, ParamAttr::InReg)) 4312 // FIXME: should only count parameters that are lowered to integers. 
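          // Each 32-bit piece of an inreg argument occupies one register, so
          // e.g. an i64 inreg parameter counts as two. More than two pieces
          // means ECX, the nest register chosen above, would already be taken
          // (assuming the usual EAX/EDX/ECX inreg order).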
4313 InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32; 4314 4315 if (InRegCount > 2) { 4316 cerr << "Nest register in use - reduce number of inreg parameters!\n"; 4317 abort(); 4318 } 4319 } 4320 break; 4321 } 4322 case CallingConv::X86_FastCall: 4323 // Pass 'nest' parameter in EAX. 4324 // Must be kept in sync with X86CallingConv.td 4325 NestReg = N86::EAX; 4326 break; 4327 } 4328 4329 SDOperand OutChains[4]; 4330 SDOperand Addr, Disp; 4331 4332 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32)); 4333 Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr); 4334 4335 const unsigned char MOV32ri = 0xB8; 4336 const unsigned char JMP = 0xE9; 4337 4338 OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|NestReg, MVT::i8), 4339 Trmp, TrmpSV->getValue(), TrmpSV->getOffset()); 4340 4341 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); 4342 OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(), 4343 TrmpSV->getOffset() + 1, false, 1); 4344 4345 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); 4346 OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr, 4347 TrmpSV->getValue() + 5, TrmpSV->getOffset()); 4348 4349 Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); 4350 OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(), 4351 TrmpSV->getOffset() + 6, false, 1); 4352 4353 return DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4); 4354 } 4355} 4356 4357/// LowerOperation - Provide custom lowering hooks for some operations. 4358/// 4359SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 4360 switch (Op.getOpcode()) { 4361 default: assert(0 && "Should not custom lower this!"); 4362 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4363 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4364 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4365 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 4366 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4367 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4368 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4369 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4370 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 4371 case ISD::SHL_PARTS: 4372 case ISD::SRA_PARTS: 4373 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 4374 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4375 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 4376 case ISD::FABS: return LowerFABS(Op, DAG); 4377 case ISD::FNEG: return LowerFNEG(Op, DAG); 4378 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4379 case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); 4380 case ISD::SELECT: return LowerSELECT(Op, DAG); 4381 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 4382 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4383 case ISD::CALL: return LowerCALL(Op, DAG); 4384 case ISD::RET: return LowerRET(Op, DAG); 4385 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 4386 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 4387 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 4388 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 4389 case ISD::VASTART: return LowerVASTART(Op, DAG); 4390 case ISD::VACOPY: return LowerVACOPY(Op, DAG); 4391 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4392 case 
ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4393 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4394 case ISD::FRAME_TO_ARGS_OFFSET: 4395 return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); 4396 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 4397 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); 4398 case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); 4399 } 4400 return SDOperand(); 4401} 4402 4403const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 4404 switch (Opcode) { 4405 default: return NULL; 4406 case X86ISD::SHLD: return "X86ISD::SHLD"; 4407 case X86ISD::SHRD: return "X86ISD::SHRD"; 4408 case X86ISD::FAND: return "X86ISD::FAND"; 4409 case X86ISD::FOR: return "X86ISD::FOR"; 4410 case X86ISD::FXOR: return "X86ISD::FXOR"; 4411 case X86ISD::FSRL: return "X86ISD::FSRL"; 4412 case X86ISD::FILD: return "X86ISD::FILD"; 4413 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 4414 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 4415 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 4416 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 4417 case X86ISD::FLD: return "X86ISD::FLD"; 4418 case X86ISD::FST: return "X86ISD::FST"; 4419 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 4420 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 4421 case X86ISD::CALL: return "X86ISD::CALL"; 4422 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 4423 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 4424 case X86ISD::CMP: return "X86ISD::CMP"; 4425 case X86ISD::COMI: return "X86ISD::COMI"; 4426 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 4427 case X86ISD::SETCC: return "X86ISD::SETCC"; 4428 case X86ISD::CMOV: return "X86ISD::CMOV"; 4429 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 4430 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 4431 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 4432 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 4433 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 4434 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 4435 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 4436 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 4437 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 4438 case X86ISD::FMAX: return "X86ISD::FMAX"; 4439 case X86ISD::FMIN: return "X86ISD::FMIN"; 4440 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 4441 case X86ISD::FRCP: return "X86ISD::FRCP"; 4442 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 4443 case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; 4444 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 4445 } 4446} 4447 4448// isLegalAddressingMode - Return true if the addressing mode represented 4449// by AM is legal for this target, for a load/store of the specified type. 4450bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, 4451 const Type *Ty) const { 4452 // X86 supports extremely general addressing modes. 4453 4454 // X86 allows a sign-extended 32-bit immediate field as a displacement. 4455 if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) 4456 return false; 4457 4458 if (AM.BaseGV) { 4459 // X86-64 only supports addr of globals in small code model. 4460 if (Subtarget->is64Bit() && 4461 getTargetMachine().getCodeModel() != CodeModel::Small) 4462 return false; 4463 4464 // We can only fold this if we don't need a load either. 
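    // For reference, the general form being validated is
    //   BaseGV + BaseReg + Scale*IndexReg + BaseOffs
    // e.g. "sym+8(%eax,%ecx,4)" folds fine, but a global that needs a GOT
    // load cannot appear in the displacement and is rejected below.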

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.

  // X86 allows a sign-extended 32-bit immediate field as a displacement.
  if (AM.BaseOffs < -(1LL << 31) || AM.BaseOffs >= (1LL << 31))
    return false;

  if (AM.BaseGV) {
    // X86-64 only supports addr of globals in small code model.
    if (Subtarget->is64Bit() &&
        getTargetMachine().getCodeModel() != CodeModel::Small)
      return false;

    // We can only fold this if we don't need a load either.
    if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
      return false;
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}


/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isIdentityMask(Mask.Val) ||
          isIdentityMask(Mask.Val, true) ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKH_v_undef_Mask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(&BVOps[0], 4) ||
            isCommutedMOVL(&BVOps[0], 4, true) ||
            isSHUFPMask(&BVOps[0], 4) ||
            isCommutedSHUFP(&BVOps[0], 4));
  }
  return false;
}
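
// In practice this means every four-element mask (e.g. any v4f32/v4i32
// shuffle) is legal, while wider masks such as v8i16 are only legal when
// they match one of the patterns above, e.g. the identity
// <0,1,2,3,4,5,6,7> or a splat <0,0,0,0,0,0,0,0>.  (An illustrative
// summary of the predicates above, not additional checks.)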

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
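
  // The cases below write an x87 value to memory as an integer.  The x87
  // unit rounds according to the RC field (bits 10-11) of the control word,
  // and C-style conversions need truncation, so the code stores the literal
  // 0xC7F: exception bits masked, RC = 11b (round toward zero).  (Reference
  // note for the control-word dance that follows.)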

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Set the rounding mode to round toward zero...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    }

    // Rebuild the memory address from operands 0-3 of the pseudo.
    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    if (MI->getOperand(1).isImmediate())
      AM.Scale = MI->getOperand(1).getImm();
    if (MI->getOperand(2).isImmediate())
      AM.IndexReg = MI->getOperand(2).getImm();
    if (MI->getOperand(3).isGlobalAddress())
      AM.GV = MI->getOperand(3).getGlobal();
    else
      AM.Disp = MI->getOperand(3).getImm();
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // SETCC produces 0 or 1, so all bits but the lowest are known zero.
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0)
      : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    // Look through the nested shuffle using its own mask, not the mask of N.
    SDOperand Idx = V.getOperand(2).getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(),
                               DAG);
  }
  return SDOperand();
}
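
// For example, with N = vector_shuffle t1, t2, mask and
// t1 = scalar_to_vector x, asking for an element that lands in t1 returns x
// directly; an element that lands in t2 is only resolved if t2 is itself a
// scalar_to_vector or another shuffle, otherwise a null SDOperand comes
// back.  (A worked example of the helper above, nothing more.)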

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::Wrapper) {
    if (GlobalAddressSDNode *GAN =
          dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = GAN->getGlobal();
      Offset += GAN->getOffset();
      return true;
    }
  } else if (Opc == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset = 0;
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);

  assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
  int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
  if (BFI < 0)
    // Fixed objects do not specify alignment, however the offsets are known.
    return ((Subtarget->getStackAlignment() % 16) == 0 &&
            (MFI->getObjectOffset(BFI) % 16) == 0);
  return MFI->getObjectAlignment(BFI) >= 16;
}
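
// These helpers feed the shuffle combine below: for instance, four f32 loads
// from GV+0, GV+4, GV+8 and GV+12 satisfy isConsecutiveLoad with Size = 4
// and Dist equal to each element's index, and isBaseAlignment16 then decides
// whether the merged load may use the 16-byte-aligned form.  (An
// illustrative summary only.)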

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isAlign16) {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile());
  } else {
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset(), LD->isVolatile(),
                       LD->getAlignment());
  }
}
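
// As an illustration of the combine that follows: with SSE2, a node such as
//   select (setolt f32 %a, %b), %a, %b
// becomes X86ISD::FMIN (a single minss), while the "or equal" variants
// (setole/setle/setule) are only turned into min/max under
// -enable-unsafe-fp-math.  (A worked example of the code below, not extra
// logic.)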

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE2, try to form min/max nodes for scalar f32/f64 selects.
  if (Subtarget->hasSSE2() &&
      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
    if (Cond.getOpcode() == ISD::SETCC) {
      // Get the LHS/RHS of the select.
      SDOperand LHS = N->getOperand(1);
      SDOperand RHS = N->getOperand(2);
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE:  // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT:  // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT:  // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:  // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }
  }

  return SDOperand();
}


SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'Y':
      return C_RegisterClass;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
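
// For the immediate constraints handled below: 'I' is a 0-31 constant (e.g.
// a shift count), 'N' is a 0-255 constant (e.g. an I/O port for in/out),
// and 'i' also accepts a non-PIC global address plus an optional
// displacement.  (A summary of the cases that follow.)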

/// isOperandValidForConstraint - Return the specified operand (possibly
/// modified) if the specified SDOperand is valid for the specified target
/// constraint letter, otherwise return null.
SDOperand X86TargetLowering::
isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 31)
        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
    }
    return SDOperand(0,0);
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getValue() <= 255)
        return DAG.getTargetConstant(C->getValue(), Op.getValueType());
    }
    return SDOperand(0,0);
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op))
      return DAG.getTargetConstant(CST->getValue(), Op.getValueType());

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C)
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getValue();
      } else {
        // Try the commuted form: (C + GA).
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
        if (C && GA)
          Offset = GA->getOffset()+C->getValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      // If addressing this global requires a load (e.g. in PIC mode), we
      // can't match.
      if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
                                         false))
        return SDOperand(0, 0);

      Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                      Offset);
      return Op;
    }

    // Otherwise, not valid for this mode.
    return SDOperand(0, 0);
  }
  }
  return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}
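
// For example, asking for 'q' with MVT::i8 yields {AL, DL, CL, BL}, and 'A'
// with MVT::i32 yields the EAX/EDX pair GCC uses for values returned in
// EDX:EAX.  (Worked examples of the table above.)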

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i64 && Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR64RegisterClass);
      if (VT == MVT::i32)
        return std::make_pair(0U, X86::GR32RegisterClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      else if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      break;
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RSTRegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}
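
// So an asm operand written as "{ax}" with an i32 value resolves to EAX in
// GR32, and with an i64 value to RAX in GR64.  (A usage note for the
// remapping above.)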